@relayplane/proxy 1.5.0 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -157
- package/dist/cli.d.ts +1 -1
- package/dist/cli.js +73 -8
- package/dist/cli.js.map +1 -1
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +434 -28
- package/dist/standalone-proxy.js.map +1 -1
- package/package.json +11 -6
package/dist/standalone-proxy.js
CHANGED
|
@@ -68,6 +68,15 @@ const core_1 = require("@relayplane/core");
|
|
|
68
68
|
const model_suggestions_js_1 = require("./utils/model-suggestions.js");
|
|
69
69
|
const telemetry_js_1 = require("./telemetry.js");
|
|
70
70
|
const stats_js_1 = require("./stats.js");
|
|
71
|
+
/**
 * Version string reported by the proxy (e.g. in the /health payload).
 *
 * Read once at module load from the package.json one directory above this
 * file. Falls back to '0.0.0' when the file cannot be read or parsed, or when
 * it does not contain a non-empty string `version` field, so consumers never
 * receive `undefined`.
 */
const PROXY_VERSION = (() => {
    const FALLBACK_VERSION = '0.0.0';
    try {
        const pkgPath = path.join(__dirname, '..', 'package.json');
        const { version } = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
        // A parsable package.json without a usable version must not leak `undefined`.
        return typeof version === 'string' && version.length > 0 ? version : FALLBACK_VERSION;
    }
    catch {
        return FALLBACK_VERSION;
    }
})();
|
|
71
80
|
/** Shared stats collector instance for the proxy server */
|
|
72
81
|
exports.proxyStatsCollector = new stats_js_1.StatsCollector();
|
|
73
82
|
/**
|
|
@@ -90,9 +99,33 @@ exports.DEFAULT_ENDPOINTS = {
|
|
|
90
99
|
baseUrl: 'https://api.x.ai/v1',
|
|
91
100
|
apiKeyEnv: 'XAI_API_KEY',
|
|
92
101
|
},
|
|
93
|
-
|
|
94
|
-
baseUrl: 'https://
|
|
95
|
-
apiKeyEnv: '
|
|
102
|
+
openrouter: {
|
|
103
|
+
baseUrl: 'https://openrouter.ai/api/v1',
|
|
104
|
+
apiKeyEnv: 'OPENROUTER_API_KEY',
|
|
105
|
+
},
|
|
106
|
+
deepseek: {
|
|
107
|
+
baseUrl: 'https://api.deepseek.com/v1',
|
|
108
|
+
apiKeyEnv: 'DEEPSEEK_API_KEY',
|
|
109
|
+
},
|
|
110
|
+
groq: {
|
|
111
|
+
baseUrl: 'https://api.groq.com/openai/v1',
|
|
112
|
+
apiKeyEnv: 'GROQ_API_KEY',
|
|
113
|
+
},
|
|
114
|
+
mistral: {
|
|
115
|
+
baseUrl: 'https://api.mistral.ai/v1',
|
|
116
|
+
apiKeyEnv: 'MISTRAL_API_KEY',
|
|
117
|
+
},
|
|
118
|
+
together: {
|
|
119
|
+
baseUrl: 'https://api.together.xyz/v1',
|
|
120
|
+
apiKeyEnv: 'TOGETHER_API_KEY',
|
|
121
|
+
},
|
|
122
|
+
fireworks: {
|
|
123
|
+
baseUrl: 'https://api.fireworks.ai/inference/v1',
|
|
124
|
+
apiKeyEnv: 'FIREWORKS_API_KEY',
|
|
125
|
+
},
|
|
126
|
+
perplexity: {
|
|
127
|
+
baseUrl: 'https://api.perplexity.ai',
|
|
128
|
+
apiKeyEnv: 'PERPLEXITY_API_KEY',
|
|
96
129
|
},
|
|
97
130
|
};
|
|
98
131
|
/**
|
|
@@ -264,6 +297,127 @@ const globalStats = {
|
|
|
264
297
|
escalations: 0,
|
|
265
298
|
startedAt: Date.now(),
|
|
266
299
|
};
|
|
300
|
+
// --- In-memory request history ---
/** Upper bound on the number of entries kept in `requestHistory`. */
const MAX_HISTORY = 10000;
/** Entries older than this many days are dropped when history is pruned or loaded. */
const HISTORY_RETENTION_DAYS = 7;
/** Request entries, oldest first (new entries are pushed to the end). */
const requestHistory = [];
/** Numeric suffix of the most recently issued `req-<n>` entry id. */
let requestIdCounter = 0;
// --- Persistent history (JSONL) ---
/** Directory holding the persisted history file. */
const HISTORY_DIR = path.join(os.homedir(), '.relayplane');
/** JSON Lines file with one history entry per line. */
const HISTORY_FILE = path.join(HISTORY_DIR, 'history.jsonl');
/** Handle of the pending delayed flush, or null when none is scheduled. */
let historyFlushTimer = null;
/** Entries recorded but not yet appended to HISTORY_FILE. */
let historyWriteBuffer = [];
/** Entries buffered since the last prune/rewrite pass. */
let historyRequestsSinceLastPrune = 0;
|
|
310
|
+
/**
 * Trim the in-memory `requestHistory` array in place.
 *
 * First drops the leading run of entries whose timestamp is older than
 * HISTORY_RETENTION_DAYS, then drops further leading entries until at most
 * MAX_HISTORY remain. Only the front of the array is examined, so an expired
 * entry located behind a newer one is left untouched.
 */
function pruneOldEntries() {
    const oldestAllowed = Date.now() - HISTORY_RETENTION_DAYS * 86400000;
    // Count the expired entries at the head, then remove them in one go.
    let expiredCount = 0;
    while (expiredCount < requestHistory.length
        && new Date(requestHistory[expiredCount].timestamp).getTime() < oldestAllowed) {
        expiredCount += 1;
    }
    if (expiredCount > 0) {
        requestHistory.splice(0, expiredCount);
    }
    // Enforce the size cap by discarding the oldest surplus entries.
    const surplus = requestHistory.length - MAX_HISTORY;
    if (surplus > 0) {
        requestHistory.splice(0, surplus);
    }
}
|
|
321
|
+
/**
 * Load persisted request history from HISTORY_FILE into `requestHistory`.
 *
 * The file is read as JSON Lines. A line is skipped when it is blank, is not
 * valid JSON, does not describe an object with string `id` and `timestamp`
 * fields, or is older than HISTORY_RETENTION_DAYS. After loading, the array is
 * capped at MAX_HISTORY (most recent kept), `requestIdCounter` is raised to
 * the highest `req-<n>` id seen, and the file is rewritten so it only contains
 * the entries that were kept.
 *
 * Never throws: any failure is logged and the function returns normally.
 */
function loadHistoryFromDisk() {
    try {
        if (!fs.existsSync(HISTORY_FILE))
            return;
        const content = fs.readFileSync(HISTORY_FILE, 'utf-8');
        const cutoff = Date.now() - HISTORY_RETENTION_DAYS * 86400000;
        for (const line of content.split('\n')) {
            const trimmed = line.trim();
            if (!trimmed)
                continue;
            let entry;
            try {
                entry = JSON.parse(trimmed);
            }
            catch {
                // Skip corrupt lines
                continue;
            }
            // Valid JSON of the wrong shape is treated like a corrupt line. Without
            // this check a recent entry lacking a string id would be kept and then
            // make the id scan below throw, aborting the rest of the load.
            if (entry === null
                || typeof entry !== 'object'
                || typeof entry.id !== 'string'
                || typeof entry.timestamp !== 'string') {
                continue;
            }
            if (new Date(entry.timestamp).getTime() >= cutoff) {
                requestHistory.push(entry);
            }
        }
        // Cap at MAX_HISTORY (keep most recent)
        if (requestHistory.length > MAX_HISTORY) {
            requestHistory.splice(0, requestHistory.length - MAX_HISTORY);
        }
        // Update requestIdCounter based on loaded entries so new ids do not collide
        for (const entry of requestHistory) {
            const match = typeof entry.id === 'string' ? /^req-(\d+)$/.exec(entry.id) : null;
            if (match) {
                const num = parseInt(match[1], 10);
                if (num > requestIdCounter)
                    requestIdCounter = num;
            }
        }
        // Rewrite file with only valid/recent entries
        rewriteHistoryFile();
        console.log(`[RelayPlane] Loaded ${requestHistory.length} history entries from disk`);
    }
    catch (err) {
        console.log(`[RelayPlane] Could not load history: ${err.message}`);
    }
}
|
|
363
|
+
/**
 * Overwrite HISTORY_FILE with the current contents of `requestHistory`,
 * one JSON document per line (an empty history yields an empty file).
 *
 * Creates HISTORY_DIR when needed. Failures are logged and not rethrown.
 */
function rewriteHistoryFile() {
    try {
        fs.mkdirSync(HISTORY_DIR, { recursive: true });
        const serialized = requestHistory.map((record) => JSON.stringify(record));
        // Terminate the last record with a newline so later appends start on a fresh line.
        const payload = serialized.length > 0 ? serialized.join('\n') + '\n' : '';
        fs.writeFileSync(HISTORY_FILE, payload, 'utf-8');
    }
    catch (err) {
        console.log(`[RelayPlane] Could not rewrite history file: ${err.message}`);
    }
}
|
|
373
|
+
/**
 * Append every buffered history entry to HISTORY_FILE and empty the buffer.
 *
 * Does nothing when the buffer is empty. The buffer is emptied even when the
 * append fails; the failure is logged and not rethrown.
 */
function flushHistoryBuffer() {
    if (historyWriteBuffer.length > 0) {
        try {
            fs.mkdirSync(HISTORY_DIR, { recursive: true });
            const serialized = historyWriteBuffer.map((record) => JSON.stringify(record));
            fs.appendFileSync(HISTORY_FILE, serialized.join('\n') + '\n', 'utf-8');
        }
        catch (err) {
            console.log(`[RelayPlane] Could not flush history: ${err.message}`);
        }
        historyWriteBuffer = [];
    }
}
|
|
386
|
+
/**
 * Arrange for the history buffer to be flushed 10 seconds from now.
 *
 * At most one flush is pending at a time: when a timer is already set the
 * call is a no-op. The timer handle is cleared before the flush runs.
 */
function scheduleHistoryFlush() {
    if (!historyFlushTimer) {
        historyFlushTimer = setTimeout(function onHistoryFlushTimer() {
            historyFlushTimer = null;
            flushHistoryBuffer();
        }, 10000);
    }
}
|
|
394
|
+
/**
 * Queue a history entry for persistence to HISTORY_FILE.
 *
 * - Every 100th buffered entry triggers a prune of the in-memory history and
 *   a full rewrite of the file.
 * - Otherwise, once 20 entries are pending they are appended immediately.
 * - Otherwise a delayed flush is scheduled.
 *
 * @param {object} entry - History entry to persist. logRequest pushes it onto
 *   `requestHistory` before calling this function.
 */
function bufferHistoryEntry(entry) {
    historyWriteBuffer.push(entry);
    historyRequestsSinceLastPrune++;
    // Prune every 100 requests
    if (historyRequestsSinceLastPrune >= 100) {
        historyRequestsSinceLastPrune = 0;
        pruneOldEntries();
        rewriteHistoryFile();
        // The rewrite serialises the whole in-memory history, which already
        // contains the entries still waiting in the buffer. Drop them (and any
        // pending timer) so a later flush does not append them a second time.
        historyWriteBuffer = [];
        if (historyFlushTimer) {
            clearTimeout(historyFlushTimer);
            historyFlushTimer = null;
        }
        return;
    }
    if (historyWriteBuffer.length >= 20) {
        // Batch is full: cancel the delayed flush and write right away.
        if (historyFlushTimer) {
            clearTimeout(historyFlushTimer);
            historyFlushTimer = null;
        }
        flushHistoryBuffer();
    }
    else {
        scheduleHistoryFlush();
    }
}
|
|
414
|
+
/**
 * Cancel any pending delayed flush and synchronously write the buffered
 * history entries to disk.
 */
function shutdownHistory() {
    const pendingTimer = historyFlushTimer;
    if (pendingTimer) {
        clearTimeout(pendingTimer);
        historyFlushTimer = null;
    }
    flushHistoryBuffer();
}
|
|
267
421
|
function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated) {
|
|
268
422
|
const timestamp = new Date().toISOString();
|
|
269
423
|
const status = success ? '✓' : '✗';
|
|
@@ -291,6 +445,35 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
|
|
|
291
445
|
viaProxy: true,
|
|
292
446
|
success,
|
|
293
447
|
});
|
|
448
|
+
// Record to request history for telemetry endpoints
|
|
449
|
+
const entry = {
|
|
450
|
+
id: `req-${++requestIdCounter}`,
|
|
451
|
+
originalModel,
|
|
452
|
+
targetModel,
|
|
453
|
+
provider,
|
|
454
|
+
latencyMs,
|
|
455
|
+
success,
|
|
456
|
+
mode,
|
|
457
|
+
escalated: !!escalated,
|
|
458
|
+
timestamp,
|
|
459
|
+
tokensIn: 0,
|
|
460
|
+
tokensOut: 0,
|
|
461
|
+
costUsd: 0,
|
|
462
|
+
};
|
|
463
|
+
requestHistory.push(entry);
|
|
464
|
+
if (requestHistory.length > MAX_HISTORY) {
|
|
465
|
+
requestHistory.shift();
|
|
466
|
+
}
|
|
467
|
+
bufferHistoryEntry(entry);
|
|
468
|
+
}
|
|
469
|
+
/**
 * Set token counts and cost on the newest entry of `requestHistory`.
 *
 * Does nothing when the history is empty. The entry object is mutated in
 * place.
 *
 * @param tokensIn - Value stored as the entry's `tokensIn`.
 * @param tokensOut - Value stored as the entry's `tokensOut`.
 * @param costUsd - Value stored as the entry's `costUsd`.
 */
function updateLastHistoryEntry(tokensIn, tokensOut, costUsd) {
    if (requestHistory.length === 0) {
        return;
    }
    const newestIndex = requestHistory.length - 1;
    Object.assign(requestHistory[newestIndex], { tokensIn, tokensOut, costUsd });
}
|
|
295
478
|
const DEFAULT_PROXY_CONFIG = {
|
|
296
479
|
enabled: true,
|
|
@@ -824,40 +1007,40 @@ async function forwardToXAIStream(request, targetModel, apiKey) {
|
|
|
824
1007
|
return response;
|
|
825
1008
|
}
|
|
826
1009
|
/**
 * Forward non-streaming request to OpenAI-compatible provider (OpenRouter, DeepSeek, Groq)
 *
 * POSTs the request body, with `model` replaced by `targetModel` and `stream`
 * forced to false, to `<baseUrl>/chat/completions` using bearer authentication.
 *
 * @param request - Incoming request body; its fields are copied into the outgoing body.
 * @param targetModel - Model name sent to the provider.
 * @param apiKey - Value placed in the `Authorization: Bearer` header.
 * @param provider - Key into `exports.DEFAULT_ENDPOINTS`; when the entry or its
 *   `baseUrl` is missing, the OpenRouter base URL is used.
 * @returns The fetch response as received; the HTTP status is not inspected here.
 */
async function forwardToOpenAICompatible(request, targetModel, apiKey, provider = 'openrouter') {
    const payload = Object.assign({}, request, { model: targetModel, stream: false });
    const baseUrl = exports.DEFAULT_ENDPOINTS[provider]?.baseUrl || "https://openrouter.ai/api/v1";
    const requestInit = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(payload),
    };
    return fetch(`${baseUrl}/chat/completions`, requestInit);
}
|
|
845
1028
|
/**
 * Forward streaming request to OpenAI-compatible provider (OpenRouter, DeepSeek, Groq)
 *
 * POSTs the request body, with `model` replaced by `targetModel` and `stream`
 * forced to true, to `<baseUrl>/chat/completions` using bearer authentication.
 *
 * @param request - Incoming request body; its fields are copied into the outgoing body.
 * @param targetModel - Model name sent to the provider.
 * @param apiKey - Value placed in the `Authorization: Bearer` header.
 * @param provider - Key into `exports.DEFAULT_ENDPOINTS`; when the entry or its
 *   `baseUrl` is missing, the OpenRouter base URL is used.
 * @returns The fetch response as received; the HTTP status is not inspected here.
 */
async function forwardToOpenAICompatibleStream(request, targetModel, apiKey, provider = 'openrouter') {
    const payload = Object.assign({}, request, { model: targetModel, stream: true });
    const baseUrl = exports.DEFAULT_ENDPOINTS[provider]?.baseUrl || "https://openrouter.ai/api/v1";
    const requestInit = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(payload),
    };
    return fetch(`${baseUrl}/chat/completions`, requestInit);
}
|
|
@@ -1337,7 +1520,7 @@ function parsePreferredModel(preferredModel) {
|
|
|
1337
1520
|
if (!provider || !model)
|
|
1338
1521
|
return null;
|
|
1339
1522
|
// Validate provider
|
|
1340
|
-
const validProviders = ['openai', 'anthropic', 'google', 'xai', '
|
|
1523
|
+
const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
|
|
1341
1524
|
if (!validProviders.includes(provider))
|
|
1342
1525
|
return null;
|
|
1343
1526
|
return { provider: provider, model };
|
|
@@ -1384,14 +1567,14 @@ function resolveExplicitModel(modelName) {
|
|
|
1384
1567
|
if (modelName.startsWith('grok-')) {
|
|
1385
1568
|
return { provider: 'xai', model: modelName };
|
|
1386
1569
|
}
|
|
1387
|
-
//
|
|
1388
|
-
if (modelName.startsWith('
|
|
1389
|
-
return { provider: '
|
|
1570
|
+
// OpenRouter/DeepSeek/Groq models
|
|
1571
|
+
if (modelName.startsWith('openrouter/') || modelName.startsWith('deepseek-') || modelName.startsWith('groq-')) {
|
|
1572
|
+
return { provider: 'openrouter', model: modelName };
|
|
1390
1573
|
}
|
|
1391
1574
|
// Provider-prefixed format: "anthropic/claude-3-5-sonnet-latest"
|
|
1392
1575
|
if (modelName.includes('/')) {
|
|
1393
1576
|
const [provider, model] = modelName.split('/');
|
|
1394
|
-
const validProviders = ['openai', 'anthropic', 'google', 'xai', '
|
|
1577
|
+
const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
|
|
1395
1578
|
if (provider && model && validProviders.includes(provider)) {
|
|
1396
1579
|
return { provider: provider, model };
|
|
1397
1580
|
}
|
|
@@ -1572,6 +1755,73 @@ async function cascadeRequest(config, makeRequest, log) {
|
|
|
1572
1755
|
}
|
|
1573
1756
|
throw new Error('All cascade models exhausted');
|
|
1574
1757
|
}
|
|
1758
|
+
/**
 * Build the self-contained HTML page served as the RelayPlane dashboard.
 *
 * The page has no server-side templating: its inline script fetches /health
 * and the /v1/telemetry/{stats,runs,savings,health} endpoints and re-renders
 * every 5 seconds.
 *
 * Values that originate from request or provider data (model names, status,
 * provider names, token counts, latency) are passed through the inline `esc`
 * helper before being inserted via innerHTML, so markup embedded in, for
 * example, a client-supplied model name is shown as text instead of being
 * interpreted by the browser.
 *
 * @returns {string} Complete HTML document.
 */
function getDashboardHTML() {
    return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Dashboard</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1200px;margin:0 auto}
a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
.header{display:flex;justify-content:space-between;align-items:center;padding:16px 0;border-bottom:1px solid #1e293b;margin-bottom:24px}
.header .meta{font-size:.8rem;color:#64748b}
.cards{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:16px;margin-bottom:32px}
.card{background:#111318;border:1px solid #1e293b;border-radius:12px;padding:20px}
.card .label{font-size:.75rem;color:#64748b;text-transform:uppercase;letter-spacing:.05em;margin-bottom:6px}
.card .value{font-size:1.75rem;font-weight:700}.green{color:#34d399}
table{width:100%;border-collapse:collapse;font-size:.85rem}
th{text-align:left;color:#64748b;font-weight:500;padding:8px 12px;border-bottom:1px solid #1e293b;font-size:.75rem;text-transform:uppercase;letter-spacing:.04em}
td{padding:8px 12px;border-bottom:1px solid #111318}
.section{margin-bottom:32px}.section h2{font-size:1rem;font-weight:600;margin-bottom:12px;color:#94a3b8}
.dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.down{background:#ef4444}
.badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
.prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
</style></head><body>
<div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><span id="ver"></span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
<div class="cards">
<div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
<div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
<div class="card"><div class="label">Savings</div><div class="value green" id="savings">—</div></div>
<div class="card"><div class="label">Avg Latency</div><div class="value" id="avgLat">—</div></div>
</div>
<div class="section"><h2>Model Breakdown</h2>
<table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
<div class="section"><h2>Recent Runs</h2>
<table><thead><tr><th>Time</th><th>Model</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
<script>
const $ = id => document.getElementById(id);
function esc(v){return String(v??'').replace(/[&<>"']/g,c=>'&#'+c.charCodeAt(0)+';')}
function fmt(n,d=2){return typeof n==='number'?n.toFixed(d):'-'}
function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
function dur(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60);return h?h+'h '+m+'m':m+'m'}
async function load(){
try{
const [health,stats,runsR,sav,provH]=await Promise.all([
fetch('/health').then(r=>r.json()),
fetch('/v1/telemetry/stats').then(r=>r.json()),
fetch('/v1/telemetry/runs?limit=20').then(r=>r.json()),
fetch('/v1/telemetry/savings').then(r=>r.json()),
fetch('/v1/telemetry/health').then(r=>r.json())
]);
$('ver').textContent='v'+health.version;
$('uptime').textContent=dur(health.uptime);
$('totalReq').textContent=health.requests??0;
$('totalCost').textContent='$'+fmt(stats.summary?.totalCostUsd??0,4);
$('savings').textContent=(sav.percentage??0)+'%';
$('avgLat').textContent=(stats.summary?.avgLatencyMs??0)+'ms';
const total=stats.summary?.totalEvents||1;
$('models').innerHTML=(stats.byModel||[]).map(m=>
'<tr><td>'+esc(m.model)+'</td><td>'+esc(m.count)+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(m.count/total*100,1)+'%</td></tr>'
).join('')||'<tr><td colspan=4 style="color:#64748b">No data yet</td></tr>';
$('runs').innerHTML=(runsR.runs||[]).map(r=>
'<tr><td>'+fmtTime(r.started_at)+'</td><td>'+esc(r.model)+'</td><td>'+esc(r.tokensIn||0)+'</td><td>'+esc(r.tokensOut||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+esc(r.latencyMs)+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+esc(r.status)+'</span></td></tr>'
).join('')||'<tr><td colspan=7 style="color:#64748b">No runs yet</td></tr>';
$('providers').innerHTML=(provH.providers||[]).map(p=>
'<div class="prov-item"><span class="dot '+(p.status==='healthy'?'up':'down')+'"></span>'+esc(p.provider)+'</div>'
).join('');
}catch(e){console.error(e)}
}
load();setInterval(load,5000);
</script></body></html>`;
}
|
|
1575
1825
|
/**
|
|
1576
1826
|
* Start the RelayPlane proxy server
|
|
1577
1827
|
*/
|
|
@@ -1584,6 +1834,15 @@ async function startProxy(config = {}) {
|
|
|
1584
1834
|
if (verbose)
|
|
1585
1835
|
console.log(`[relayplane] ${msg}`);
|
|
1586
1836
|
};
|
|
1837
|
+
// Load persistent history from disk
|
|
1838
|
+
loadHistoryFromDisk();
|
|
1839
|
+
// Flush history on shutdown
|
|
1840
|
+
const handleShutdown = () => {
|
|
1841
|
+
shutdownHistory();
|
|
1842
|
+
process.exit(0);
|
|
1843
|
+
};
|
|
1844
|
+
process.on('SIGINT', handleShutdown);
|
|
1845
|
+
process.on('SIGTERM', handleShutdown);
|
|
1587
1846
|
const configPath = getProxyConfigPath();
|
|
1588
1847
|
let proxyConfig = await loadProxyConfig(configPath, log);
|
|
1589
1848
|
const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
|
|
@@ -1639,7 +1898,7 @@ async function startProxy(config = {}) {
|
|
|
1639
1898
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
1640
1899
|
res.end(JSON.stringify({
|
|
1641
1900
|
status: 'ok',
|
|
1642
|
-
version:
|
|
1901
|
+
version: PROXY_VERSION,
|
|
1643
1902
|
uptime: Math.floor(uptimeMs / 1000),
|
|
1644
1903
|
uptimeMs,
|
|
1645
1904
|
requests: globalStats.totalRequests,
|
|
@@ -1723,6 +1982,120 @@ async function startProxy(config = {}) {
|
|
|
1723
1982
|
return;
|
|
1724
1983
|
}
|
|
1725
1984
|
}
|
|
1985
|
+
// === Telemetry endpoints for dashboard ===
|
|
1986
|
+
if (pathname.startsWith('/v1/telemetry/')) {
|
|
1987
|
+
const telemetryPath = pathname.replace('/v1/telemetry/', '');
|
|
1988
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
1989
|
+
const params = new URLSearchParams(queryString);
|
|
1990
|
+
if (req.method === 'GET' && telemetryPath === 'stats') {
|
|
1991
|
+
const days = parseInt(params.get('days') || '7', 10);
|
|
1992
|
+
const cutoff = Date.now() - days * 86400000;
|
|
1993
|
+
const recent = requestHistory.filter(r => new Date(r.timestamp).getTime() >= cutoff);
|
|
1994
|
+
// Model breakdown
|
|
1995
|
+
const modelMap = new Map();
|
|
1996
|
+
for (const r of recent) {
|
|
1997
|
+
const key = r.targetModel;
|
|
1998
|
+
const cur = modelMap.get(key) || { count: 0, cost: 0 };
|
|
1999
|
+
cur.count++;
|
|
2000
|
+
cur.cost += r.costUsd;
|
|
2001
|
+
modelMap.set(key, cur);
|
|
2002
|
+
}
|
|
2003
|
+
// Daily stats
|
|
2004
|
+
const dailyMap = new Map();
|
|
2005
|
+
for (const r of recent) {
|
|
2006
|
+
const date = r.timestamp.slice(0, 10);
|
|
2007
|
+
const cur = dailyMap.get(date) || { requests: 0, cost: 0 };
|
|
2008
|
+
cur.requests++;
|
|
2009
|
+
cur.cost += r.costUsd;
|
|
2010
|
+
dailyMap.set(date, cur);
|
|
2011
|
+
}
|
|
2012
|
+
const totalCost = recent.reduce((s, r) => s + r.costUsd, 0);
|
|
2013
|
+
const totalLatency = recent.reduce((s, r) => s + r.latencyMs, 0);
|
|
2014
|
+
const result = {
|
|
2015
|
+
summary: {
|
|
2016
|
+
totalCostUsd: totalCost,
|
|
2017
|
+
totalEvents: recent.length,
|
|
2018
|
+
avgLatencyMs: recent.length ? Math.round(totalLatency / recent.length) : 0,
|
|
2019
|
+
successRate: recent.length ? recent.filter(r => r.success).length / recent.length : 0,
|
|
2020
|
+
},
|
|
2021
|
+
byModel: Array.from(modelMap.entries()).map(([model, v]) => ({ model, count: v.count, costUsd: v.cost, savings: 0 })),
|
|
2022
|
+
dailyCosts: Array.from(dailyMap.entries()).map(([date, v]) => ({ date, costUsd: v.cost, requests: v.requests })),
|
|
2023
|
+
};
|
|
2024
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2025
|
+
res.end(JSON.stringify(result));
|
|
2026
|
+
return;
|
|
2027
|
+
}
|
|
2028
|
+
if (req.method === 'GET' && telemetryPath === 'runs') {
|
|
2029
|
+
const limit = parseInt(params.get('limit') || '50', 10);
|
|
2030
|
+
const offset = parseInt(params.get('offset') || '0', 10);
|
|
2031
|
+
const sorted = [...requestHistory].reverse();
|
|
2032
|
+
const runs = sorted.slice(offset, offset + limit).map(r => ({
|
|
2033
|
+
id: r.id,
|
|
2034
|
+
workflow_name: r.mode,
|
|
2035
|
+
status: r.success ? 'success' : 'error',
|
|
2036
|
+
started_at: r.timestamp,
|
|
2037
|
+
model: r.targetModel,
|
|
2038
|
+
routed_to: `${r.provider}/${r.targetModel}`,
|
|
2039
|
+
taskType: r.mode,
|
|
2040
|
+
costUsd: r.costUsd,
|
|
2041
|
+
latencyMs: r.latencyMs,
|
|
2042
|
+
tokensIn: r.tokensIn,
|
|
2043
|
+
tokensOut: r.tokensOut,
|
|
2044
|
+
savings: 0,
|
|
2045
|
+
original_model: r.originalModel,
|
|
2046
|
+
}));
|
|
2047
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2048
|
+
res.end(JSON.stringify({ runs, pagination: { total: requestHistory.length } }));
|
|
2049
|
+
return;
|
|
2050
|
+
}
|
|
2051
|
+
if (req.method === 'GET' && telemetryPath === 'savings') {
|
|
2052
|
+
// Calculate savings: difference between cost if all requests used opus vs actual cost
|
|
2053
|
+
const opusCostPer1kIn = 0.015;
|
|
2054
|
+
const opusCostPer1kOut = 0.075;
|
|
2055
|
+
let potentialCost = 0;
|
|
2056
|
+
let actualCost = 0;
|
|
2057
|
+
for (const r of requestHistory) {
|
|
2058
|
+
potentialCost += (r.tokensIn / 1000) * opusCostPer1kIn + (r.tokensOut / 1000) * opusCostPer1kOut;
|
|
2059
|
+
actualCost += r.costUsd;
|
|
2060
|
+
}
|
|
2061
|
+
const saved = potentialCost - actualCost;
|
|
2062
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2063
|
+
res.end(JSON.stringify({
|
|
2064
|
+
total: potentialCost,
|
|
2065
|
+
savings: Math.max(0, saved),
|
|
2066
|
+
savedAmount: Math.max(0, saved),
|
|
2067
|
+
potentialSavings: potentialCost,
|
|
2068
|
+
percentage: potentialCost > 0 ? Math.round((saved / potentialCost) * 100) : 0,
|
|
2069
|
+
byDay: [],
|
|
2070
|
+
}));
|
|
2071
|
+
return;
|
|
2072
|
+
}
|
|
2073
|
+
if (req.method === 'GET' && telemetryPath === 'health') {
|
|
2074
|
+
const providers = [];
|
|
2075
|
+
for (const [name, ep] of Object.entries(exports.DEFAULT_ENDPOINTS)) {
|
|
2076
|
+
const hasKey = !!process.env[ep.apiKeyEnv];
|
|
2077
|
+
providers.push({
|
|
2078
|
+
provider: name,
|
|
2079
|
+
status: hasKey ? 'healthy' : 'down',
|
|
2080
|
+
latency: 0,
|
|
2081
|
+
successRate: hasKey ? 1 : 0,
|
|
2082
|
+
lastChecked: new Date().toISOString(),
|
|
2083
|
+
});
|
|
2084
|
+
}
|
|
2085
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2086
|
+
res.end(JSON.stringify({ providers }));
|
|
2087
|
+
return;
|
|
2088
|
+
}
|
|
2089
|
+
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
2090
|
+
res.end(JSON.stringify({ error: 'Not found' }));
|
|
2091
|
+
return;
|
|
2092
|
+
}
|
|
2093
|
+
// === Dashboard ===
|
|
2094
|
+
if (req.method === 'GET' && (pathname === '/' || pathname === '/dashboard')) {
|
|
2095
|
+
res.writeHead(200, { 'Content-Type': 'text/html' });
|
|
2096
|
+
res.end(getDashboardHTML());
|
|
2097
|
+
return;
|
|
2098
|
+
}
|
|
1726
2099
|
// Extract auth context from incoming request
|
|
1727
2100
|
const ctx = extractRequestContext(req);
|
|
1728
2101
|
const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
|
|
@@ -1992,6 +2365,8 @@ async function startProxy(config = {}) {
|
|
|
1992
2365
|
if (proxyConfig.reliability?.cooldowns?.enabled) {
|
|
1993
2366
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
|
|
1994
2367
|
}
|
|
2368
|
+
const durationMs = Date.now() - startTime;
|
|
2369
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode);
|
|
1995
2370
|
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
|
|
1996
2371
|
res.end(JSON.stringify(errorPayload));
|
|
1997
2372
|
return;
|
|
@@ -2394,6 +2769,13 @@ async function startProxy(config = {}) {
|
|
|
2394
2769
|
}, log);
|
|
2395
2770
|
const durationMs = Date.now() - startTime;
|
|
2396
2771
|
let responseData = cascadeResult.responseData;
|
|
2772
|
+
// Log cascade request for stats tracking
|
|
2773
|
+
logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0);
|
|
2774
|
+
const cascadeUsage = responseData?.usage;
|
|
2775
|
+
const cascadeTokensIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
|
|
2776
|
+
const cascadeTokensOut = cascadeUsage?.output_tokens ?? cascadeUsage?.completion_tokens ?? 0;
|
|
2777
|
+
const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut);
|
|
2778
|
+
updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost);
|
|
2397
2779
|
if (recordTelemetry) {
|
|
2398
2780
|
try {
|
|
2399
2781
|
const runResult = await relay.run({
|
|
@@ -2415,15 +2797,14 @@ async function startProxy(config = {}) {
|
|
|
2415
2797
|
catch (err) {
|
|
2416
2798
|
log(`Failed to record run: ${err}`);
|
|
2417
2799
|
}
|
|
2418
|
-
|
|
2419
|
-
const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|
|
2420
|
-
const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
|
|
2421
|
-
sendCloudTelemetry(taskType, cascadeResult.model, tokensIn, tokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
|
|
2800
|
+
sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
|
|
2422
2801
|
}
|
|
2423
2802
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2424
2803
|
res.end(JSON.stringify(responseData));
|
|
2425
2804
|
}
|
|
2426
2805
|
catch (err) {
|
|
2806
|
+
const durationMs = Date.now() - startTime;
|
|
2807
|
+
logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade');
|
|
2427
2808
|
if (err instanceof ProviderResponseError) {
|
|
2428
2809
|
res.writeHead(err.status, { 'Content-Type': 'application/json' });
|
|
2429
2810
|
res.end(JSON.stringify(err.payload));
|
|
@@ -2488,8 +2869,10 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
|
|
|
2488
2869
|
}
|
|
2489
2870
|
break;
|
|
2490
2871
|
}
|
|
2491
|
-
case '
|
|
2492
|
-
|
|
2872
|
+
case 'openrouter':
|
|
2873
|
+
case 'deepseek':
|
|
2874
|
+
case 'groq': {
|
|
2875
|
+
providerResponse = await forwardToOpenAICompatible(request, targetModel, apiKey);
|
|
2493
2876
|
responseData = (await providerResponse.json());
|
|
2494
2877
|
if (!providerResponse.ok) {
|
|
2495
2878
|
return { responseData, ok: false, status: providerResponse.status };
|
|
@@ -2520,8 +2903,10 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2520
2903
|
case 'xai':
|
|
2521
2904
|
providerResponse = await forwardToXAIStream(request, targetModel, apiKey);
|
|
2522
2905
|
break;
|
|
2523
|
-
case '
|
|
2524
|
-
|
|
2906
|
+
case 'openrouter':
|
|
2907
|
+
case 'deepseek':
|
|
2908
|
+
case 'groq':
|
|
2909
|
+
providerResponse = await forwardToOpenAICompatibleStream(request, targetModel, apiKey);
|
|
2525
2910
|
break;
|
|
2526
2911
|
default:
|
|
2527
2912
|
providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
|
|
@@ -2531,6 +2916,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2531
2916
|
if (cooldownsEnabled) {
|
|
2532
2917
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
|
|
2533
2918
|
}
|
|
2919
|
+
const durationMs = Date.now() - startTime;
|
|
2920
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2534
2921
|
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
|
|
2535
2922
|
res.end(JSON.stringify(errorData));
|
|
2536
2923
|
return;
|
|
@@ -2541,6 +2928,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2541
2928
|
if (cooldownsEnabled) {
|
|
2542
2929
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
2543
2930
|
}
|
|
2931
|
+
const durationMs = Date.now() - startTime;
|
|
2932
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2544
2933
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
2545
2934
|
res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
|
|
2546
2935
|
return;
|
|
@@ -2597,7 +2986,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2597
2986
|
}
|
|
2598
2987
|
break;
|
|
2599
2988
|
default:
|
|
2600
|
-
// xAI,
|
|
2989
|
+
// xAI, OpenRouter, DeepSeek, Groq, OpenAI all use OpenAI-compatible streaming format
|
|
2601
2990
|
for await (const chunk of pipeOpenAIStream(providerResponse)) {
|
|
2602
2991
|
res.write(chunk);
|
|
2603
2992
|
try {
|
|
@@ -2623,6 +3012,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2623
3012
|
cooldownManager.recordSuccess(targetProvider);
|
|
2624
3013
|
}
|
|
2625
3014
|
const durationMs = Date.now() - startTime;
|
|
3015
|
+
// Always log the request for stats/telemetry tracking
|
|
3016
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
|
|
3017
|
+
// Update token/cost info on the history entry
|
|
3018
|
+
const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
|
|
3019
|
+
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
|
|
2626
3020
|
if (recordTelemetry) {
|
|
2627
3021
|
// Record the run (non-blocking)
|
|
2628
3022
|
relay
|
|
@@ -2653,6 +3047,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2653
3047
|
if (cooldownsEnabled) {
|
|
2654
3048
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
|
|
2655
3049
|
}
|
|
3050
|
+
const durationMs = Date.now() - startTime;
|
|
3051
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2656
3052
|
res.writeHead(result.status, { 'Content-Type': 'application/json' });
|
|
2657
3053
|
res.end(JSON.stringify(responseData));
|
|
2658
3054
|
return;
|
|
@@ -2663,6 +3059,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2663
3059
|
if (cooldownsEnabled) {
|
|
2664
3060
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
2665
3061
|
}
|
|
3062
|
+
const durationMs = Date.now() - startTime;
|
|
3063
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2666
3064
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
2667
3065
|
res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
|
|
2668
3066
|
return;
|
|
@@ -2671,6 +3069,14 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2671
3069
|
cooldownManager.recordSuccess(targetProvider);
|
|
2672
3070
|
}
|
|
2673
3071
|
const durationMs = Date.now() - startTime;
|
|
3072
|
+
// Log the successful request
|
|
3073
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
|
|
3074
|
+
// Update token/cost info
|
|
3075
|
+
const usage = responseData?.usage;
|
|
3076
|
+
const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|
|
3077
|
+
const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
|
|
3078
|
+
const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut);
|
|
3079
|
+
updateLastHistoryEntry(tokensIn, tokensOut, cost);
|
|
2674
3080
|
if (recordTelemetry) {
|
|
2675
3081
|
// Record the run in RelayPlane
|
|
2676
3082
|
try {
|