@relayplane/proxy 1.7.2 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-tracker.d.ts +82 -0
- package/dist/agent-tracker.d.ts.map +1 -0
- package/dist/agent-tracker.js +281 -0
- package/dist/agent-tracker.js.map +1 -0
- package/dist/standalone-proxy.d.ts +18 -0
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +413 -68
- package/dist/standalone-proxy.js.map +1 -1
- package/package.json +1 -1
package/dist/standalone-proxy.js
CHANGED
|
@@ -56,6 +56,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
56
56
|
exports.SMART_ALIASES = exports.RELAYPLANE_ALIASES = exports.MODEL_MAPPING = exports.DEFAULT_ENDPOINTS = exports.proxyStatsCollector = void 0;
|
|
57
57
|
exports.getAvailableModelNames = getAvailableModelNames;
|
|
58
58
|
exports.resolveModelAlias = resolveModelAlias;
|
|
59
|
+
exports.extractRequestContent = extractRequestContent;
|
|
60
|
+
exports.extractResponseText = extractResponseText;
|
|
59
61
|
exports.parseModelSuffix = parseModelSuffix;
|
|
60
62
|
exports.classifyComplexity = classifyComplexity;
|
|
61
63
|
exports.shouldEscalate = shouldEscalate;
|
|
@@ -76,6 +78,7 @@ const budget_js_1 = require("./budget.js");
|
|
|
76
78
|
const anomaly_js_1 = require("./anomaly.js");
|
|
77
79
|
const alerts_js_1 = require("./alerts.js");
|
|
78
80
|
const downgrade_js_1 = require("./downgrade.js");
|
|
81
|
+
const agent_tracker_js_1 = require("./agent-tracker.js");
|
|
79
82
|
const version_status_js_1 = require("./utils/version-status.js");
|
|
80
83
|
const PROXY_VERSION = (() => {
|
|
81
84
|
try {
|
|
@@ -488,7 +491,7 @@ function shutdownHistory() {
|
|
|
488
491
|
}
|
|
489
492
|
flushHistoryBuffer();
|
|
490
493
|
}
|
|
491
|
-
function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity) {
|
|
494
|
+
function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity, agentFingerprint, agentId, errorMessage, errorStatusCode) {
|
|
492
495
|
const timestamp = new Date().toISOString();
|
|
493
496
|
const status = success ? '✓' : '✗';
|
|
494
497
|
const escalateTag = escalated ? ' [ESCALATED]' : '';
|
|
@@ -531,6 +534,10 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
|
|
|
531
534
|
costUsd: 0,
|
|
532
535
|
taskType: taskType || 'general',
|
|
533
536
|
complexity: complexity || 'simple',
|
|
537
|
+
agentFingerprint,
|
|
538
|
+
agentId,
|
|
539
|
+
error: errorMessage,
|
|
540
|
+
statusCode: errorStatusCode,
|
|
534
541
|
};
|
|
535
542
|
requestHistory.push(entry);
|
|
536
543
|
if (requestHistory.length > MAX_HISTORY) {
|
|
@@ -539,7 +546,7 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
|
|
|
539
546
|
bufferHistoryEntry(entry);
|
|
540
547
|
}
|
|
541
548
|
/** Update the most recent history entry with token/cost info */
|
|
542
|
-
function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cacheCreationTokens, cacheReadTokens) {
|
|
549
|
+
function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cacheCreationTokens, cacheReadTokens, agentFingerprint, agentId, requestContent, errorMessage, errorStatusCode) {
|
|
543
550
|
if (requestHistory.length > 0) {
|
|
544
551
|
const last = requestHistory[requestHistory.length - 1];
|
|
545
552
|
last.tokensIn = tokensIn;
|
|
@@ -552,8 +559,86 @@ function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cac
|
|
|
552
559
|
last.cacheCreationTokens = cacheCreationTokens;
|
|
553
560
|
if (cacheReadTokens !== undefined)
|
|
554
561
|
last.cacheReadTokens = cacheReadTokens;
|
|
562
|
+
if (agentFingerprint !== undefined)
|
|
563
|
+
last.agentFingerprint = agentFingerprint;
|
|
564
|
+
if (agentId !== undefined)
|
|
565
|
+
last.agentId = agentId;
|
|
566
|
+
if (requestContent)
|
|
567
|
+
last.requestContent = requestContent;
|
|
568
|
+
if (errorMessage !== undefined)
|
|
569
|
+
last.error = errorMessage;
|
|
570
|
+
if (errorStatusCode !== undefined)
|
|
571
|
+
last.statusCode = errorStatusCode;
|
|
555
572
|
}
|
|
556
573
|
}
|
|
574
|
+
/**
 * Extract request content for logging. Handles Anthropic and OpenAI formats.
 *
 * Anthropic-style bodies carry the system prompt in a top-level `system`
 * field (a string, or an array of text blocks). OpenAI-style bodies carry it
 * as the first `role: 'system'` entry of `messages`, whose content may be a
 * string or an array of text parts. In both formats the user message is the
 * last `role: 'user'` entry of `messages`.
 *
 * Null or malformed entries are skipped rather than dereferenced, so a bad
 * request body cannot make logging throw.
 *
 * @param {object} body - Parsed request body.
 * @param {boolean} isAnthropic - True when `body` uses the Anthropic shape.
 * @returns {{systemPrompt: (string|undefined), userMessage: (string|undefined)}}
 *   `systemPrompt` is truncated to 200 characters; each field is `undefined`
 *   when no text was found.
 */
function extractRequestContent(body, isAnthropic) {
    // Concatenates the text of every `{ type: 'text' }` part, ignoring
    // anything that is not a text part (including null entries).
    const joinTextParts = (parts) => parts
        .filter(part => part && part.type === 'text')
        .map(part => part.text ?? '')
        .join('');
    const messages = Array.isArray(body?.messages) ? body.messages : [];
    let systemPrompt = '';
    if (isAnthropic) {
        const system = body?.system;
        if (typeof system === 'string') {
            systemPrompt = system;
        }
        else if (Array.isArray(system)) {
            // Bare strings are tolerated alongside text blocks.
            systemPrompt = system
                .map(part => typeof part === 'string' ? part : joinTextParts([part]))
                .join('');
        }
    }
    else {
        // Only the first system message is considered.
        const systemMsg = messages.find(msg => msg && msg.role === 'system');
        if (systemMsg) {
            if (typeof systemMsg.content === 'string') {
                systemPrompt = systemMsg.content;
            }
            else if (Array.isArray(systemMsg.content)) {
                systemPrompt = joinTextParts(systemMsg.content);
            }
        }
    }
    let userMessage = '';
    // Walk backwards: the most recent user turn is the one worth showing.
    for (let i = messages.length - 1; i >= 0; i--) {
        const msg = messages[i];
        if (!msg || msg.role !== 'user')
            continue;
        if (typeof msg.content === 'string') {
            userMessage = msg.content;
        }
        else if (Array.isArray(msg.content)) {
            userMessage = joinTextParts(msg.content);
        }
        break;
    }
    return {
        systemPrompt: systemPrompt ? systemPrompt.slice(0, 200) : undefined,
        userMessage: userMessage || undefined,
    };
}
|
|
624
|
+
/**
 * Extract assistant response text from response payload.
 *
 * For Anthropic-style payloads the text of every `{ type: 'text' }` entry in
 * `content` is concatenated. For OpenAI-style payloads the first choice's
 * `message.content` is used; an array of text parts is joined the same way.
 *
 * @param {object} responseData - Parsed provider response.
 * @param {boolean} isAnthropic - True when `responseData` uses the Anthropic shape.
 * @returns {string} The extracted text, or `''` when none is present. Always a
 *   string, so callers can slice or measure it without further checks.
 */
function extractResponseText(responseData, isAnthropic) {
    // Null entries are skipped so a malformed payload cannot throw here.
    const joinTextParts = (parts) => parts
        .filter(part => part && part.type === 'text')
        .map(part => part.text ?? '')
        .join('');
    if (isAnthropic) {
        const blocks = responseData?.content;
        return Array.isArray(blocks) ? joinTextParts(blocks) : '';
    }
    const choices = responseData?.choices;
    const content = Array.isArray(choices) ? choices[0]?.message?.content : undefined;
    if (typeof content === 'string')
        return content;
    if (Array.isArray(content))
        return joinTextParts(content);
    return '';
}
|
|
557
642
|
const DEFAULT_PROXY_CONFIG = {
|
|
558
643
|
enabled: true,
|
|
559
644
|
modelOverrides: {},
|
|
@@ -584,6 +669,11 @@ const DEFAULT_PROXY_CONFIG = {
|
|
|
584
669
|
},
|
|
585
670
|
},
|
|
586
671
|
};
|
|
672
|
+
/** Module-level ref to active proxy config (set during startProxy) */
|
|
673
|
+
let _activeProxyConfig = {};
|
|
674
|
+
/**
 * Report whether request/response content may be captured for the dashboard.
 *
 * Capture is on unless the active config sets `dashboard.showRequestContent`
 * to exactly `false`; a missing `dashboard` section leaves it enabled.
 *
 * @returns {boolean}
 */
function isContentLoggingEnabled() {
    const dashboard = _activeProxyConfig.dashboard;
    const explicitlyDisabled = Boolean(dashboard) && dashboard.showRequestContent === false;
    return !explicitlyDisabled;
}
|
|
587
677
|
function getProxyConfigPath() {
|
|
588
678
|
const customPath = process.env['RELAYPLANE_CONFIG_PATH'];
|
|
589
679
|
if (customPath && customPath.trim())
|
|
@@ -835,19 +925,16 @@ function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
|
|
|
835
925
|
'Content-Type': 'application/json',
|
|
836
926
|
'anthropic-version': ctx.versionHeader || '2023-06-01',
|
|
837
927
|
};
|
|
838
|
-
// Auth: prefer incoming auth for passthrough
|
|
839
|
-
//
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
if (incomingIsOAuth && apiKey && !apiKey.startsWith('sk-ant-oat')) {
|
|
843
|
-
headers['x-api-key'] = apiKey;
|
|
844
|
-
}
|
|
845
|
-
else if (ctx.authHeader) {
|
|
928
|
+
// Auth: ALWAYS prefer incoming auth for passthrough (don't replace it)
|
|
929
|
+
// Incoming auth is from Claude Code/OpenClaw and is already the right token for the request
|
|
930
|
+
if (ctx.authHeader) {
|
|
931
|
+
// Incoming Authorization header takes priority - use it as-is
|
|
846
932
|
headers['Authorization'] = ctx.authHeader;
|
|
847
933
|
}
|
|
848
934
|
else if (ctx.apiKeyHeader) {
|
|
849
|
-
//
|
|
935
|
+
// Incoming x-api-key header
|
|
850
936
|
if (ctx.apiKeyHeader.startsWith('sk-ant-oat')) {
|
|
937
|
+
// MAX/OAuth tokens must use Authorization: Bearer, not x-api-key
|
|
851
938
|
headers['Authorization'] = `Bearer ${ctx.apiKeyHeader}`;
|
|
852
939
|
}
|
|
853
940
|
else {
|
|
@@ -855,7 +942,7 @@ function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
|
|
|
855
942
|
}
|
|
856
943
|
}
|
|
857
944
|
else if (apiKey) {
|
|
858
|
-
//
|
|
945
|
+
// Fallback to configured API key (only if no incoming auth)
|
|
859
946
|
if (isMaxToken || apiKey.startsWith('sk-ant-oat')) {
|
|
860
947
|
headers['Authorization'] = `Bearer ${apiKey}`;
|
|
861
948
|
}
|
|
@@ -867,6 +954,13 @@ function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
|
|
|
867
954
|
if (ctx.betaHeaders) {
|
|
868
955
|
headers['anthropic-beta'] = ctx.betaHeaders;
|
|
869
956
|
}
|
|
957
|
+
// Pass through OAuth identity headers (required by Anthropic for OAuth token validation)
|
|
958
|
+
if (ctx.userAgent) {
|
|
959
|
+
headers['user-agent'] = ctx.userAgent;
|
|
960
|
+
}
|
|
961
|
+
if (ctx.xApp) {
|
|
962
|
+
headers['x-app'] = ctx.xApp;
|
|
963
|
+
}
|
|
870
964
|
return headers;
|
|
871
965
|
}
|
|
872
966
|
/**
|
|
@@ -907,6 +1001,13 @@ function buildAnthropicHeaders(ctx, envApiKey) {
|
|
|
907
1001
|
if (ctx.betaHeaders) {
|
|
908
1002
|
headers['anthropic-beta'] = ctx.betaHeaders;
|
|
909
1003
|
}
|
|
1004
|
+
// Pass through OAuth identity headers (required by Anthropic for OAuth token validation)
|
|
1005
|
+
if (ctx.userAgent) {
|
|
1006
|
+
headers['user-agent'] = ctx.userAgent;
|
|
1007
|
+
}
|
|
1008
|
+
if (ctx.xApp) {
|
|
1009
|
+
headers['x-app'] = ctx.xApp;
|
|
1010
|
+
}
|
|
910
1011
|
return headers;
|
|
911
1012
|
}
|
|
912
1013
|
/**
|
|
@@ -1772,7 +1873,7 @@ function resolveExplicitModel(modelName) {
|
|
|
1772
1873
|
function resolveConfigModel(modelName) {
|
|
1773
1874
|
return resolveExplicitModel(modelName) ?? parsePreferredModel(modelName);
|
|
1774
1875
|
}
|
|
1775
|
-
function
|
|
1876
|
+
function extractResponseTextAuto(responseData) {
|
|
1776
1877
|
const openAiChoices = responseData['choices'];
|
|
1777
1878
|
if (openAiChoices && openAiChoices.length > 0) {
|
|
1778
1879
|
const first = openAiChoices[0];
|
|
@@ -1816,6 +1917,28 @@ function checkResponseModelMismatch(responseData, requestedModel, provider, log)
|
|
|
1816
1917
|
}
|
|
1817
1918
|
return responseModel;
|
|
1818
1919
|
}
|
|
1920
|
+
/**
 * Extract a human-readable error message from a provider error payload.
 * Handles Anthropic ({ error: { type, message } }) and OpenAI ({ error: { message } }) formats.
 *
 * Lookup order: a non-empty string `error`, then `type: message`, then
 * whichever of `message` / `type` is present, then `HTTP <statusCode>`, and
 * finally the literal `'Unknown error'`.
 *
 * @param {object|null|undefined} payload - Parsed error body. May be missing,
 *   in which case the status code fallback is used instead of throwing.
 * @param {number} [statusCode] - HTTP status of the failed response.
 * @returns {string} Always a string.
 */
function extractProviderErrorMessage(payload, statusCode) {
    // Optional access: a missing payload must not mask the real failure with
    // a TypeError raised from the error-reporting path itself.
    const err = payload?.['error'];
    if (typeof err === 'string' && err)
        return err;
    if (err && typeof err === 'object') {
        const errType = err['type'];
        const errMsg = err['message'];
        if (errType && errMsg)
            return `${errType}: ${errMsg}`;
        if (errMsg)
            return String(errMsg);
        if (errType)
            return String(errType);
    }
    if (statusCode)
        return `HTTP ${statusCode}`;
    return 'Unknown error';
}
|
|
1819
1942
|
class ProviderResponseError extends Error {
|
|
1820
1943
|
status;
|
|
1821
1944
|
payload;
|
|
@@ -1841,6 +1964,8 @@ function extractRequestContext(req) {
|
|
|
1841
1964
|
betaHeaders: req.headers['anthropic-beta'],
|
|
1842
1965
|
versionHeader: req.headers['anthropic-version'],
|
|
1843
1966
|
apiKeyHeader: req.headers['x-api-key'],
|
|
1967
|
+
userAgent: req.headers['user-agent'],
|
|
1968
|
+
xApp: req.headers['x-app'],
|
|
1844
1969
|
};
|
|
1845
1970
|
}
|
|
1846
1971
|
const MAX_BODY_SIZE = 10 * 1024 * 1024; // 10MB max request body
|
|
@@ -1940,7 +2065,7 @@ async function cascadeRequest(config, makeRequest, log) {
|
|
|
1940
2065
|
const isLastModel = i === config.models.length - 1;
|
|
1941
2066
|
try {
|
|
1942
2067
|
const { responseData, provider, model: resolvedModel } = await makeRequest(model);
|
|
1943
|
-
const text =
|
|
2068
|
+
const text = extractResponseTextAuto(responseData);
|
|
1944
2069
|
if (isLastModel || escalations >= config.maxEscalations) {
|
|
1945
2070
|
return { responseData, provider, model: resolvedModel, escalations };
|
|
1946
2071
|
}
|
|
@@ -1969,7 +2094,7 @@ async function cascadeRequest(config, makeRequest, log) {
|
|
|
1969
2094
|
function getDashboardHTML() {
|
|
1970
2095
|
return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Dashboard</title>
|
|
1971
2096
|
<style>
|
|
1972
|
-
*{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:
|
|
2097
|
+
*{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1600px;margin:0 auto}
|
|
1973
2098
|
a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
|
|
1974
2099
|
.header{display:flex;justify-content:space-between;align-items:center;padding:16px 0;border-bottom:1px solid #1e293b;margin-bottom:24px}
|
|
1975
2100
|
.header .meta{font-size:.8rem;color:#64748b}
|
|
@@ -1977,13 +2102,18 @@ a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
|
|
|
1977
2102
|
.card{background:#111318;border:1px solid #1e293b;border-radius:12px;padding:20px}
|
|
1978
2103
|
.card .label{font-size:.75rem;color:#64748b;text-transform:uppercase;letter-spacing:.05em;margin-bottom:6px}
|
|
1979
2104
|
.card .value{font-size:1.75rem;font-weight:700}.green{color:#34d399}
|
|
2105
|
+
.tooltip-wrap{position:relative;display:inline-block}
|
|
2106
|
+
.tooltip-wrap .tooltip-box{visibility:hidden;opacity:0;background:#1e293b;color:#e2e8f0;font-size:.8rem;font-weight:400;text-transform:none;letter-spacing:0;line-height:1.5;border:1px solid #334155;border-radius:8px;padding:10px 14px;position:absolute;top:calc(100% + 8px);left:50%;transform:translateX(-50%);width:280px;z-index:999;pointer-events:none;transition:opacity .15s;box-shadow:0 4px 16px rgba(0,0,0,.4)}
|
|
2107
|
+
.tooltip-wrap .tooltip-box::after{content:'';position:absolute;bottom:100%;left:50%;transform:translateX(-50%);border:6px solid transparent;border-bottom-color:#334155}
|
|
2108
|
+
.tooltip-wrap:hover .tooltip-box{visibility:visible;opacity:1}
|
|
2109
|
+
.info-icon{cursor:help;color:#64748b;font-size:.75rem;vertical-align:middle;margin-left:4px}
|
|
1980
2110
|
table{width:100%;border-collapse:collapse;font-size:.85rem}
|
|
1981
2111
|
th{text-align:left;color:#64748b;font-weight:500;padding:8px 12px;border-bottom:1px solid #1e293b;font-size:.75rem;text-transform:uppercase;letter-spacing:.04em}
|
|
1982
2112
|
td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
1983
2113
|
.section{margin-bottom:32px}.section h2{font-size:1rem;font-weight:600;margin-bottom:12px;color:#94a3b8}
|
|
1984
2114
|
.dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.warn{background:#fbbf24}.dot.down{background:#ef4444}
|
|
1985
2115
|
.badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
|
|
1986
|
-
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
|
|
2116
|
+
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}.badge.err-auth{background:#2d0a0a;color:#ef4444}.badge.err-rate{background:#2d2a0a;color:#fbbf24}.badge.err-timeout{background:#2d1a0a;color:#fb923c}
|
|
1987
2117
|
.badge.tt-code{background:#1e3a5f;color:#60a5fa}.badge.tt-analysis{background:#3b1f6e;color:#a78bfa}.badge.tt-summarization{background:#1a3a2a;color:#6ee7b7}.badge.tt-qa{background:#3a2f1e;color:#fbbf24}.badge.tt-general{background:#1e293b;color:#94a3b8}
|
|
1988
2118
|
.badge.cx-simple{background:#052e1633;color:#34d399}.badge.cx-moderate{background:#2d2a0a;color:#fbbf24}.badge.cx-complex{background:#2d0a0a;color:#ef4444}
|
|
1989
2119
|
.vstat{display:inline-flex;align-items:center;gap:6px;margin-left:8px;padding:1px 8px;border-radius:999px;border:1px solid #334155;font-size:.72rem}
|
|
@@ -1992,19 +2122,22 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
|
1992
2122
|
.vstat.unavailable{color:#a3a3a3;border-color:#52525b66;background:#18181b66}
|
|
1993
2123
|
@media(max-width:768px){.col-tt,.col-cx{display:none}}
|
|
1994
2124
|
.prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
|
|
2125
|
+
.rename-btn{background:none;border:none;cursor:pointer;font-size:.75rem;opacity:.5;padding:2px}.rename-btn:hover{opacity:1}
|
|
1995
2126
|
</style></head><body>
|
|
1996
2127
|
<div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><a href="/dashboard/config">Config</a> · <span id="ver"></span><span id="vstat" class="vstat unavailable">Unable to check</span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
|
|
1997
2128
|
<div class="cards">
|
|
1998
2129
|
<div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
|
|
1999
2130
|
<div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
|
|
2000
|
-
<div class="card"><div class="label">Savings
|
|
2131
|
+
<div class="card"><div class="label">Routing Savings <span class="tooltip-wrap"><span class="info-icon">ⓘ</span><span class="tooltip-box" id="savings-tooltip">Loading...</span></span></div><div class="value green" id="savings">—</div><div id="savings-detail" style="font-size:.75rem;color:#64748b;margin-top:4px">—</div></div>
|
|
2001
2132
|
<div class="card"><div class="label">Avg Latency</div><div class="value" id="avgLat">—</div></div>
|
|
2002
2133
|
</div>
|
|
2003
2134
|
<div class="section"><h2>Model Breakdown</h2>
|
|
2004
|
-
<table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
|
|
2135
|
+
<table><thead><tr><th>Provider</th><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
|
|
2136
|
+
<div class="section"><h2>Agent Cost Breakdown</h2>
|
|
2137
|
+
<table><thead><tr><th>Agent</th><th>Requests</th><th>Total Cost</th><th>Last Active</th><th></th></tr></thead><tbody id="agents"></tbody></table></div>
|
|
2005
2138
|
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
|
|
2006
2139
|
<div class="section"><h2>Recent Runs</h2>
|
|
2007
|
-
<table><thead><tr><th>Time</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
|
|
2140
|
+
<table><thead><tr><th>Time</th><th>Agent</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
|
|
2008
2141
|
<script>
|
|
2009
2142
|
const $ = id => document.getElementById(id);
|
|
2010
2143
|
// Fixed-decimal formatter; anything that is not a number renders as a dash.
function fmt(n,d=2){
  if(typeof n!=='number')return '-';
  return n.toFixed(d);
}
|
|
@@ -2012,12 +2145,13 @@ function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
|
|
|
2012
2145
|
// Renders a duration given in seconds as "Xh Ym", or just "Ym" under an hour.
function dur(s){
  const hours=Math.floor(s/3600);
  const mins=Math.floor(s%3600/60);
  if(hours)return hours+'h '+mins+'m';
  return mins+'m';
}
|
|
2013
2146
|
async function load(){
|
|
2014
2147
|
try{
|
|
2015
|
-
const [health,stats,runsR,sav,provH]=await Promise.all([
|
|
2148
|
+
const [health,stats,runsR,sav,provH,agentsR]=await Promise.all([
|
|
2016
2149
|
fetch('/health').then(r=>r.json()),
|
|
2017
2150
|
fetch('/v1/telemetry/stats').then(r=>r.json()),
|
|
2018
2151
|
fetch('/v1/telemetry/runs?limit=20').then(r=>r.json()),
|
|
2019
2152
|
fetch('/v1/telemetry/savings').then(r=>r.json()),
|
|
2020
|
-
fetch('/v1/telemetry/health').then(r=>r.json())
|
|
2153
|
+
fetch('/v1/telemetry/health').then(r=>r.json()),
|
|
2154
|
+
fetch('/api/agents').then(r=>r.json()).catch(()=>({agents:[]}))
|
|
2021
2155
|
]);
|
|
2022
2156
|
$('ver').textContent='v'+health.version;
|
|
2023
2157
|
$('uptime').textContent=dur(health.uptime);
|
|
@@ -2037,16 +2171,65 @@ async function load(){
|
|
|
2037
2171
|
const total=stats.summary?.totalEvents||0;
|
|
2038
2172
|
$('totalReq').textContent=total;
|
|
2039
2173
|
$('totalCost').textContent='$'+fmt(stats.summary?.totalCostUsd??0,4);
|
|
2040
|
-
|
|
2174
|
+
const savAmt=sav.savedAmount??sav.savings??0;
|
|
2175
|
+
const cacheSav=sav.cacheSavings??0;
|
|
2176
|
+
const routeSav=sav.routingSavings??0;
|
|
2177
|
+
const actual=sav.actualCost??0;
|
|
2178
|
+
const hasAnthropic=sav.hasAnthropicCalls!==false;
|
|
2179
|
+
const baseline=sav.potentialSavings??sav.total??0;
|
|
2180
|
+
// Headline = routing savings % (RelayPlane's actual contribution)
|
|
2181
|
+
const routeBaseline=baseline>0?baseline:1;
|
|
2182
|
+
const routePct=hasAnthropic?Math.round((routeSav/routeBaseline)*100):0;
|
|
2183
|
+
const totalPct=sav.percentage??0;
|
|
2184
|
+
$('savings').textContent='$'+fmt(routeSav,2);
|
|
2185
|
+
// Secondary: show total % including cache as context
|
|
2186
|
+
if(hasAnthropic){
|
|
2187
|
+
$('savings-detail').innerHTML='<span style="color:#60a5fa">routing savings</span> · <span style="color:#64748b" title="Includes Anthropic prompt cache hits which happen regardless of routing">'+totalPct+'% total incl. cache</span>';
|
|
2188
|
+
} else {
|
|
2189
|
+
$('savings-detail').innerHTML='<span style="color:#a78bfa">$'+fmt(cacheSav,2)+' cache</span> · <span style="color:#64748b">'+totalPct+'% total</span>';
|
|
2190
|
+
}
|
|
2191
|
+
const tipEl=$('savings-tooltip');
|
|
2192
|
+
if(tipEl){
|
|
2193
|
+
let tip='<strong>How savings are calculated</strong><br><br>';
|
|
2194
|
+
if(hasAnthropic){
|
|
2195
|
+
tip+='<span style="color:#60a5fa">🔀 Routing savings: $'+fmt(routeSav,2)+'</span><br><small>Requests routed to cheaper models (e.g. Sonnet) vs always using Opus. RelayPlane contribution.</small><br><br>';
|
|
2196
|
+
tip+='<span style="color:#a78bfa">💾 Cache savings: $'+fmt(cacheSav,2)+'</span><br><small>Anthropic prompt cache hits (10× cheaper reads). This would happen without RelayPlane too.</small><br><br>';
|
|
2197
|
+
} else {
|
|
2198
|
+
tip+='<span style="color:#a78bfa">💾 Cache savings: $'+fmt(cacheSav,2)+'</span><br><small>Provider cache hits. Happens automatically, not specific to RelayPlane.</small><br><br>';
|
|
2199
|
+
}
|
|
2200
|
+
tip+='💳 Actual cost: <b>$'+fmt(actual,2)+'</b><br>✅ Total saved: <b>$'+fmt(savAmt,2)+'</b>';
|
|
2201
|
+
tipEl.innerHTML=tip;
|
|
2202
|
+
}
|
|
2041
2203
|
$('avgLat').textContent=(stats.summary?.avgLatencyMs??0)+'ms';
|
|
2042
2204
|
$('models').innerHTML=(stats.byModel||[]).map(m=>
|
|
2043
|
-
'<tr><td>'+m.model+'</td><td>'+m.count+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(total>0?m.count/total*100:0,1)+'%</td></tr>'
|
|
2044
|
-
).join('')||'<tr><td colspan=
|
|
2205
|
+
'<tr><td style="color:#94a3b8;font-size:.85rem">'+(m.provider||'—')+'</td><td>'+m.model+'</td><td>'+m.count+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(total>0?m.count/total*100:0,1)+'%</td></tr>'
|
|
2206
|
+
).join('')||'<tr><td colspan=5 style="color:#64748b">No data yet</td></tr>';
|
|
2045
2207
|
// Maps a task type to its badge class. A switch is used instead of an object
// lookup so inherited keys such as 'constructor' fall through to the default.
function ttCls(t){
  switch(t){
    case 'code_generation':return 'tt-code';
    case 'analysis':return 'tt-analysis';
    case 'summarization':return 'tt-summarization';
    case 'question_answering':return 'tt-qa';
    default:return 'tt-general';
  }
}
|
|
2046
2208
|
// Maps a complexity level to its badge class. A switch is used instead of an
// object lookup so inherited keys such as 'constructor' fall back to simple.
function cxCls(c){
  switch(c){
    case 'moderate':return 'cx-moderate';
    case 'complex':return 'cx-complex';
    default:return 'cx-simple';
  }
}
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2209
|
+
// HTML-escapes a value for use in element content and in quoted attributes.
// Quotes are encoded too because callers place the result inside title="...".
function esc(s){
  if(s==null)return'';
  return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
}
|
|
2210
|
+
const agents=(agentsR.agents||[]).sort((a,b)=>(b.totalCost||0)-(a.totalCost||0));
|
|
2211
|
+
$('runs').innerHTML=(runsR.runs||[]).map((r,i)=>{
|
|
2212
|
+
// Builds the status badge for one run. The error text comes from upstream
// provider responses, so every dynamic piece is escaped before it is placed
// into markup. Class selection: auth (401/403), rate limit (429), timeout.
function errBadge(r){
  if(r.status==='success')return '<span class="badge ok">success</span>';
  var label=String(r.error||'error');
  var cls='err';
  if(r.statusCode===401||r.statusCode===403||(r.error&&/auth/i.test(label)))cls='err-auth';
  else if(r.statusCode===429||(r.error&&/rate.?limit/i.test(label)))cls='err-rate';
  else if(r.error&&/timeout/i.test(label))cls='err-timeout';
  // Truncate first, then escape, so an entity is never cut in half.
  var shown=label.length>40?label.slice(0,40)+'…':label;
  var code=r.statusCode?esc(String(r.statusCode)):'';
  return '<span class="badge '+cls+'" title="'+esc(r.error?label:'')+' (HTTP '+(code||'?')+')">'+(code?code+' ':'')+esc(shown)+'</span>';
}
|
|
2213
|
+
const agentName=agents.find(a=>a.fingerprint===r.agentFingerprint)?.name||(r.agentId||'—');
|
|
2214
|
+
const row='<tr style="cursor:pointer" onclick="toggleDetail('+i+')"><td><span id="arrow-'+i+'" style="color:#64748b;font-size:.7rem;margin-right:6px">▶</span>'+fmtTime(r.started_at)+'</td><td style="font-size:.85rem">'+esc(agentName)+'</td><td>'+r.model+'</td><td class="col-tt"><span class="badge '+ttCls(r.taskType)+'">'+(r.taskType||'general').replace(/_/g,' ')+'</span></td><td class="col-cx"><span class="badge '+cxCls(r.complexity)+'">'+(r.complexity||'simple')+'</span></td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td class="col-cache" style="color:#60a5fa">'+(r.cacheCreationTokens||0)+'</td><td class="col-cache" style="color:#34d399">'+(r.cacheReadTokens||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td>'+errBadge(r)+'</td></tr>';
|
|
2215
|
+
const c=r.requestContent||{};
|
|
2216
|
+
let detail='<tr id="run-detail-'+i+'" style="display:none"><td colspan="12" style="padding:16px;background:#111217;border-bottom:1px solid #1e293b">';
|
|
2217
|
+
if(c.systemPrompt||c.userMessage||c.responsePreview){
|
|
2218
|
+
if(c.systemPrompt) detail+='<div style="color:#64748b;font-size:.85rem;margin-bottom:10px;font-style:italic"><strong style="color:#94a3b8">System:</strong> '+esc(c.systemPrompt)+'</div>';
|
|
2219
|
+
if(c.userMessage) detail+='<div style="background:#1a1c23;border:1px solid #1e293b;border-radius:8px;padding:12px;margin-bottom:10px"><strong style="color:#94a3b8;font-size:.8rem">User Message</strong><div style="margin-top:6px;white-space:pre-wrap">'+esc(c.userMessage)+'</div></div>';
|
|
2220
|
+
if(c.responsePreview) detail+='<div style="background:#1a1c23;border:1px solid #1e293b;border-radius:8px;padding:12px;margin-bottom:10px"><strong style="color:#94a3b8;font-size:.8rem">Response Preview</strong><div style="margin-top:6px;white-space:pre-wrap">'+esc(c.responsePreview)+'</div></div>';
|
|
2221
|
+
const btnAttrs='id="full-btn-'+i+'" style="background:#1e293b;color:#e2e8f0;border:1px solid #334155;padding:6px 12px;border-radius:6px;font-size:.8rem"';
|
|
2222
|
+
detail+=(r.tokensOut>0?'<button onclick="event.stopPropagation();loadFullResponse("'+r.id+'",'+i+')" '+btnAttrs+'>Show full response</button>':'<button disabled '+btnAttrs+' style="opacity:.4;cursor:default">Response not available (streaming)</button>')+'<pre id="full-resp-'+i+'" style="display:none;white-space:pre-wrap;margin-top:10px;background:#0d0e11;border:1px solid #1e293b;border-radius:8px;padding:12px;max-height:400px;overflow:auto;font-size:.8rem"></pre>';
|
|
2223
|
+
} else {
|
|
2224
|
+
detail+='<span style="color:#64748b">No content captured for this request</span>';
|
|
2225
|
+
}
|
|
2226
|
+
detail+='</td></tr>';
|
|
2227
|
+
return row+detail;
|
|
2228
|
+
}).join('')||'<tr><td colspan=12 style="color:#64748b">No runs yet</td></tr>';
|
|
2229
|
+
restoreExpanded();
|
|
2230
|
+
$('agents').innerHTML=agents.length?agents.map(a=>
|
|
2231
|
+
'<tr><td><span class="agent-name" data-fp="'+a.fingerprint+'">'+esc(a.name)+'</span> <button class="rename-btn" onclick="renameAgent("'+a.fingerprint+'","'+a.name.replace(/"/g,'')+'")">✏️</button></td><td>'+a.totalRequests+'</td><td>$'+fmt(a.totalCost,4)+'</td><td>'+fmtTime(a.lastSeen)+'</td><td style="font-size:.7rem;color:#64748b" title="'+esc(a.systemPromptPreview||'')+'">'+a.fingerprint+'</td></tr>'
|
|
2232
|
+
).join(''):'<tr><td colspan=5 style="color:#64748b">No agents detected yet</td></tr>';
|
|
2050
2233
|
$('providers').innerHTML=(provH.providers||[]).map(p=>{
|
|
2051
2234
|
const dotClass = p.status==='healthy'?'up':(p.status==='degraded'?'warn':'down');
|
|
2052
2235
|
const rate = p.successRate!==undefined?(' '+Math.round(p.successRate*100)+'%'):'';
|
|
@@ -2054,13 +2237,34 @@ async function load(){
|
|
|
2054
2237
|
}).join('');
|
|
2055
2238
|
}catch(e){console.error(e)}
|
|
2056
2239
|
}
|
|
2240
|
+
// Asks for a new display name for the agent with fingerprint fp and posts it
// to /api/agents/rename. The table is reloaded only after the server accepts
// the change; failures are logged instead of surfacing as unhandled rejections
// from the onclick handler.
async function renameAgent(fp,currentName){
  const input=prompt('Rename agent:',currentName);
  // Cancelled prompts return null; whitespace-only names are treated the same.
  const name=input?input.trim():'';
  if(!name||name===currentName)return;
  try{
    const res=await fetch('/api/agents/rename',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({fingerprint:fp,name:name})});
    if(!res.ok)throw new Error('Rename failed: HTTP '+res.status);
    load();
  }catch(e){console.error(e)}
}
|
|
2246
|
+
const expandedRows=new Set();
|
|
2247
|
+
// Expands or collapses the detail row for run index i and records the state
// in expandedRows so it can be re-applied after the next refresh. Does nothing
// when the row is no longer in the table.
function toggleDetail(i){
  var d=document.getElementById('run-detail-'+i);
  if(!d)return;
  var arrow=document.getElementById('arrow-'+i);
  var opening=d.style.display==='none';
  d.style.display=opening?'table-row':'none';
  if(opening)expandedRows.add(i);else expandedRows.delete(i);
  if(arrow)arrow.textContent=opening?'▼':'▶';
}
|
|
2248
|
+
// Re-opens every detail row recorded in expandedRows; rows that are no longer
// present in the table are skipped.
function restoreExpanded(){
  for(const idx of expandedRows){
    const detail=document.getElementById('run-detail-'+idx);
    const marker=document.getElementById('arrow-'+idx);
    if(detail)detail.style.display='table-row';
    if(marker)marker.textContent='▼';
  }
}
|
|
2249
|
+
// Toggles the full-response panel for run index i. On first open the run is
// fetched from /api/runs/<id>; the id is URL-encoded so it cannot alter the
// request path. Does nothing when the row elements are no longer in the table.
async function loadFullResponse(runId,i){
  const btn=document.getElementById('full-btn-'+i);
  const pre=document.getElementById('full-resp-'+i);
  if(!btn||!pre)return;
  if(pre.style.display!=='none'){pre.style.display='none';btn.textContent='Show full response';return}
  btn.textContent='Loading...';
  try{
    const data=await fetch('/api/runs/'+encodeURIComponent(runId)).then(r=>r.json());
    const full=data&&data.requestContent&&data.requestContent.fullResponse;
    // textContent (not innerHTML) keeps the response body inert.
    if(full){pre.textContent=full;pre.style.display='block';btn.textContent='Hide full response'}
    else{btn.textContent='No full response available'}
  }catch{btn.textContent='Error loading response'}
}
|
|
2057
2261
|
load();setInterval(load,5000);
|
|
2058
|
-
</script></body></html>`;
|
|
2262
|
+
</script><footer style="text-align:center;padding:20px 0;color:#475569;font-size:.75rem;border-top:1px solid #1e293b;margin-top:20px">🔒 Request content stays on your machine. Never sent to cloud.</footer></body></html>`;
|
|
2059
2263
|
}
|
|
2060
2264
|
function getConfigDashboardHTML() {
|
|
2061
2265
|
return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Config</title>
|
|
2062
2266
|
<style>
|
|
2063
|
-
*{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:
|
|
2267
|
+
*{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1600px;margin:0 auto}
|
|
2064
2268
|
a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
|
|
2065
2269
|
.header{display:flex;justify-content:space-between;align-items:center;padding:16px 0;border-bottom:1px solid #1e293b;margin-bottom:24px}
|
|
2066
2270
|
.header .meta{font-size:.8rem;color:#64748b}
|
|
@@ -2152,8 +2356,10 @@ async function startProxy(config = {}) {
|
|
|
2152
2356
|
};
|
|
2153
2357
|
// Load persistent history from disk
|
|
2154
2358
|
loadHistoryFromDisk();
|
|
2359
|
+
(0, agent_tracker_js_1.loadAgentRegistry)();
|
|
2155
2360
|
// Flush history on shutdown
|
|
2156
2361
|
const handleShutdown = () => {
|
|
2362
|
+
(0, agent_tracker_js_1.flushAgentRegistry)();
|
|
2157
2363
|
meshHandle.stop();
|
|
2158
2364
|
shutdownHistory();
|
|
2159
2365
|
process.exit(0);
|
|
@@ -2162,6 +2368,7 @@ async function startProxy(config = {}) {
|
|
|
2162
2368
|
process.on('SIGTERM', handleShutdown);
|
|
2163
2369
|
const configPath = getProxyConfigPath();
|
|
2164
2370
|
let proxyConfig = await loadProxyConfig(configPath, log);
|
|
2371
|
+
_activeProxyConfig = proxyConfig;
|
|
2165
2372
|
const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
|
|
2166
2373
|
// === Startup config validation (Task 4) ===
|
|
2167
2374
|
try {
|
|
@@ -2500,11 +2707,11 @@ async function startProxy(config = {}) {
|
|
|
2500
2707
|
const days = parseInt(params.get('days') || '7', 10);
|
|
2501
2708
|
const cutoff = Date.now() - days * 86400000;
|
|
2502
2709
|
const recent = requestHistory.filter(r => new Date(r.timestamp).getTime() >= cutoff);
|
|
2503
|
-
// Model breakdown
|
|
2710
|
+
// Model breakdown (keyed by provider/model for disambiguation)
|
|
2504
2711
|
const modelMap = new Map();
|
|
2505
2712
|
for (const r of recent) {
|
|
2506
|
-
const key = r.targetModel
|
|
2507
|
-
const cur = modelMap.get(key) || { count: 0, cost: 0 };
|
|
2713
|
+
const key = `${r.provider || 'unknown'}/${r.targetModel}`;
|
|
2714
|
+
const cur = modelMap.get(key) || { count: 0, cost: 0, provider: r.provider || 'unknown', model: r.targetModel };
|
|
2508
2715
|
cur.count++;
|
|
2509
2716
|
cur.cost += r.costUsd;
|
|
2510
2717
|
modelMap.set(key, cur);
|
|
@@ -2527,7 +2734,7 @@ async function startProxy(config = {}) {
|
|
|
2527
2734
|
avgLatencyMs: recent.length ? Math.round(totalLatency / recent.length) : 0,
|
|
2528
2735
|
successRate: recent.length ? recent.filter(r => r.success).length / recent.length : 0,
|
|
2529
2736
|
},
|
|
2530
|
-
byModel: Array.from(modelMap.entries()).map(([
|
|
2737
|
+
byModel: Array.from(modelMap.entries()).map(([, v]) => ({ model: v.model, provider: v.provider, count: v.count, costUsd: v.cost, savings: 0 })),
|
|
2531
2738
|
dailyCosts: Array.from(dailyMap.entries()).map(([date, v]) => ({ date, costUsd: v.cost, requests: v.requests })),
|
|
2532
2739
|
};
|
|
2533
2740
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
@@ -2565,6 +2772,16 @@ async function startProxy(config = {}) {
|
|
|
2565
2772
|
cacheReadTokens: r.cacheReadTokens ?? 0,
|
|
2566
2773
|
savings: Math.round(perRunSavings * 10000) / 10000,
|
|
2567
2774
|
escalated: r.escalated,
|
|
2775
|
+
error: r.error ?? null,
|
|
2776
|
+
statusCode: r.statusCode ?? null,
|
|
2777
|
+
agentFingerprint: r.agentFingerprint ?? null,
|
|
2778
|
+
agentId: r.agentId ?? null,
|
|
2779
|
+
requestContent: r.requestContent ? {
|
|
2780
|
+
systemPrompt: r.requestContent.systemPrompt,
|
|
2781
|
+
userMessage: r.requestContent.userMessage,
|
|
2782
|
+
responsePreview: r.requestContent.responsePreview,
|
|
2783
|
+
// fullResponse excluded from list endpoint to keep payloads small
|
|
2784
|
+
} : undefined,
|
|
2568
2785
|
};
|
|
2569
2786
|
});
|
|
2570
2787
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
@@ -2572,26 +2789,34 @@ async function startProxy(config = {}) {
|
|
|
2572
2789
|
return;
|
|
2573
2790
|
}
|
|
2574
2791
|
if (req.method === 'GET' && telemetryPath === 'savings') {
|
|
2575
|
-
//
|
|
2576
|
-
//
|
|
2577
|
-
|
|
2578
|
-
let totalOriginalCost = 0;
|
|
2792
|
+
// Routing savings: cost at same model with no cache vs actual cost
|
|
2793
|
+
// Cache savings: what cache hits saved vs paying full input price
|
|
2794
|
+
// Baseline: each request at full input price (no cache, no routing)
|
|
2579
2795
|
let totalActualCost = 0;
|
|
2580
|
-
let
|
|
2796
|
+
let totalCacheSavings = 0; // savings from cache hits (Anthropic feature)
|
|
2797
|
+
let totalRoutingSavings = 0; // savings from routing to cheaper model
|
|
2798
|
+
let hasAnthropicCalls = false;
|
|
2581
2799
|
const byDayMap = new Map();
|
|
2582
2800
|
for (const r of requestHistory) {
|
|
2583
|
-
// Pass same cache tokens to baseline so savings only reflect routing decisions,
|
|
2584
|
-
// not prompt-cache discounts (those happen regardless of which model is chosen).
|
|
2585
|
-
const origCost = (0, telemetry_js_1.estimateCost)(OPUS_BASELINE, r.tokensIn, r.tokensOut, r.cacheCreationTokens || undefined, r.cacheReadTokens || undefined);
|
|
2586
2801
|
const actualCost = r.costUsd;
|
|
2587
|
-
const saved = Math.max(0, origCost - actualCost);
|
|
2588
|
-
totalOriginalCost += origCost;
|
|
2589
2802
|
totalActualCost += actualCost;
|
|
2590
|
-
|
|
2803
|
+
// Cache savings: full input price vs what was paid with cache
|
|
2804
|
+
const fullInputCost = (0, telemetry_js_1.estimateCost)(r.targetModel, r.tokensIn + (r.cacheCreationTokens || 0) + (r.cacheReadTokens || 0), r.tokensOut);
|
|
2805
|
+
const cachedCost = r.costUsd;
|
|
2806
|
+
const cacheSaved = Math.max(0, fullInputCost - cachedCost);
|
|
2807
|
+
totalCacheSavings += cacheSaved;
|
|
2808
|
+
// Routing savings: what would this request cost at full Opus price (no cache)
|
|
2809
|
+
// vs what the routed model cost (no cache). Only meaningful for Anthropic.
|
|
2810
|
+
if (r.provider === 'anthropic') {
|
|
2811
|
+
hasAnthropicCalls = true;
|
|
2812
|
+
const opusCost = (0, telemetry_js_1.estimateCost)('claude-opus-4-6', r.tokensIn, r.tokensOut);
|
|
2813
|
+
const modelCost = (0, telemetry_js_1.estimateCost)(r.targetModel, r.tokensIn, r.tokensOut);
|
|
2814
|
+
const routingSaved = Math.max(0, opusCost - modelCost);
|
|
2815
|
+
totalRoutingSavings += routingSaved;
|
|
2816
|
+
}
|
|
2591
2817
|
const date = r.timestamp.slice(0, 10);
|
|
2592
2818
|
const day = byDayMap.get(date) || { savedAmount: 0, originalCost: 0, actualCost: 0 };
|
|
2593
|
-
day.savedAmount +=
|
|
2594
|
-
day.originalCost += origCost;
|
|
2819
|
+
day.savedAmount += Math.max(0, totalCacheSavings + totalRoutingSavings);
|
|
2595
2820
|
day.actualCost += actualCost;
|
|
2596
2821
|
byDayMap.set(date, day);
|
|
2597
2822
|
}
|
|
@@ -2603,16 +2828,19 @@ async function startProxy(config = {}) {
|
|
|
2603
2828
|
originalCost: Math.round(v.originalCost * 10000) / 10000,
|
|
2604
2829
|
actualCost: Math.round(v.actualCost * 10000) / 10000,
|
|
2605
2830
|
}));
|
|
2831
|
+
const totalSaved = totalCacheSavings + totalRoutingSavings;
|
|
2832
|
+
const baseline = totalActualCost + totalSaved;
|
|
2606
2833
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2607
2834
|
res.end(JSON.stringify({
|
|
2608
|
-
total: Math.round(totalOriginalCost * 10000) / 10000,
|
|
2609
2835
|
actualCost: Math.round(totalActualCost * 10000) / 10000,
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2836
|
+
savedAmount: Math.round(totalSaved * 10000) / 10000,
|
|
2837
|
+
savings: Math.round(totalSaved * 10000) / 10000,
|
|
2838
|
+
cacheSavings: Math.round(totalCacheSavings * 10000) / 10000,
|
|
2839
|
+
routingSavings: Math.round(totalRoutingSavings * 10000) / 10000,
|
|
2840
|
+
hasAnthropicCalls,
|
|
2841
|
+
potentialSavings: Math.round(baseline * 10000) / 10000,
|
|
2842
|
+
total: Math.round(baseline * 10000) / 10000,
|
|
2843
|
+
percentage: baseline > 0 ? Math.round((totalSaved / baseline) * 100) : 0,
|
|
2616
2844
|
byDay,
|
|
2617
2845
|
}));
|
|
2618
2846
|
return;
|
|
@@ -2662,6 +2890,63 @@ async function startProxy(config = {}) {
|
|
|
2662
2890
|
res.end(JSON.stringify({ error: 'Not found' }));
|
|
2663
2891
|
return;
|
|
2664
2892
|
}
|
|
2893
|
+
// === Agent tracking API ===
|
|
2894
|
+
// === /api/runs/:id — full request/response content for a single run ===
|
|
2895
|
+
const runsIdMatch = pathname.match(/^\/api\/runs\/(.+)$/);
|
|
2896
|
+
if (req.method === 'GET' && runsIdMatch) {
|
|
2897
|
+
const runId = runsIdMatch[1];
|
|
2898
|
+
const run = requestHistory.find(r => r.id === runId);
|
|
2899
|
+
if (!run) {
|
|
2900
|
+
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
2901
|
+
res.end(JSON.stringify({ error: 'Run not found' }));
|
|
2902
|
+
return;
|
|
2903
|
+
}
|
|
2904
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2905
|
+
res.end(JSON.stringify({
|
|
2906
|
+
id: run.id,
|
|
2907
|
+
model: run.targetModel,
|
|
2908
|
+
provider: run.provider,
|
|
2909
|
+
timestamp: run.timestamp,
|
|
2910
|
+
tokensIn: run.tokensIn,
|
|
2911
|
+
tokensOut: run.tokensOut,
|
|
2912
|
+
costUsd: run.costUsd,
|
|
2913
|
+
latencyMs: run.latencyMs,
|
|
2914
|
+
success: run.success,
|
|
2915
|
+
requestContent: run.requestContent,
|
|
2916
|
+
}));
|
|
2917
|
+
return;
|
|
2918
|
+
}
|
|
2919
|
+
if (req.method === 'GET' && pathname === '/api/agents') {
|
|
2920
|
+
const summaries = (0, agent_tracker_js_1.getAgentSummaries)(requestHistory);
|
|
2921
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2922
|
+
res.end(JSON.stringify({ agents: summaries }));
|
|
2923
|
+
return;
|
|
2924
|
+
}
|
|
2925
|
+
if (req.method === 'POST' && pathname === '/api/agents/rename') {
|
|
2926
|
+
try {
|
|
2927
|
+
const body = await readJsonBody(req);
|
|
2928
|
+
const fingerprint = body['fingerprint'];
|
|
2929
|
+
const name = body['name'];
|
|
2930
|
+
if (!fingerprint || !name) {
|
|
2931
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
2932
|
+
res.end(JSON.stringify({ error: 'Missing fingerprint or name' }));
|
|
2933
|
+
return;
|
|
2934
|
+
}
|
|
2935
|
+
const ok = (0, agent_tracker_js_1.renameAgent)(fingerprint, name);
|
|
2936
|
+
if (!ok) {
|
|
2937
|
+
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
2938
|
+
res.end(JSON.stringify({ error: 'Agent not found' }));
|
|
2939
|
+
return;
|
|
2940
|
+
}
|
|
2941
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2942
|
+
res.end(JSON.stringify({ ok: true }));
|
|
2943
|
+
}
|
|
2944
|
+
catch {
|
|
2945
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
2946
|
+
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
2947
|
+
}
|
|
2948
|
+
return;
|
|
2949
|
+
}
|
|
2665
2950
|
// === Dashboard ===
|
|
2666
2951
|
if (req.method === 'GET' && (pathname === '/' || pathname === '/dashboard')) {
|
|
2667
2952
|
res.writeHead(200, { 'Content-Type': 'text/html' });
|
|
@@ -2742,6 +3027,14 @@ async function startProxy(config = {}) {
|
|
|
2742
3027
|
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
2743
3028
|
return;
|
|
2744
3029
|
}
|
|
3030
|
+
// Extract agent fingerprint and explicit agent ID
|
|
3031
|
+
const nativeSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(requestBody);
|
|
3032
|
+
const nativeExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
|
|
3033
|
+
let nativeAgentFingerprint;
|
|
3034
|
+
if (nativeSystemPrompt) {
|
|
3035
|
+
const agentResult = (0, agent_tracker_js_1.trackAgent)(nativeSystemPrompt, 0, nativeExplicitAgentId);
|
|
3036
|
+
nativeAgentFingerprint = agentResult.fingerprint;
|
|
3037
|
+
}
|
|
2745
3038
|
const originalModel = requestBody['model'];
|
|
2746
3039
|
let requestedModel = headerModelOverride ?? originalModel ?? '';
|
|
2747
3040
|
if (headerModelOverride) {
|
|
@@ -3076,7 +3369,8 @@ async function startProxy(config = {}) {
|
|
|
3076
3369
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
|
|
3077
3370
|
}
|
|
3078
3371
|
const durationMs = Date.now() - startTime;
|
|
3079
|
-
|
|
3372
|
+
const errMsg = extractProviderErrorMessage(errorPayload, providerResponse.status);
|
|
3373
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errMsg, providerResponse.status);
|
|
3080
3374
|
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
|
|
3081
3375
|
res.end(JSON.stringify(errorPayload));
|
|
3082
3376
|
return;
|
|
@@ -3203,7 +3497,22 @@ async function startProxy(config = {}) {
|
|
|
3203
3497
|
const nativeTokIn = nativeBaseTokIn + nativeCacheCreation + nativeCacheRead;
|
|
3204
3498
|
// Cost calculation expects inputTokens to include cache tokens when cache params are provided
|
|
3205
3499
|
const nativeCostUsd = (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCacheCreation || undefined, nativeCacheRead || undefined);
|
|
3206
|
-
|
|
3500
|
+
// Build request content if logging enabled
|
|
3501
|
+
let nativeContentData;
|
|
3502
|
+
if (isContentLoggingEnabled()) {
|
|
3503
|
+
const extracted = extractRequestContent(requestBody, true);
|
|
3504
|
+
const responseText = nativeResponseData ? extractResponseText(nativeResponseData, true) : '';
|
|
3505
|
+
nativeContentData = {
|
|
3506
|
+
...extracted,
|
|
3507
|
+
responsePreview: responseText ? responseText.slice(0, 500) : undefined,
|
|
3508
|
+
fullResponse: responseText || undefined,
|
|
3509
|
+
};
|
|
3510
|
+
}
|
|
3511
|
+
updateLastHistoryEntry(nativeTokIn, nativeTokOut, nativeCostUsd, undefined, nativeCacheCreation || undefined, nativeCacheRead || undefined, nativeAgentFingerprint, nativeExplicitAgentId, nativeContentData);
|
|
3512
|
+
// Update agent cost now that we know the actual cost
|
|
3513
|
+
if (nativeAgentFingerprint && nativeAgentFingerprint !== 'unknown') {
|
|
3514
|
+
(0, agent_tracker_js_1.updateAgentCost)(nativeAgentFingerprint, nativeCostUsd);
|
|
3515
|
+
}
|
|
3207
3516
|
// ── Post-request: budget spend + anomaly detection ──
|
|
3208
3517
|
postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
|
|
3209
3518
|
if (recordTelemetry) {
|
|
@@ -3224,7 +3533,17 @@ async function startProxy(config = {}) {
|
|
|
3224
3533
|
}
|
|
3225
3534
|
catch (err) {
|
|
3226
3535
|
const durationMs = Date.now() - startTime;
|
|
3227
|
-
|
|
3536
|
+
let catchErrMsg;
|
|
3537
|
+
let catchErrStatus;
|
|
3538
|
+
if (err instanceof ProviderResponseError) {
|
|
3539
|
+
catchErrMsg = extractProviderErrorMessage(err.payload, err.status);
|
|
3540
|
+
catchErrStatus = err.status;
|
|
3541
|
+
}
|
|
3542
|
+
else {
|
|
3543
|
+
catchErrMsg = err instanceof Error ? err.message : String(err);
|
|
3544
|
+
catchErrStatus = 500;
|
|
3545
|
+
}
|
|
3546
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, catchErrMsg, catchErrStatus);
|
|
3228
3547
|
if (err instanceof ProviderResponseError) {
|
|
3229
3548
|
res.writeHead(err.status, { 'Content-Type': 'application/json' });
|
|
3230
3549
|
res.end(JSON.stringify(err.payload));
|
|
@@ -3301,6 +3620,14 @@ async function startProxy(config = {}) {
|
|
|
3301
3620
|
return;
|
|
3302
3621
|
}
|
|
3303
3622
|
const isStreaming = request.stream === true;
|
|
3623
|
+
// Extract agent fingerprint for chat/completions
|
|
3624
|
+
const chatSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(request);
|
|
3625
|
+
const chatExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
|
|
3626
|
+
let chatAgentFingerprint;
|
|
3627
|
+
if (chatSystemPrompt) {
|
|
3628
|
+
const agentResult = (0, agent_tracker_js_1.trackAgent)(chatSystemPrompt, 0, chatExplicitAgentId);
|
|
3629
|
+
chatAgentFingerprint = agentResult.fingerprint;
|
|
3630
|
+
}
|
|
3304
3631
|
// ── Response Cache: check for cached response (chat/completions) ──
|
|
3305
3632
|
const chatCacheBypass = responseCache.shouldBypass(request);
|
|
3306
3633
|
let chatCacheHash;
|
|
@@ -3601,7 +3928,7 @@ async function startProxy(config = {}) {
|
|
|
3601
3928
|
const startTime = Date.now();
|
|
3602
3929
|
// Handle streaming vs non-streaming
|
|
3603
3930
|
if (isStreaming) {
|
|
3604
|
-
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass);
|
|
3931
|
+
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass, chatAgentFingerprint, chatExplicitAgentId);
|
|
3605
3932
|
}
|
|
3606
3933
|
else {
|
|
3607
3934
|
if (useCascade && cascadeConfig) {
|
|
@@ -3641,7 +3968,9 @@ async function startProxy(config = {}) {
|
|
|
3641
3968
|
const cascadeCacheCreation = cascadeUsage?.cache_creation_input_tokens || undefined;
|
|
3642
3969
|
const cascadeCacheRead = cascadeUsage?.cache_read_input_tokens || undefined;
|
|
3643
3970
|
const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut, cascadeCacheCreation, cascadeCacheRead);
|
|
3644
|
-
updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel, cascadeCacheCreation, cascadeCacheRead);
|
|
3971
|
+
updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel, cascadeCacheCreation, cascadeCacheRead, chatAgentFingerprint, chatExplicitAgentId);
|
|
3972
|
+
if (chatAgentFingerprint && chatAgentFingerprint !== 'unknown')
|
|
3973
|
+
(0, agent_tracker_js_1.updateAgentCost)(chatAgentFingerprint, cascadeCost);
|
|
3645
3974
|
if (recordTelemetry) {
|
|
3646
3975
|
try {
|
|
3647
3976
|
const runResult = await relay.run({
|
|
@@ -3674,7 +4003,17 @@ async function startProxy(config = {}) {
|
|
|
3674
4003
|
}
|
|
3675
4004
|
catch (err) {
|
|
3676
4005
|
const durationMs = Date.now() - startTime;
|
|
3677
|
-
|
|
4006
|
+
let cascadeErrMsg;
|
|
4007
|
+
let cascadeErrStatus;
|
|
4008
|
+
if (err instanceof ProviderResponseError) {
|
|
4009
|
+
cascadeErrMsg = extractProviderErrorMessage(err.payload, err.status);
|
|
4010
|
+
cascadeErrStatus = err.status;
|
|
4011
|
+
}
|
|
4012
|
+
else {
|
|
4013
|
+
cascadeErrMsg = err instanceof Error ? err.message : String(err);
|
|
4014
|
+
cascadeErrStatus = 500;
|
|
4015
|
+
}
|
|
4016
|
+
logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity, undefined, undefined, cascadeErrMsg, cascadeErrStatus);
|
|
3678
4017
|
if (err instanceof ProviderResponseError) {
|
|
3679
4018
|
res.writeHead(err.status, { 'Content-Type': 'application/json' });
|
|
3680
4019
|
res.end(JSON.stringify(err.payload));
|
|
@@ -3686,7 +4025,7 @@ async function startProxy(config = {}) {
|
|
|
3686
4025
|
}
|
|
3687
4026
|
}
|
|
3688
4027
|
else {
|
|
3689
|
-
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
|
|
4028
|
+
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId);
|
|
3690
4029
|
}
|
|
3691
4030
|
}
|
|
3692
4031
|
});
|
|
@@ -3829,7 +4168,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
|
|
|
3829
4168
|
}
|
|
3830
4169
|
return { responseData, ok: true, status: 200 };
|
|
3831
4170
|
}
|
|
3832
|
-
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass) {
|
|
4171
|
+
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass, agentFingerprint, agentId) {
|
|
3833
4172
|
let providerResponse;
|
|
3834
4173
|
try {
|
|
3835
4174
|
switch (targetProvider) {
|
|
@@ -3857,7 +4196,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3857
4196
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
|
|
3858
4197
|
}
|
|
3859
4198
|
const durationMs = Date.now() - startTime;
|
|
3860
|
-
|
|
4199
|
+
const streamErrMsg = extractProviderErrorMessage(errorData, providerResponse.status);
|
|
4200
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, streamErrMsg, providerResponse.status);
|
|
3861
4201
|
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
|
|
3862
4202
|
res.end(JSON.stringify(errorData));
|
|
3863
4203
|
return;
|
|
@@ -3869,7 +4209,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3869
4209
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
3870
4210
|
}
|
|
3871
4211
|
const durationMs = Date.now() - startTime;
|
|
3872
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
|
|
4212
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errorMsg, 500);
|
|
3873
4213
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
3874
4214
|
res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
|
|
3875
4215
|
return;
|
|
@@ -3988,7 +4328,9 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3988
4328
|
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
|
|
3989
4329
|
// Update token/cost info on the history entry (with cache token discount)
|
|
3990
4330
|
const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut, streamCacheCreation || undefined, streamCacheRead || undefined);
|
|
3991
|
-
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost, undefined, streamCacheCreation || undefined, streamCacheRead || undefined);
|
|
4331
|
+
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost, undefined, streamCacheCreation || undefined, streamCacheRead || undefined, agentFingerprint, agentId);
|
|
4332
|
+
if (agentFingerprint && agentFingerprint !== 'unknown')
|
|
4333
|
+
(0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, streamCost);
|
|
3992
4334
|
// ── Post-request: budget spend + anomaly detection ──
|
|
3993
4335
|
try {
|
|
3994
4336
|
(0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
|
|
@@ -4024,7 +4366,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
4024
4366
|
/**
|
|
4025
4367
|
* Handle non-streaming request
|
|
4026
4368
|
*/
|
|
4027
|
-
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
|
|
4369
|
+
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId) {
|
|
4028
4370
|
let responseData;
|
|
4029
4371
|
try {
|
|
4030
4372
|
const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
|
|
@@ -4034,7 +4376,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
4034
4376
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
|
|
4035
4377
|
}
|
|
4036
4378
|
const durationMs = Date.now() - startTime;
|
|
4037
|
-
|
|
4379
|
+
const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
|
|
4380
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, nsErrMsg, result.status);
|
|
4038
4381
|
res.writeHead(result.status, { 'Content-Type': 'application/json' });
|
|
4039
4382
|
res.end(JSON.stringify(responseData));
|
|
4040
4383
|
return;
|
|
@@ -4046,7 +4389,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
4046
4389
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
4047
4390
|
}
|
|
4048
4391
|
const durationMs = Date.now() - startTime;
|
|
4049
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
|
|
4392
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errorMsg, 500);
|
|
4050
4393
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
4051
4394
|
res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
|
|
4052
4395
|
return;
|
|
@@ -4066,7 +4409,9 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
4066
4409
|
const cacheCreationTokens = usage?.cache_creation_input_tokens ?? 0;
|
|
4067
4410
|
const cacheReadTokens = usage?.cache_read_input_tokens ?? 0;
|
|
4068
4411
|
const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut, cacheCreationTokens || undefined, cacheReadTokens || undefined);
|
|
4069
|
-
updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel, cacheCreationTokens || undefined, cacheReadTokens || undefined);
|
|
4412
|
+
updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel, cacheCreationTokens || undefined, cacheReadTokens || undefined, agentFingerprint, agentId);
|
|
4413
|
+
if (agentFingerprint && agentFingerprint !== 'unknown')
|
|
4414
|
+
(0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, cost);
|
|
4070
4415
|
// ── Post-request: budget spend + anomaly detection ──
|
|
4071
4416
|
try {
|
|
4072
4417
|
(0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);
|