@relayplane/proxy 1.7.1 → 1.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -7
- package/dist/agent-tracker.d.ts +82 -0
- package/dist/agent-tracker.d.ts.map +1 -0
- package/dist/agent-tracker.js +281 -0
- package/dist/agent-tracker.js.map +1 -0
- package/dist/standalone-proxy.d.ts +18 -0
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +317 -40
- package/dist/standalone-proxy.js.map +1 -1
- package/dist/telemetry.d.ts.map +1 -1
- package/dist/telemetry.js +13 -0
- package/dist/telemetry.js.map +1 -1
- package/package.json +1 -1
package/dist/standalone-proxy.js
CHANGED
|
@@ -56,6 +56,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
56
56
|
exports.SMART_ALIASES = exports.RELAYPLANE_ALIASES = exports.MODEL_MAPPING = exports.DEFAULT_ENDPOINTS = exports.proxyStatsCollector = void 0;
|
|
57
57
|
exports.getAvailableModelNames = getAvailableModelNames;
|
|
58
58
|
exports.resolveModelAlias = resolveModelAlias;
|
|
59
|
+
exports.extractRequestContent = extractRequestContent;
|
|
60
|
+
exports.extractResponseText = extractResponseText;
|
|
59
61
|
exports.parseModelSuffix = parseModelSuffix;
|
|
60
62
|
exports.classifyComplexity = classifyComplexity;
|
|
61
63
|
exports.shouldEscalate = shouldEscalate;
|
|
@@ -76,6 +78,7 @@ const budget_js_1 = require("./budget.js");
|
|
|
76
78
|
const anomaly_js_1 = require("./anomaly.js");
|
|
77
79
|
const alerts_js_1 = require("./alerts.js");
|
|
78
80
|
const downgrade_js_1 = require("./downgrade.js");
|
|
81
|
+
const agent_tracker_js_1 = require("./agent-tracker.js");
|
|
79
82
|
const version_status_js_1 = require("./utils/version-status.js");
|
|
80
83
|
const PROXY_VERSION = (() => {
|
|
81
84
|
try {
|
|
@@ -488,7 +491,7 @@ function shutdownHistory() {
|
|
|
488
491
|
}
|
|
489
492
|
flushHistoryBuffer();
|
|
490
493
|
}
|
|
491
|
-
function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity) {
|
|
494
|
+
function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity, agentFingerprint, agentId) {
|
|
492
495
|
const timestamp = new Date().toISOString();
|
|
493
496
|
const status = success ? '✓' : '✗';
|
|
494
497
|
const escalateTag = escalated ? ' [ESCALATED]' : '';
|
|
@@ -531,6 +534,8 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
|
|
|
531
534
|
costUsd: 0,
|
|
532
535
|
taskType: taskType || 'general',
|
|
533
536
|
complexity: complexity || 'simple',
|
|
537
|
+
agentFingerprint,
|
|
538
|
+
agentId,
|
|
534
539
|
};
|
|
535
540
|
requestHistory.push(entry);
|
|
536
541
|
if (requestHistory.length > MAX_HISTORY) {
|
|
@@ -539,7 +544,7 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
|
|
|
539
544
|
bufferHistoryEntry(entry);
|
|
540
545
|
}
|
|
541
546
|
/** Update the most recent history entry with token/cost info */
|
|
542
|
-
function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel) {
|
|
547
|
+
function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cacheCreationTokens, cacheReadTokens, agentFingerprint, agentId, requestContent) {
|
|
543
548
|
if (requestHistory.length > 0) {
|
|
544
549
|
const last = requestHistory[requestHistory.length - 1];
|
|
545
550
|
last.tokensIn = tokensIn;
|
|
@@ -548,8 +553,86 @@ function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel) {
|
|
|
548
553
|
if (responseModel) {
|
|
549
554
|
last.responseModel = responseModel;
|
|
550
555
|
}
|
|
556
|
+
if (cacheCreationTokens !== undefined)
|
|
557
|
+
last.cacheCreationTokens = cacheCreationTokens;
|
|
558
|
+
if (cacheReadTokens !== undefined)
|
|
559
|
+
last.cacheReadTokens = cacheReadTokens;
|
|
560
|
+
if (agentFingerprint !== undefined)
|
|
561
|
+
last.agentFingerprint = agentFingerprint;
|
|
562
|
+
if (agentId !== undefined)
|
|
563
|
+
last.agentId = agentId;
|
|
564
|
+
if (requestContent)
|
|
565
|
+
last.requestContent = requestContent;
|
|
551
566
|
}
|
|
552
567
|
}
|
|
568
|
+
/**
 * Extract request content for logging. Handles Anthropic and OpenAI formats.
 *
 * The system prompt is read from `body.system` (Anthropic) or from the first
 * `role: 'system'` message (OpenAI). The user message is the text of the LAST
 * `role: 'user'` message. Content may be a plain string or an array of content
 * parts; only `type: 'text'` parts contribute text.
 *
 * Malformed input (null body, null message entries, null content parts) is
 * tolerated and simply yields no text, so logging never breaks a request.
 *
 * @param {object} body - Parsed request body.
 * @param {boolean} isAnthropic - True for the Anthropic format, false for OpenAI.
 * @returns {{systemPrompt: (string|undefined), userMessage: (string|undefined)}}
 *   `systemPrompt` is truncated to 200 characters; either field is `undefined`
 *   when no text was found.
 */
function extractRequestContent(body, isAnthropic) {
    // Text carried by a single content part; null entries and non-text parts
    // (images, tool results, ...) contribute nothing.
    const partText = (part) => (part && part.type === 'text' ? String(part.text ?? '') : '');
    // Flatten string-or-parts content into one string. Bare strings inside a
    // parts array are only honoured for the Anthropic `system` field.
    const contentText = (content, keepBareStrings) => {
        if (typeof content === 'string') {
            return content;
        }
        if (!Array.isArray(content)) {
            return '';
        }
        return content
            .map((part) => (keepBareStrings && typeof part === 'string' ? part : partText(part)))
            .join('');
    };
    const messages = Array.isArray(body?.messages) ? body.messages : [];
    let systemPrompt = '';
    if (isAnthropic) {
        systemPrompt = contentText(body?.system, true);
    }
    else {
        const systemMsg = messages.find((msg) => msg?.role === 'system');
        if (systemMsg) {
            // OpenAI also allows the system message content to be an array of text parts.
            systemPrompt = contentText(systemMsg.content, false);
        }
    }
    let userMessage = '';
    // Walk backwards: the most recent user turn is the one worth showing.
    for (let i = messages.length - 1; i >= 0; i--) {
        if (messages[i]?.role === 'user') {
            userMessage = contentText(messages[i].content, false);
            break;
        }
    }
    return {
        systemPrompt: systemPrompt ? systemPrompt.slice(0, 200) : undefined,
        userMessage: userMessage || undefined,
    };
}
|
|
618
|
+
/**
 * Extract assistant response text from response payload.
 *
 * Anthropic: concatenates every `type: 'text'` part of `responseData.content`.
 * OpenAI: reads `choices[0].message.content`, which may be a string or an
 * array of content parts (text parts are concatenated).
 *
 * Always returns a string. A missing payload, a response with no text (for
 * example a pure tool call), or null content parts yield ''.
 *
 * @param {object} responseData - Parsed provider response.
 * @param {boolean} isAnthropic - True for the Anthropic format, false for OpenAI.
 * @returns {string} The assistant text, or '' when there is none.
 */
function extractResponseText(responseData, isAnthropic) {
    // Concatenate the text parts of a content array, skipping null entries
    // and non-text parts.
    const joinTextParts = (parts) => parts
        .filter((part) => part?.type === 'text')
        .map((part) => part.text ?? '')
        .join('');
    if (isAnthropic) {
        const content = responseData?.content;
        return Array.isArray(content) ? joinTextParts(content) : '';
    }
    const choices = responseData?.choices;
    if (!Array.isArray(choices)) {
        return '';
    }
    const content = choices[0]?.message?.content;
    if (typeof content === 'string') {
        return content;
    }
    // Callers slice and store the result as text, so never hand back a raw array/object.
    return Array.isArray(content) ? joinTextParts(content) : '';
}
|
|
553
636
|
const DEFAULT_PROXY_CONFIG = {
|
|
554
637
|
enabled: true,
|
|
555
638
|
modelOverrides: {},
|
|
@@ -580,6 +663,11 @@ const DEFAULT_PROXY_CONFIG = {
|
|
|
580
663
|
},
|
|
581
664
|
},
|
|
582
665
|
};
|
|
666
|
+
/** Module-level ref to active proxy config (set during startProxy) */
|
|
667
|
+
let _activeProxyConfig = {};
|
|
668
|
+
function isContentLoggingEnabled() {
    // Content logging is opt-out: it stays enabled unless the active config
    // sets dashboard.showRequestContent to exactly false.
    const dashboard = _activeProxyConfig.dashboard;
    if (dashboard === undefined || dashboard === null) {
        return true;
    }
    return dashboard.showRequestContent !== false;
}
|
|
583
671
|
function getProxyConfigPath() {
|
|
584
672
|
const customPath = process.env['RELAYPLANE_CONFIG_PATH'];
|
|
585
673
|
if (customPath && customPath.trim())
|
|
@@ -773,6 +861,23 @@ function classifyComplexity(messages) {
|
|
|
773
861
|
score += 1;
|
|
774
862
|
if (andCount >= 5)
|
|
775
863
|
score += 1;
|
|
864
|
+
// Calculate total tokens across ALL messages, not just last user message.
|
|
865
|
+
// For agent workloads (OpenClaw, aider, Claude Code) the last user message is
|
|
866
|
+
// often tiny while the real complexity lives in the 100K+ token context.
|
|
867
|
+
const allText = extractMessageText(messages);
|
|
868
|
+
const totalTokens = Math.ceil(allText.length / 4);
|
|
869
|
+
// Context size floor — use as a hard signal regardless of last-message score
|
|
870
|
+
if (totalTokens > 100000)
|
|
871
|
+
score += 5; // definitely complex
|
|
872
|
+
else if (totalTokens > 50000)
|
|
873
|
+
score += 3; // likely moderate+
|
|
874
|
+
else if (totalTokens > 20000)
|
|
875
|
+
score += 2;
|
|
876
|
+
// Message count signal — long conversations imply multi-step reasoning
|
|
877
|
+
if (messages.length > 50)
|
|
878
|
+
score += 2;
|
|
879
|
+
else if (messages.length > 20)
|
|
880
|
+
score += 1;
|
|
776
881
|
if (score >= 4)
|
|
777
882
|
return 'complex';
|
|
778
883
|
if (score >= 2)
|
|
@@ -1505,11 +1610,13 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
1505
1610
|
const msg = eventData['message'];
|
|
1506
1611
|
baseChunk.id = msg?.['id'] || messageId;
|
|
1507
1612
|
choice.delta = { role: 'assistant', content: '' };
|
|
1508
|
-
// Pass through input token count from message_start
|
|
1613
|
+
// Pass through input token count from message_start (including cache tokens)
|
|
1509
1614
|
const msgUsage = msg?.['usage'];
|
|
1510
1615
|
if (msgUsage) {
|
|
1511
1616
|
baseChunk['usage'] = {
|
|
1512
1617
|
prompt_tokens: msgUsage['input_tokens'] ?? 0,
|
|
1618
|
+
cache_creation_tokens: msgUsage['cache_creation_input_tokens'] ?? 0,
|
|
1619
|
+
cache_read_tokens: msgUsage['cache_read_input_tokens'] ?? 0,
|
|
1513
1620
|
};
|
|
1514
1621
|
}
|
|
1515
1622
|
return `data: ${JSON.stringify(baseChunk)}\n\n`;
|
|
@@ -1749,7 +1856,7 @@ function resolveExplicitModel(modelName) {
|
|
|
1749
1856
|
function resolveConfigModel(modelName) {
|
|
1750
1857
|
return resolveExplicitModel(modelName) ?? parsePreferredModel(modelName);
|
|
1751
1858
|
}
|
|
1752
|
-
function
|
|
1859
|
+
function extractResponseTextAuto(responseData) {
|
|
1753
1860
|
const openAiChoices = responseData['choices'];
|
|
1754
1861
|
if (openAiChoices && openAiChoices.length > 0) {
|
|
1755
1862
|
const first = openAiChoices[0];
|
|
@@ -1917,7 +2024,7 @@ async function cascadeRequest(config, makeRequest, log) {
|
|
|
1917
2024
|
const isLastModel = i === config.models.length - 1;
|
|
1918
2025
|
try {
|
|
1919
2026
|
const { responseData, provider, model: resolvedModel } = await makeRequest(model);
|
|
1920
|
-
const text =
|
|
2027
|
+
const text = extractResponseTextAuto(responseData);
|
|
1921
2028
|
if (isLastModel || escalations >= config.maxEscalations) {
|
|
1922
2029
|
return { responseData, provider, model: resolvedModel, escalations };
|
|
1923
2030
|
}
|
|
@@ -1969,6 +2076,7 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
|
1969
2076
|
.vstat.unavailable{color:#a3a3a3;border-color:#52525b66;background:#18181b66}
|
|
1970
2077
|
@media(max-width:768px){.col-tt,.col-cx{display:none}}
|
|
1971
2078
|
.prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
|
|
2079
|
+
.rename-btn{background:none;border:none;cursor:pointer;font-size:.75rem;opacity:.5;padding:2px}.rename-btn:hover{opacity:1}
|
|
1972
2080
|
</style></head><body>
|
|
1973
2081
|
<div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><a href="/dashboard/config">Config</a> · <span id="ver"></span><span id="vstat" class="vstat unavailable">Unable to check</span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
|
|
1974
2082
|
<div class="cards">
|
|
@@ -1979,9 +2087,11 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
|
1979
2087
|
</div>
|
|
1980
2088
|
<div class="section"><h2>Model Breakdown</h2>
|
|
1981
2089
|
<table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
|
|
2090
|
+
<div class="section"><h2>Agent Cost Breakdown</h2>
|
|
2091
|
+
<table><thead><tr><th>Agent</th><th>Requests</th><th>Total Cost</th><th>Last Active</th><th></th></tr></thead><tbody id="agents"></tbody></table></div>
|
|
1982
2092
|
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
|
|
1983
2093
|
<div class="section"><h2>Recent Runs</h2>
|
|
1984
|
-
<table><thead><tr><th>Time</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
|
|
2094
|
+
<table><thead><tr><th>Time</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
|
|
1985
2095
|
<script>
|
|
1986
2096
|
const $ = id => document.getElementById(id);
|
|
1987
2097
|
function fmt(n,d=2){return typeof n==='number'?n.toFixed(d):'-'}
|
|
@@ -1989,12 +2099,13 @@ function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
|
|
|
1989
2099
|
function dur(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60);return h?h+'h '+m+'m':m+'m'}
|
|
1990
2100
|
async function load(){
|
|
1991
2101
|
try{
|
|
1992
|
-
const [health,stats,runsR,sav,provH]=await Promise.all([
|
|
2102
|
+
const [health,stats,runsR,sav,provH,agentsR]=await Promise.all([
|
|
1993
2103
|
fetch('/health').then(r=>r.json()),
|
|
1994
2104
|
fetch('/v1/telemetry/stats').then(r=>r.json()),
|
|
1995
2105
|
fetch('/v1/telemetry/runs?limit=20').then(r=>r.json()),
|
|
1996
2106
|
fetch('/v1/telemetry/savings').then(r=>r.json()),
|
|
1997
|
-
fetch('/v1/telemetry/health').then(r=>r.json())
|
|
2107
|
+
fetch('/v1/telemetry/health').then(r=>r.json()),
|
|
2108
|
+
fetch('/api/agents').then(r=>r.json()).catch(()=>({agents:[]}))
|
|
1998
2109
|
]);
|
|
1999
2110
|
$('ver').textContent='v'+health.version;
|
|
2000
2111
|
$('uptime').textContent=dur(health.uptime);
|
|
@@ -2021,9 +2132,26 @@ async function load(){
|
|
|
2021
2132
|
).join('')||'<tr><td colspan=4 style="color:#64748b">No data yet</td></tr>';
|
|
2022
2133
|
function ttCls(t){const m={code_generation:'tt-code',analysis:'tt-analysis',summarization:'tt-summarization',question_answering:'tt-qa'};return m[t]||'tt-general'}
|
|
2023
2134
|
function cxCls(c){const m={simple:'cx-simple',moderate:'cx-moderate',complex:'cx-complex'};return m[c]||'cx-simple'}
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2135
|
+
// Escape a value for insertion into innerHTML, both as element text and inside
// quoted attribute values. Falsy input yields ''; non-strings are stringified.
function esc(s){if(!s)return'';return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;')}
|
|
2136
|
+
$('runs').innerHTML=(runsR.runs||[]).map((r,i)=>{
|
|
2137
|
+
const row='<tr style="cursor:pointer" onclick="toggleDetail('+i+')" title="Click to expand"><td>'+fmtTime(r.started_at)+'</td><td>'+r.model+'</td><td class="col-tt"><span class="badge '+ttCls(r.taskType)+'">'+(r.taskType||'general').replace(/_/g,' ')+'</span></td><td class="col-cx"><span class="badge '+cxCls(r.complexity)+'">'+(r.complexity||'simple')+'</span></td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td class="col-cache" style="color:#60a5fa">'+(r.cacheCreationTokens||0)+'</td><td class="col-cache" style="color:#34d399">'+(r.cacheReadTokens||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+r.status+'</span></td></tr>';
|
|
2138
|
+
const c=r.requestContent||{};
|
|
2139
|
+
let detail='<tr id="run-detail-'+i+'" style="display:none"><td colspan="11" style="padding:16px;background:#111217;border-bottom:1px solid #1e293b">';
|
|
2140
|
+
if(c.systemPrompt||c.userMessage||c.responsePreview){
|
|
2141
|
+
if(c.systemPrompt) detail+='<div style="color:#64748b;font-size:.85rem;margin-bottom:10px;font-style:italic"><strong style="color:#94a3b8">System:</strong> '+esc(c.systemPrompt)+'</div>';
|
|
2142
|
+
if(c.userMessage) detail+='<div style="background:#1a1c23;border:1px solid #1e293b;border-radius:8px;padding:12px;margin-bottom:10px"><strong style="color:#94a3b8;font-size:.8rem">User Message</strong><div style="margin-top:6px;white-space:pre-wrap">'+esc(c.userMessage)+'</div></div>';
|
|
2143
|
+
if(c.responsePreview) detail+='<div style="background:#1a1c23;border:1px solid #1e293b;border-radius:8px;padding:12px;margin-bottom:10px"><strong style="color:#94a3b8;font-size:.8rem">Response Preview</strong><div style="margin-top:6px;white-space:pre-wrap">'+esc(c.responsePreview)+'</div></div>';
|
|
2144
|
+
detail+='<button onclick="event.stopPropagation();loadFullResponse(\''+r.id+'\','+i+')" id="full-btn-'+i+'" style="background:#1e293b;color:#e2e8f0;border:1px solid #334155;padding:6px 12px;border-radius:6px;cursor:pointer;font-size:.8rem">Show full response</button><pre id="full-resp-'+i+'" style="display:none;white-space:pre-wrap;margin-top:10px;background:#0d0e11;border:1px solid #1e293b;border-radius:8px;padding:12px;max-height:400px;overflow:auto;font-size:.8rem"></pre>';
|
|
2145
|
+
} else {
|
|
2146
|
+
detail+='<span style="color:#64748b">No content captured for this request</span>';
|
|
2147
|
+
}
|
|
2148
|
+
detail+='</td></tr>';
|
|
2149
|
+
return row+detail;
|
|
2150
|
+
}).join('')||'<tr><td colspan=11 style="color:#64748b">No runs yet</td></tr>';
|
|
2151
|
+
const agents=(agentsR.agents||[]).sort((a,b)=>(b.totalCost||0)-(a.totalCost||0));
|
|
2152
|
+
$('agents').innerHTML=agents.length?agents.map(a=>
|
|
2153
|
+
'<tr><td><span class="agent-name" data-fp="'+a.fingerprint+'">'+a.name+'</span> <button class="rename-btn" onclick="renameAgent(\''+a.fingerprint+'\',\''+a.name.replace(/'/g,"\\'")+'\')">✏️</button></td><td>'+a.totalRequests+'</td><td>$'+fmt(a.totalCost,4)+'</td><td>'+fmtTime(a.lastSeen)+'</td><td style="font-size:.7rem;color:#64748b" title="'+a.systemPromptPreview+'">'+a.fingerprint+'</td></tr>'
|
|
2154
|
+
).join(''):'<tr><td colspan=5 style="color:#64748b">No agents detected yet</td></tr>';
|
|
2027
2155
|
$('providers').innerHTML=(provH.providers||[]).map(p=>{
|
|
2028
2156
|
const dotClass = p.status==='healthy'?'up':(p.status==='degraded'?'warn':'down');
|
|
2029
2157
|
const rate = p.successRate!==undefined?(' '+Math.round(p.successRate*100)+'%'):'';
|
|
@@ -2031,8 +2159,27 @@ async function load(){
|
|
|
2031
2159
|
}).join('');
|
|
2032
2160
|
}catch(e){console.error(e)}
|
|
2033
2161
|
}
|
|
2162
|
+
// Ask for a new display name and persist it via POST /api/agents/rename.
// Cancelled, blank, or unchanged input is a no-op. The table is reloaded only
// after the server accepts the change; HTTP and network failures are logged
// rather than left as an unhandled promise rejection.
async function renameAgent(fp,currentName){
const name=(prompt('Rename agent:',currentName)||'').trim();
if(!name||name===currentName)return;
try{
const res=await fetch('/api/agents/rename',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({fingerprint:fp,name:name})});
if(!res.ok){console.error('Rename failed: HTTP '+res.status);return}
load();
}catch(e){console.error(e)}
}
|
|
2168
|
+
// Expand or collapse the detail row that belongs to run row i.
function toggleDetail(i){
const detailRow=document.getElementById('run-detail-'+i);
if(detailRow.style.display==='none'){detailRow.style.display='table-row'}
else{detailRow.style.display='none'}
}
|
|
2169
|
+
async function loadFullResponse(runId,i){
|
|
2170
|
+
const btn=document.getElementById('full-btn-'+i);
|
|
2171
|
+
const pre=document.getElementById('full-resp-'+i);
|
|
2172
|
+
if(pre.style.display!=='none'){pre.style.display='none';btn.textContent='Show full response';return}
|
|
2173
|
+
btn.textContent='Loading...';
|
|
2174
|
+
try{
|
|
2175
|
+
const data=await fetch('/api/runs/'+runId).then(r=>r.json());
|
|
2176
|
+
const full=data.requestContent&&data.requestContent.fullResponse;
|
|
2177
|
+
if(full){pre.textContent=full;pre.style.display='block';btn.textContent='Hide full response'}
|
|
2178
|
+
else{btn.textContent='No full response available'}
|
|
2179
|
+
}catch{btn.textContent='Error loading response'}
|
|
2180
|
+
}
|
|
2034
2181
|
load();setInterval(load,5000);
|
|
2035
|
-
</script></body></html>`;
|
|
2182
|
+
</script><footer style="text-align:center;padding:20px 0;color:#475569;font-size:.75rem;border-top:1px solid #1e293b;margin-top:20px">🔒 Request content stays on your machine. Never sent to cloud.</footer></body></html>`;
|
|
2036
2183
|
}
|
|
2037
2184
|
function getConfigDashboardHTML() {
|
|
2038
2185
|
return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Config</title>
|
|
@@ -2129,8 +2276,10 @@ async function startProxy(config = {}) {
|
|
|
2129
2276
|
};
|
|
2130
2277
|
// Load persistent history from disk
|
|
2131
2278
|
loadHistoryFromDisk();
|
|
2279
|
+
(0, agent_tracker_js_1.loadAgentRegistry)();
|
|
2132
2280
|
// Flush history on shutdown
|
|
2133
2281
|
const handleShutdown = () => {
|
|
2282
|
+
(0, agent_tracker_js_1.flushAgentRegistry)();
|
|
2134
2283
|
meshHandle.stop();
|
|
2135
2284
|
shutdownHistory();
|
|
2136
2285
|
process.exit(0);
|
|
@@ -2139,6 +2288,7 @@ async function startProxy(config = {}) {
|
|
|
2139
2288
|
process.on('SIGTERM', handleShutdown);
|
|
2140
2289
|
const configPath = getProxyConfigPath();
|
|
2141
2290
|
let proxyConfig = await loadProxyConfig(configPath, log);
|
|
2291
|
+
_activeProxyConfig = proxyConfig;
|
|
2142
2292
|
const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
|
|
2143
2293
|
// === Startup config validation (Task 4) ===
|
|
2144
2294
|
try {
|
|
@@ -2516,7 +2666,9 @@ async function startProxy(config = {}) {
|
|
|
2516
2666
|
const offset = parseInt(params.get('offset') || '0', 10);
|
|
2517
2667
|
const sorted = [...requestHistory].reverse();
|
|
2518
2668
|
const runs = sorted.slice(offset, offset + limit).map(r => {
|
|
2519
|
-
|
|
2669
|
+
// Savings should reflect routing decisions only — pass same cache tokens to baseline
|
|
2670
|
+
// so the cache discount doesn't get counted as "savings from routing"
|
|
2671
|
+
const origCost = (0, telemetry_js_1.estimateCost)('claude-opus-4-6', r.tokensIn, r.tokensOut, r.cacheCreationTokens || undefined, r.cacheReadTokens || undefined);
|
|
2520
2672
|
const perRunSavings = Math.max(0, origCost - r.costUsd);
|
|
2521
2673
|
return {
|
|
2522
2674
|
id: r.id,
|
|
@@ -2536,8 +2688,16 @@ async function startProxy(config = {}) {
|
|
|
2536
2688
|
latencyMs: r.latencyMs,
|
|
2537
2689
|
tokensIn: r.tokensIn,
|
|
2538
2690
|
tokensOut: r.tokensOut,
|
|
2691
|
+
cacheCreationTokens: r.cacheCreationTokens ?? 0,
|
|
2692
|
+
cacheReadTokens: r.cacheReadTokens ?? 0,
|
|
2539
2693
|
savings: Math.round(perRunSavings * 10000) / 10000,
|
|
2540
2694
|
escalated: r.escalated,
|
|
2695
|
+
requestContent: r.requestContent ? {
|
|
2696
|
+
systemPrompt: r.requestContent.systemPrompt,
|
|
2697
|
+
userMessage: r.requestContent.userMessage,
|
|
2698
|
+
responsePreview: r.requestContent.responsePreview,
|
|
2699
|
+
// fullResponse excluded from list endpoint to keep payloads small
|
|
2700
|
+
} : undefined,
|
|
2541
2701
|
};
|
|
2542
2702
|
});
|
|
2543
2703
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
@@ -2553,7 +2713,9 @@ async function startProxy(config = {}) {
|
|
|
2553
2713
|
let totalSavedAmount = 0;
|
|
2554
2714
|
const byDayMap = new Map();
|
|
2555
2715
|
for (const r of requestHistory) {
|
|
2556
|
-
|
|
2716
|
+
// Pass same cache tokens to baseline so savings only reflect routing decisions,
|
|
2717
|
+
// not prompt-cache discounts (those happen regardless of which model is chosen).
|
|
2718
|
+
const origCost = (0, telemetry_js_1.estimateCost)(OPUS_BASELINE, r.tokensIn, r.tokensOut, r.cacheCreationTokens || undefined, r.cacheReadTokens || undefined);
|
|
2557
2719
|
const actualCost = r.costUsd;
|
|
2558
2720
|
const saved = Math.max(0, origCost - actualCost);
|
|
2559
2721
|
totalOriginalCost += origCost;
|
|
@@ -2633,6 +2795,63 @@ async function startProxy(config = {}) {
|
|
|
2633
2795
|
res.end(JSON.stringify({ error: 'Not found' }));
|
|
2634
2796
|
return;
|
|
2635
2797
|
}
|
|
2798
|
+
// === Agent tracking API ===
|
|
2799
|
+
// === /api/runs/:id — full request/response content for a single run ===
|
|
2800
|
+
const runsIdMatch = pathname.match(/^\/api\/runs\/(.+)$/);
|
|
2801
|
+
if (req.method === 'GET' && runsIdMatch) {
|
|
2802
|
+
const runId = runsIdMatch[1];
|
|
2803
|
+
const run = requestHistory.find(r => r.id === runId);
|
|
2804
|
+
if (!run) {
|
|
2805
|
+
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
2806
|
+
res.end(JSON.stringify({ error: 'Run not found' }));
|
|
2807
|
+
return;
|
|
2808
|
+
}
|
|
2809
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2810
|
+
res.end(JSON.stringify({
|
|
2811
|
+
id: run.id,
|
|
2812
|
+
model: run.targetModel,
|
|
2813
|
+
provider: run.provider,
|
|
2814
|
+
timestamp: run.timestamp,
|
|
2815
|
+
tokensIn: run.tokensIn,
|
|
2816
|
+
tokensOut: run.tokensOut,
|
|
2817
|
+
costUsd: run.costUsd,
|
|
2818
|
+
latencyMs: run.latencyMs,
|
|
2819
|
+
success: run.success,
|
|
2820
|
+
requestContent: run.requestContent,
|
|
2821
|
+
}));
|
|
2822
|
+
return;
|
|
2823
|
+
}
|
|
2824
|
+
if (req.method === 'GET' && pathname === '/api/agents') {
|
|
2825
|
+
const summaries = (0, agent_tracker_js_1.getAgentSummaries)(requestHistory);
|
|
2826
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2827
|
+
res.end(JSON.stringify({ agents: summaries }));
|
|
2828
|
+
return;
|
|
2829
|
+
}
|
|
2830
|
+
if (req.method === 'POST' && pathname === '/api/agents/rename') {
|
|
2831
|
+
try {
|
|
2832
|
+
const body = await readJsonBody(req);
|
|
2833
|
+
const fingerprint = body['fingerprint'];
|
|
2834
|
+
const name = body['name'];
|
|
2835
|
+
if (!fingerprint || !name) {
|
|
2836
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
2837
|
+
res.end(JSON.stringify({ error: 'Missing fingerprint or name' }));
|
|
2838
|
+
return;
|
|
2839
|
+
}
|
|
2840
|
+
const ok = (0, agent_tracker_js_1.renameAgent)(fingerprint, name);
|
|
2841
|
+
if (!ok) {
|
|
2842
|
+
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
2843
|
+
res.end(JSON.stringify({ error: 'Agent not found' }));
|
|
2844
|
+
return;
|
|
2845
|
+
}
|
|
2846
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2847
|
+
res.end(JSON.stringify({ ok: true }));
|
|
2848
|
+
}
|
|
2849
|
+
catch {
|
|
2850
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
2851
|
+
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
2852
|
+
}
|
|
2853
|
+
return;
|
|
2854
|
+
}
|
|
2636
2855
|
// === Dashboard ===
|
|
2637
2856
|
if (req.method === 'GET' && (pathname === '/' || pathname === '/dashboard')) {
|
|
2638
2857
|
res.writeHead(200, { 'Content-Type': 'text/html' });
|
|
@@ -2713,6 +2932,14 @@ async function startProxy(config = {}) {
|
|
|
2713
2932
|
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
2714
2933
|
return;
|
|
2715
2934
|
}
|
|
2935
|
+
// Extract agent fingerprint and explicit agent ID
|
|
2936
|
+
const nativeSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(requestBody);
|
|
2937
|
+
const nativeExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
|
|
2938
|
+
let nativeAgentFingerprint;
|
|
2939
|
+
if (nativeSystemPrompt) {
|
|
2940
|
+
const agentResult = (0, agent_tracker_js_1.trackAgent)(nativeSystemPrompt, 0, nativeExplicitAgentId);
|
|
2941
|
+
nativeAgentFingerprint = agentResult.fingerprint;
|
|
2942
|
+
}
|
|
2716
2943
|
const originalModel = requestBody['model'];
|
|
2717
2944
|
let requestedModel = headerModelOverride ?? originalModel ?? '';
|
|
2718
2945
|
if (headerModelOverride) {
|
|
@@ -3151,7 +3378,7 @@ async function startProxy(config = {}) {
|
|
|
3151
3378
|
model: targetModel || requestedModel,
|
|
3152
3379
|
tokensIn: nativeUsage?.input_tokens ?? 0,
|
|
3153
3380
|
tokensOut: nativeUsage?.output_tokens ?? 0,
|
|
3154
|
-
costUsd: (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeUsage?.input_tokens ?? 0, nativeUsage?.output_tokens ?? 0),
|
|
3381
|
+
costUsd: (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeUsage?.input_tokens ?? 0, nativeUsage?.output_tokens ?? 0, nativeUsage?.cache_creation_input_tokens || undefined, nativeUsage?.cache_read_input_tokens || undefined),
|
|
3155
3382
|
taskType,
|
|
3156
3383
|
});
|
|
3157
3384
|
log(`Cache STORE for ${targetModel || requestedModel} (hash: ${cacheHash.slice(0, 8)})`);
|
|
@@ -3174,7 +3401,22 @@ async function startProxy(config = {}) {
|
|
|
3174
3401
|
const nativeTokIn = nativeBaseTokIn + nativeCacheCreation + nativeCacheRead;
|
|
3175
3402
|
// Cost calculation expects inputTokens to include cache tokens when cache params are provided
|
|
3176
3403
|
const nativeCostUsd = (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCacheCreation || undefined, nativeCacheRead || undefined);
|
|
3177
|
-
|
|
3404
|
+
// Build request content if logging enabled
|
|
3405
|
+
let nativeContentData;
|
|
3406
|
+
if (isContentLoggingEnabled()) {
|
|
3407
|
+
const extracted = extractRequestContent(requestBody, true);
|
|
3408
|
+
const responseText = nativeResponseData ? extractResponseText(nativeResponseData, true) : '';
|
|
3409
|
+
nativeContentData = {
|
|
3410
|
+
...extracted,
|
|
3411
|
+
responsePreview: responseText ? responseText.slice(0, 500) : undefined,
|
|
3412
|
+
fullResponse: responseText || undefined,
|
|
3413
|
+
};
|
|
3414
|
+
}
|
|
3415
|
+
updateLastHistoryEntry(nativeTokIn, nativeTokOut, nativeCostUsd, undefined, nativeCacheCreation || undefined, nativeCacheRead || undefined, nativeAgentFingerprint, nativeExplicitAgentId, nativeContentData);
|
|
3416
|
+
// Update agent cost now that we know the actual cost
|
|
3417
|
+
if (nativeAgentFingerprint && nativeAgentFingerprint !== 'unknown') {
|
|
3418
|
+
(0, agent_tracker_js_1.updateAgentCost)(nativeAgentFingerprint, nativeCostUsd);
|
|
3419
|
+
}
|
|
3178
3420
|
// ── Post-request: budget spend + anomaly detection ──
|
|
3179
3421
|
postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
|
|
3180
3422
|
if (recordTelemetry) {
|
|
@@ -3183,6 +3425,10 @@ async function startProxy(config = {}) {
|
|
|
3183
3425
|
prompt: promptText.slice(0, 500),
|
|
3184
3426
|
taskType,
|
|
3185
3427
|
model: `${targetProvider}:${targetModel || requestedModel}`,
|
|
3428
|
+
})
|
|
3429
|
+
.then((runResult) => {
|
|
3430
|
+
// Backfill token/cost data — relay.run() has no adapters so records NULLs
|
|
3431
|
+
relay.patchRunTokens(runResult.runId, nativeTokIn, nativeTokOut, nativeCostUsd);
|
|
3186
3432
|
})
|
|
3187
3433
|
.catch(() => { });
|
|
3188
3434
|
sendCloudTelemetry(taskType, targetModel || requestedModel, nativeTokIn, nativeTokOut, durationMs, true, undefined, originalModel ?? undefined, nativeCacheCreation || undefined, nativeCacheRead || undefined);
|
|
@@ -3268,6 +3514,14 @@ async function startProxy(config = {}) {
|
|
|
3268
3514
|
return;
|
|
3269
3515
|
}
|
|
3270
3516
|
const isStreaming = request.stream === true;
|
|
3517
|
+
// Extract agent fingerprint for chat/completions
|
|
3518
|
+
const chatSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(request);
|
|
3519
|
+
const chatExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
|
|
3520
|
+
let chatAgentFingerprint;
|
|
3521
|
+
if (chatSystemPrompt) {
|
|
3522
|
+
const agentResult = (0, agent_tracker_js_1.trackAgent)(chatSystemPrompt, 0, chatExplicitAgentId);
|
|
3523
|
+
chatAgentFingerprint = agentResult.fingerprint;
|
|
3524
|
+
}
|
|
3271
3525
|
// ── Response Cache: check for cached response (chat/completions) ──
|
|
3272
3526
|
const chatCacheBypass = responseCache.shouldBypass(request);
|
|
3273
3527
|
let chatCacheHash;
|
|
@@ -3568,7 +3822,7 @@ async function startProxy(config = {}) {
|
|
|
3568
3822
|
const startTime = Date.now();
|
|
3569
3823
|
// Handle streaming vs non-streaming
|
|
3570
3824
|
if (isStreaming) {
|
|
3571
|
-
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass);
|
|
3825
|
+
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass, chatAgentFingerprint, chatExplicitAgentId);
|
|
3572
3826
|
}
|
|
3573
3827
|
else {
|
|
3574
3828
|
if (useCascade && cascadeConfig) {
|
|
@@ -3605,8 +3859,12 @@ async function startProxy(config = {}) {
|
|
|
3605
3859
|
const cascadeUsage = responseData?.usage;
|
|
3606
3860
|
const cascadeTokensIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
|
|
3607
3861
|
const cascadeTokensOut = cascadeUsage?.output_tokens ?? cascadeUsage?.completion_tokens ?? 0;
|
|
3608
|
-
const
|
|
3609
|
-
|
|
3862
|
+
const cascadeCacheCreation = cascadeUsage?.cache_creation_input_tokens || undefined;
|
|
3863
|
+
const cascadeCacheRead = cascadeUsage?.cache_read_input_tokens || undefined;
|
|
3864
|
+
const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut, cascadeCacheCreation, cascadeCacheRead);
|
|
3865
|
+
updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel, cascadeCacheCreation, cascadeCacheRead, chatAgentFingerprint, chatExplicitAgentId);
|
|
3866
|
+
if (chatAgentFingerprint && chatAgentFingerprint !== 'unknown')
|
|
3867
|
+
(0, agent_tracker_js_1.updateAgentCost)(chatAgentFingerprint, cascadeCost);
|
|
3610
3868
|
if (recordTelemetry) {
|
|
3611
3869
|
try {
|
|
3612
3870
|
const runResult = await relay.run({
|
|
@@ -3614,6 +3872,8 @@ async function startProxy(config = {}) {
|
|
|
3614
3872
|
taskType,
|
|
3615
3873
|
model: `${cascadeResult.provider}:${cascadeResult.model}`,
|
|
3616
3874
|
});
|
|
3875
|
+
// Backfill token/cost data — relay.run() has no adapters so records NULLs
|
|
3876
|
+
relay.patchRunTokens(runResult.runId, cascadeTokensIn, cascadeTokensOut, cascadeCost);
|
|
3617
3877
|
responseData['_relayplane'] = {
|
|
3618
3878
|
runId: runResult.runId,
|
|
3619
3879
|
routedTo: `${cascadeResult.provider}/${cascadeResult.model}`,
|
|
@@ -3628,7 +3888,7 @@ async function startProxy(config = {}) {
|
|
|
3628
3888
|
catch (err) {
|
|
3629
3889
|
log(`Failed to record run: ${err}`);
|
|
3630
3890
|
}
|
|
3631
|
-
sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
|
|
3891
|
+
sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined, cascadeCacheCreation, cascadeCacheRead);
|
|
3632
3892
|
meshCapture(cascadeResult.model, cascadeResult.provider, taskType, cascadeTokensIn, cascadeTokensOut, cascadeCost, durationMs, true);
|
|
3633
3893
|
}
|
|
3634
3894
|
const chatCascadeRpHeaders = buildRelayPlaneResponseHeaders(cascadeResult.model, originalRequestedModel ?? 'unknown', complexity, cascadeResult.provider, 'cascade');
|
|
@@ -3649,7 +3909,7 @@ async function startProxy(config = {}) {
|
|
|
3649
3909
|
}
|
|
3650
3910
|
}
|
|
3651
3911
|
else {
|
|
3652
|
-
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
|
|
3912
|
+
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId);
|
|
3653
3913
|
}
|
|
3654
3914
|
}
|
|
3655
3915
|
});
|
|
@@ -3792,7 +4052,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
|
|
|
3792
4052
|
}
|
|
3793
4053
|
return { responseData, ok: true, status: 200 };
|
|
3794
4054
|
}
|
|
3795
|
-
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass) {
|
|
4055
|
+
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass, agentFingerprint, agentId) {
|
|
3796
4056
|
let providerResponse;
|
|
3797
4057
|
try {
|
|
3798
4058
|
switch (targetProvider) {
|
|
@@ -3845,9 +4105,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3845
4105
|
'Connection': 'keep-alive',
|
|
3846
4106
|
...streamRpHeaders,
|
|
3847
4107
|
});
|
|
3848
|
-
// Track token usage from streaming events
|
|
4108
|
+
// Track token usage from streaming events (including Anthropic prompt cache tokens)
|
|
3849
4109
|
let streamTokensIn = 0;
|
|
3850
4110
|
let streamTokensOut = 0;
|
|
4111
|
+
let streamCacheCreation = 0;
|
|
4112
|
+
let streamCacheRead = 0;
|
|
3851
4113
|
const shouldCacheStream = !!(cacheHash && !cacheBypass);
|
|
3852
4114
|
const rawChunks = [];
|
|
3853
4115
|
try {
|
|
@@ -3859,7 +4121,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3859
4121
|
res.write(chunk);
|
|
3860
4122
|
if (shouldCacheStream)
|
|
3861
4123
|
rawChunks.push(chunk);
|
|
3862
|
-
// Parse OpenAI-format chunks for usage
|
|
4124
|
+
// Parse OpenAI-format chunks for usage — the converter embeds
|
|
4125
|
+
// cache_creation_tokens and cache_read_tokens from message_start.
|
|
3863
4126
|
try {
|
|
3864
4127
|
const lines = chunk.split('\n');
|
|
3865
4128
|
for (const line of lines) {
|
|
@@ -3868,6 +4131,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3868
4131
|
if (evt.usage) {
|
|
3869
4132
|
streamTokensIn = evt.usage.prompt_tokens ?? streamTokensIn;
|
|
3870
4133
|
streamTokensOut = evt.usage.completion_tokens ?? streamTokensOut;
|
|
4134
|
+
streamCacheCreation = evt.usage.cache_creation_tokens ?? streamCacheCreation;
|
|
4135
|
+
streamCacheRead = evt.usage.cache_read_tokens ?? streamCacheRead;
|
|
3871
4136
|
}
|
|
3872
4137
|
}
|
|
3873
4138
|
}
|
|
@@ -3927,13 +4192,13 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3927
4192
|
const streamPayload = JSON.stringify({
|
|
3928
4193
|
_relayplaneStreamCache: true,
|
|
3929
4194
|
ssePayload: rawChunks.join(''),
|
|
3930
|
-
usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut, prompt_tokens: streamTokensIn, completion_tokens: streamTokensOut },
|
|
4195
|
+
usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut, prompt_tokens: streamTokensIn, completion_tokens: streamTokensOut, cache_creation_input_tokens: streamCacheCreation, cache_read_input_tokens: streamCacheRead },
|
|
3931
4196
|
});
|
|
3932
4197
|
responseCache.set(cacheHash, streamPayload, {
|
|
3933
4198
|
model: targetModel,
|
|
3934
4199
|
tokensIn: streamTokensIn,
|
|
3935
4200
|
tokensOut: streamTokensOut,
|
|
3936
|
-
costUsd: (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut),
|
|
4201
|
+
costUsd: (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut, streamCacheCreation || undefined, streamCacheRead || undefined),
|
|
3937
4202
|
taskType,
|
|
3938
4203
|
});
|
|
3939
4204
|
log(`Cache STORE (stream) for chat/completions ${targetModel} (hash: ${cacheHash.slice(0, 8)})`);
|
|
@@ -3944,9 +4209,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3944
4209
|
const durationMs = Date.now() - startTime;
|
|
3945
4210
|
// Always log the request for stats/telemetry tracking
|
|
3946
4211
|
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
|
|
3947
|
-
// Update token/cost info on the history entry
|
|
3948
|
-
const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
|
|
3949
|
-
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
|
|
4212
|
+
// Update token/cost info on the history entry (with cache token discount)
|
|
4213
|
+
const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut, streamCacheCreation || undefined, streamCacheRead || undefined);
|
|
4214
|
+
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost, undefined, streamCacheCreation || undefined, streamCacheRead || undefined, agentFingerprint, agentId);
|
|
4215
|
+
if (agentFingerprint && agentFingerprint !== 'unknown')
|
|
4216
|
+
(0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, streamCost);
|
|
3950
4217
|
// ── Post-request: budget spend + anomaly detection ──
|
|
3951
4218
|
try {
|
|
3952
4219
|
(0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
|
|
@@ -3967,12 +4234,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3967
4234
|
model: `${targetProvider}:${targetModel}`,
|
|
3968
4235
|
})
|
|
3969
4236
|
.then((runResult) => {
|
|
4237
|
+
// Backfill token/cost data — relay.run() has no adapters so records NULLs
|
|
4238
|
+
relay.patchRunTokens(runResult.runId, streamTokensIn, streamTokensOut, streamCost);
|
|
3970
4239
|
log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
|
|
3971
4240
|
})
|
|
3972
4241
|
.catch((err) => {
|
|
3973
4242
|
log(`Failed to record run: ${err}`);
|
|
3974
4243
|
});
|
|
3975
|
-
sendCloudTelemetry(taskType, targetModel, streamTokensIn, streamTokensOut, durationMs, true, undefined, request.model ?? undefined);
|
|
4244
|
+
sendCloudTelemetry(taskType, targetModel, streamTokensIn, streamTokensOut, durationMs, true, undefined, request.model ?? undefined, streamCacheCreation || undefined, streamCacheRead || undefined);
|
|
3976
4245
|
meshCapture(targetModel, targetProvider, taskType, streamTokensIn, streamTokensOut, streamCost, durationMs, true);
|
|
3977
4246
|
}
|
|
3978
4247
|
res.end();
|
|
@@ -3980,7 +4249,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3980
4249
|
/**
|
|
3981
4250
|
* Handle non-streaming request
|
|
3982
4251
|
*/
|
|
3983
|
-
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
|
|
4252
|
+
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId) {
|
|
3984
4253
|
let responseData;
|
|
3985
4254
|
try {
|
|
3986
4255
|
const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
|
|
@@ -4015,12 +4284,16 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
4015
4284
|
const nonStreamRespModel = checkResponseModelMismatch(responseData, targetModel, targetProvider, log);
|
|
4016
4285
|
// Log the successful request
|
|
4017
4286
|
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
|
|
4018
|
-
// Update token/cost info
|
|
4287
|
+
// Update token/cost info (including Anthropic prompt cache tokens)
|
|
4019
4288
|
const usage = responseData?.usage;
|
|
4020
4289
|
const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|
|
4021
4290
|
const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
|
|
4022
|
-
const
|
|
4023
|
-
|
|
4291
|
+
const cacheCreationTokens = usage?.cache_creation_input_tokens ?? 0;
|
|
4292
|
+
const cacheReadTokens = usage?.cache_read_input_tokens ?? 0;
|
|
4293
|
+
const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut, cacheCreationTokens || undefined, cacheReadTokens || undefined);
|
|
4294
|
+
updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel, cacheCreationTokens || undefined, cacheReadTokens || undefined, agentFingerprint, agentId);
|
|
4295
|
+
if (agentFingerprint && agentFingerprint !== 'unknown')
|
|
4296
|
+
(0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, cost);
|
|
4024
4297
|
// ── Post-request: budget spend + anomaly detection ──
|
|
4025
4298
|
try {
|
|
4026
4299
|
(0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);
|
|
@@ -4040,6 +4313,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
4040
4313
|
taskType,
|
|
4041
4314
|
model: `${targetProvider}:${targetModel}`,
|
|
4042
4315
|
});
|
|
4316
|
+
// Backfill token/cost data — relay.run() has no adapters so records NULLs
|
|
4317
|
+
relay.patchRunTokens(runResult.runId, tokensIn, tokensOut, cost);
|
|
4043
4318
|
// Add routing metadata to response
|
|
4044
4319
|
responseData['_relayplane'] = {
|
|
4045
4320
|
runId: runResult.runId,
|
|
@@ -4054,12 +4329,14 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
4054
4329
|
catch (err) {
|
|
4055
4330
|
log(`Failed to record run: ${err}`);
|
|
4056
4331
|
}
|
|
4057
|
-
// Extract token counts from response if available (Anthropic/OpenAI format)
|
|
4058
|
-
const
|
|
4059
|
-
const
|
|
4060
|
-
const
|
|
4061
|
-
|
|
4062
|
-
|
|
4332
|
+
// Extract token counts from response if available (Anthropic/OpenAI format, including cache)
|
|
4333
|
+
const innerUsage = responseData?.usage;
|
|
4334
|
+
const innerTokIn = innerUsage?.input_tokens ?? innerUsage?.prompt_tokens ?? 0;
|
|
4335
|
+
const innerTokOut = innerUsage?.output_tokens ?? innerUsage?.completion_tokens ?? 0;
|
|
4336
|
+
const innerCacheCreation = innerUsage?.cache_creation_input_tokens ?? 0;
|
|
4337
|
+
const innerCacheRead = innerUsage?.cache_read_input_tokens ?? 0;
|
|
4338
|
+
sendCloudTelemetry(taskType, targetModel, innerTokIn, innerTokOut, durationMs, true, undefined, undefined, innerCacheCreation || undefined, innerCacheRead || undefined);
|
|
4339
|
+
meshCapture(targetModel, targetProvider, taskType, innerTokIn, innerTokOut, cost, durationMs, true);
|
|
4063
4340
|
}
|
|
4064
4341
|
// ── Cache: store non-streaming chat/completions response ──
|
|
4065
4342
|
const chatRespCache = (0, response_cache_js_1.getResponseCache)();
|