@relayplane/proxy 1.7.2 → 1.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -56,6 +56,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
56
56
  exports.SMART_ALIASES = exports.RELAYPLANE_ALIASES = exports.MODEL_MAPPING = exports.DEFAULT_ENDPOINTS = exports.proxyStatsCollector = void 0;
57
57
  exports.getAvailableModelNames = getAvailableModelNames;
58
58
  exports.resolveModelAlias = resolveModelAlias;
59
+ exports.extractRequestContent = extractRequestContent;
60
+ exports.extractResponseText = extractResponseText;
59
61
  exports.parseModelSuffix = parseModelSuffix;
60
62
  exports.classifyComplexity = classifyComplexity;
61
63
  exports.shouldEscalate = shouldEscalate;
@@ -76,6 +78,7 @@ const budget_js_1 = require("./budget.js");
76
78
  const anomaly_js_1 = require("./anomaly.js");
77
79
  const alerts_js_1 = require("./alerts.js");
78
80
  const downgrade_js_1 = require("./downgrade.js");
81
+ const agent_tracker_js_1 = require("./agent-tracker.js");
79
82
  const version_status_js_1 = require("./utils/version-status.js");
80
83
  const PROXY_VERSION = (() => {
81
84
  try {
@@ -488,7 +491,7 @@ function shutdownHistory() {
488
491
  }
489
492
  flushHistoryBuffer();
490
493
  }
491
- function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity) {
494
+ function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity, agentFingerprint, agentId) {
492
495
  const timestamp = new Date().toISOString();
493
496
  const status = success ? '✓' : '✗';
494
497
  const escalateTag = escalated ? ' [ESCALATED]' : '';
@@ -531,6 +534,8 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
531
534
  costUsd: 0,
532
535
  taskType: taskType || 'general',
533
536
  complexity: complexity || 'simple',
537
+ agentFingerprint,
538
+ agentId,
534
539
  };
535
540
  requestHistory.push(entry);
536
541
  if (requestHistory.length > MAX_HISTORY) {
@@ -539,7 +544,7 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
539
544
  bufferHistoryEntry(entry);
540
545
  }
541
546
  /** Update the most recent history entry with token/cost info */
542
- function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cacheCreationTokens, cacheReadTokens) {
547
+ function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cacheCreationTokens, cacheReadTokens, agentFingerprint, agentId, requestContent) {
543
548
  if (requestHistory.length > 0) {
544
549
  const last = requestHistory[requestHistory.length - 1];
545
550
  last.tokensIn = tokensIn;
@@ -552,8 +557,82 @@ function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cac
552
557
  last.cacheCreationTokens = cacheCreationTokens;
553
558
  if (cacheReadTokens !== undefined)
554
559
  last.cacheReadTokens = cacheReadTokens;
560
+ if (agentFingerprint !== undefined)
561
+ last.agentFingerprint = agentFingerprint;
562
+ if (agentId !== undefined)
563
+ last.agentId = agentId;
564
+ if (requestContent)
565
+ last.requestContent = requestContent;
555
566
  }
556
567
  }
568
/**
 * Extract a loggable summary of a request body. Handles Anthropic and OpenAI formats.
 *
 * - Anthropic: the system prompt is read from the top-level "system" field
 *   (a string, or an array of text blocks / bare strings).
 * - OpenAI: the system prompt is read from the first message whose role is "system"
 *   (string content, or an array of text parts).
 * - Both: the user message is the text of the LAST message whose role is "user".
 *
 * Malformed input (missing body, null array entries, non-text parts) is skipped
 * instead of raising a TypeError, because this helper is called inline while a
 * request is being handled.
 *
 * @param {object} body - Parsed request payload.
 * @param {boolean} isAnthropic - True for the Anthropic format, false for OpenAI.
 * @returns {{systemPrompt: (string|undefined), userMessage: (string|undefined)}}
 *   systemPrompt is cut to 200 characters; empty values are reported as undefined.
 */
function extractRequestContent(body, isAnthropic) {
    // Concatenate the text of every { type: 'text' } part; anything else contributes ''.
    const joinTextParts = (parts, keepBareStrings) => parts
        .map((part) => {
            if (typeof part === 'string') {
                return keepBareStrings ? part : '';
            }
            return part?.type === 'text' ? (part.text ?? '') : '';
        })
        .join('');
    // Normalise "string | array of parts | anything else" to a plain string.
    const toText = (content, keepBareStrings = false) => {
        if (typeof content === 'string') {
            return content;
        }
        return Array.isArray(content) ? joinTextParts(content, keepBareStrings) : '';
    };
    const messages = Array.isArray(body?.messages) ? body.messages : [];
    let systemPrompt = '';
    if (isAnthropic) {
        // Bare strings inside the system array are kept, as before.
        systemPrompt = toText(body?.system, true);
    }
    else {
        // Only the first system message is considered, even when its content is empty.
        const firstSystem = messages.find((msg) => msg?.role === 'system');
        systemPrompt = toText(firstSystem?.content);
    }
    let userMessage = '';
    // Walk backwards so the most recent user turn wins.
    for (let i = messages.length - 1; i >= 0; i--) {
        if (messages[i]?.role === 'user') {
            userMessage = toText(messages[i].content);
            break;
        }
    }
    return {
        systemPrompt: systemPrompt ? systemPrompt.slice(0, 200) : undefined,
        userMessage: userMessage || undefined,
    };
}
618
/**
 * Extract the assistant's reply text from a provider response payload.
 *
 * - Anthropic: joins the text of every { type: 'text' } block in responseData.content.
 * - OpenAI: reads choices[0].message.content, which may be a string or an array
 *   of text parts.
 *
 * The result is always a string: the caller slices it for a preview and the
 * dashboard HTML-escapes it, both of which assume string input.
 *
 * @param {object} responseData - Parsed provider response.
 * @param {boolean} isAnthropic - True for the Anthropic format, false for OpenAI.
 * @returns {string} The concatenated text, or '' when none can be found.
 */
function extractResponseText(responseData, isAnthropic) {
    // Null or non-text blocks (tool calls, images, ...) contribute nothing.
    const joinTextBlocks = (blocks) => blocks
        .filter((block) => block?.type === 'text')
        .map((block) => block.text ?? '')
        .join('');
    if (isAnthropic) {
        const blocks = responseData?.content;
        return Array.isArray(blocks) ? joinTextBlocks(blocks) : '';
    }
    const choices = responseData?.choices;
    const content = Array.isArray(choices) ? choices[0]?.message?.content : undefined;
    if (typeof content === 'string') {
        return content;
    }
    return Array.isArray(content) ? joinTextBlocks(content) : '';
}
557
636
  const DEFAULT_PROXY_CONFIG = {
558
637
  enabled: true,
559
638
  modelOverrides: {},
@@ -584,6 +663,11 @@ const DEFAULT_PROXY_CONFIG = {
584
663
  },
585
664
  },
586
665
  };
666
+ /** Module-level ref to active proxy config (set during startProxy) */
667
+ let _activeProxyConfig = {};
668
/**
 * Report whether request/response content should be captured.
 * Capture is on unless the active proxy config explicitly sets
 * dashboard.showRequestContent to false.
 *
 * @returns {boolean}
 */
function isContentLoggingEnabled() {
    const dashboardSettings = _activeProxyConfig.dashboard;
    if (dashboardSettings === undefined || dashboardSettings === null) {
        // No dashboard section configured: content logging stays enabled.
        return true;
    }
    return dashboardSettings.showRequestContent !== false;
}
587
671
  function getProxyConfigPath() {
588
672
  const customPath = process.env['RELAYPLANE_CONFIG_PATH'];
589
673
  if (customPath && customPath.trim())
@@ -1772,7 +1856,7 @@ function resolveExplicitModel(modelName) {
1772
1856
  function resolveConfigModel(modelName) {
1773
1857
  return resolveExplicitModel(modelName) ?? parsePreferredModel(modelName);
1774
1858
  }
1775
- function extractResponseText(responseData) {
1859
+ function extractResponseTextAuto(responseData) {
1776
1860
  const openAiChoices = responseData['choices'];
1777
1861
  if (openAiChoices && openAiChoices.length > 0) {
1778
1862
  const first = openAiChoices[0];
@@ -1940,7 +2024,7 @@ async function cascadeRequest(config, makeRequest, log) {
1940
2024
  const isLastModel = i === config.models.length - 1;
1941
2025
  try {
1942
2026
  const { responseData, provider, model: resolvedModel } = await makeRequest(model);
1943
- const text = extractResponseText(responseData);
2027
+ const text = extractResponseTextAuto(responseData);
1944
2028
  if (isLastModel || escalations >= config.maxEscalations) {
1945
2029
  return { responseData, provider, model: resolvedModel, escalations };
1946
2030
  }
@@ -1992,6 +2076,7 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
1992
2076
  .vstat.unavailable{color:#a3a3a3;border-color:#52525b66;background:#18181b66}
1993
2077
  @media(max-width:768px){.col-tt,.col-cx{display:none}}
1994
2078
  .prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
2079
+ .rename-btn{background:none;border:none;cursor:pointer;font-size:.75rem;opacity:.5;padding:2px}.rename-btn:hover{opacity:1}
1995
2080
  </style></head><body>
1996
2081
  <div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><a href="/dashboard/config">Config</a> · <span id="ver"></span><span id="vstat" class="vstat unavailable">Unable to check</span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
1997
2082
  <div class="cards">
@@ -2002,6 +2087,8 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
2002
2087
  </div>
2003
2088
  <div class="section"><h2>Model Breakdown</h2>
2004
2089
  <table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
2090
+ <div class="section"><h2>Agent Cost Breakdown</h2>
2091
+ <table><thead><tr><th>Agent</th><th>Requests</th><th>Total Cost</th><th>Last Active</th><th></th></tr></thead><tbody id="agents"></tbody></table></div>
2005
2092
  <div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
2006
2093
  <div class="section"><h2>Recent Runs</h2>
2007
2094
  <table><thead><tr><th>Time</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
@@ -2012,12 +2099,13 @@ function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
2012
2099
  function dur(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60);return h?h+'h '+m+'m':m+'m'}
2013
2100
  async function load(){
2014
2101
  try{
2015
- const [health,stats,runsR,sav,provH]=await Promise.all([
2102
+ const [health,stats,runsR,sav,provH,agentsR]=await Promise.all([
2016
2103
  fetch('/health').then(r=>r.json()),
2017
2104
  fetch('/v1/telemetry/stats').then(r=>r.json()),
2018
2105
  fetch('/v1/telemetry/runs?limit=20').then(r=>r.json()),
2019
2106
  fetch('/v1/telemetry/savings').then(r=>r.json()),
2020
- fetch('/v1/telemetry/health').then(r=>r.json())
2107
+ fetch('/v1/telemetry/health').then(r=>r.json()),
2108
+ fetch('/api/agents').then(r=>r.json()).catch(()=>({agents:[]}))
2021
2109
  ]);
2022
2110
  $('ver').textContent='v'+health.version;
2023
2111
  $('uptime').textContent=dur(health.uptime);
@@ -2044,9 +2132,26 @@ async function load(){
2044
2132
  ).join('')||'<tr><td colspan=4 style="color:#64748b">No data yet</td></tr>';
2045
2133
  function ttCls(t){const m={code_generation:'tt-code',analysis:'tt-analysis',summarization:'tt-summarization',question_answering:'tt-qa'};return m[t]||'tt-general'}
2046
2134
  function cxCls(c){const m={simple:'cx-simple',moderate:'cx-moderate',complex:'cx-complex'};return m[c]||'cx-simple'}
2047
- $('runs').innerHTML=(runsR.runs||[]).map(r=>
2048
- '<tr><td>'+fmtTime(r.started_at)+'</td><td>'+r.model+'</td><td class="col-tt"><span class="badge '+ttCls(r.taskType)+'">'+(r.taskType||'general').replace(/_/g,' ')+'</span></td><td class="col-cx"><span class="badge '+cxCls(r.complexity)+'">'+(r.complexity||'simple')+'</span></td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td class="col-cache" style="color:#60a5fa">'+(r.cacheCreationTokens||0)+'</td><td class="col-cache" style="color:#34d399">'+(r.cacheReadTokens||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+r.status+'</span></td></tr>'
2049
- ).join('')||'<tr><td colspan=11 style="color:#64748b">No runs yet</td></tr>';
2135
// HTML-escape a value before it is concatenated into markup.
// Falsy input yields ''. Other values are coerced to a string first, so a
// non-string payload cannot throw. Quotes are escaped as well, which keeps
// the result safe inside attribute values and not only in element content.
function esc(s){
if(!s)return'';
return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
}
2136
+ $('runs').innerHTML=(runsR.runs||[]).map((r,i)=>{
2137
+ const row='<tr style="cursor:pointer" onclick="toggleDetail('+i+')" title="Click to expand"><td>'+fmtTime(r.started_at)+'</td><td>'+r.model+'</td><td class="col-tt"><span class="badge '+ttCls(r.taskType)+'">'+(r.taskType||'general').replace(/_/g,' ')+'</span></td><td class="col-cx"><span class="badge '+cxCls(r.complexity)+'">'+(r.complexity||'simple')+'</span></td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td class="col-cache" style="color:#60a5fa">'+(r.cacheCreationTokens||0)+'</td><td class="col-cache" style="color:#34d399">'+(r.cacheReadTokens||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+r.status+'</span></td></tr>';
2138
+ const c=r.requestContent||{};
2139
+ let detail='<tr id="run-detail-'+i+'" style="display:none"><td colspan="11" style="padding:16px;background:#111217;border-bottom:1px solid #1e293b">';
2140
+ if(c.systemPrompt||c.userMessage||c.responsePreview){
2141
+ if(c.systemPrompt) detail+='<div style="color:#64748b;font-size:.85rem;margin-bottom:10px;font-style:italic"><strong style="color:#94a3b8">System:</strong> '+esc(c.systemPrompt)+'</div>';
2142
+ if(c.userMessage) detail+='<div style="background:#1a1c23;border:1px solid #1e293b;border-radius:8px;padding:12px;margin-bottom:10px"><strong style="color:#94a3b8;font-size:.8rem">User Message</strong><div style="margin-top:6px;white-space:pre-wrap">'+esc(c.userMessage)+'</div></div>';
2143
+ if(c.responsePreview) detail+='<div style="background:#1a1c23;border:1px solid #1e293b;border-radius:8px;padding:12px;margin-bottom:10px"><strong style="color:#94a3b8;font-size:.8rem">Response Preview</strong><div style="margin-top:6px;white-space:pre-wrap">'+esc(c.responsePreview)+'</div></div>';
2144
+ detail+='<button onclick="event.stopPropagation();loadFullResponse(\''+r.id+'\','+i+')" id="full-btn-'+i+'" style="background:#1e293b;color:#e2e8f0;border:1px solid #334155;padding:6px 12px;border-radius:6px;cursor:pointer;font-size:.8rem">Show full response</button><pre id="full-resp-'+i+'" style="display:none;white-space:pre-wrap;margin-top:10px;background:#0d0e11;border:1px solid #1e293b;border-radius:8px;padding:12px;max-height:400px;overflow:auto;font-size:.8rem"></pre>';
2145
+ } else {
2146
+ detail+='<span style="color:#64748b">No content captured for this request</span>';
2147
+ }
2148
+ detail+='</td></tr>';
2149
+ return row+detail;
2150
+ }).join('')||'<tr><td colspan=11 style="color:#64748b">No runs yet</td></tr>';
2151
+ const agents=(agentsR.agents||[]).sort((a,b)=>(b.totalCost||0)-(a.totalCost||0));
2152
+ $('agents').innerHTML=agents.length?agents.map(a=>
2153
+ '<tr><td><span class="agent-name" data-fp="'+a.fingerprint+'">'+a.name+'</span> <button class="rename-btn" onclick="renameAgent(\''+a.fingerprint+'\',\''+a.name.replace(/'/g,"\\'")+'\')">✏️</button></td><td>'+a.totalRequests+'</td><td>$'+fmt(a.totalCost,4)+'</td><td>'+fmtTime(a.lastSeen)+'</td><td style="font-size:.7rem;color:#64748b" title="'+a.systemPromptPreview+'">'+a.fingerprint+'</td></tr>'
2154
+ ).join(''):'<tr><td colspan=5 style="color:#64748b">No agents detected yet</td></tr>';
2050
2155
  $('providers').innerHTML=(provH.providers||[]).map(p=>{
2051
2156
  const dotClass = p.status==='healthy'?'up':(p.status==='degraded'?'warn':'down');
2052
2157
  const rate = p.successRate!==undefined?(' '+Math.round(p.successRate*100)+'%'):'';
@@ -2054,8 +2159,27 @@ async function load(){
2054
2159
  }).join('');
2055
2160
  }catch(e){console.error(e)}
2056
2161
  }
2162
// Ask the user for a new agent name and POST it to /api/agents/rename.
// fp is the agent fingerprint; currentName pre-fills the prompt.
// Nothing is sent when the prompt is cancelled, left blank (after trimming)
// or unchanged. Network and HTTP failures are logged instead of surfacing as
// an unhandled rejection from the onclick handler; the table is reloaded
// only after a successful rename.
async function renameAgent(fp,currentName){
const answer=prompt('Rename agent:',currentName);
const name=typeof answer==='string'?answer.trim():'';
if(!name||name===currentName)return;
try{
const res=await fetch('/api/agents/rename',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({fingerprint:fp,name:name})});
if(!res.ok){console.error('Rename failed with HTTP '+res.status);return}
}catch(e){console.error(e);return}
load();
}
2168
// Show or hide the expandable detail row for run number i.
// Does nothing when the row is not in the DOM, rather than throwing
// from the onclick handler.
function toggleDetail(i){
const d=document.getElementById('run-detail-'+i);
if(!d)return;
d.style.display=d.style.display==='none'?'table-row':'none';
}
2169
// Toggle the full-response panel for run number i.
// When the panel is visible it is hidden again without any request.
// Otherwise the run is fetched from /api/runs/<id> and its
// requestContent.fullResponse is written with textContent (no HTML parsing).
// The run id is URL-encoded so reserved characters cannot change the request
// path. Does nothing when the button or panel is not in the DOM.
async function loadFullResponse(runId,i){
const btn=document.getElementById('full-btn-'+i);
const pre=document.getElementById('full-resp-'+i);
if(!btn||!pre)return;
if(pre.style.display!=='none'){pre.style.display='none';btn.textContent='Show full response';return}
btn.textContent='Loading...';
try{
const data=await fetch('/api/runs/'+encodeURIComponent(runId)).then(r=>r.json());
const full=data.requestContent&&data.requestContent.fullResponse;
if(full){pre.textContent=full;pre.style.display='block';btn.textContent='Hide full response'}
else{btn.textContent='No full response available'}
}catch{btn.textContent='Error loading response'}
}
2057
2181
  load();setInterval(load,5000);
2058
- </script></body></html>`;
2182
+ </script><footer style="text-align:center;padding:20px 0;color:#475569;font-size:.75rem;border-top:1px solid #1e293b;margin-top:20px">🔒 Request content stays on your machine. Never sent to cloud.</footer></body></html>`;
2059
2183
  }
2060
2184
  function getConfigDashboardHTML() {
2061
2185
  return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Config</title>
@@ -2152,8 +2276,10 @@ async function startProxy(config = {}) {
2152
2276
  };
2153
2277
  // Load persistent history from disk
2154
2278
  loadHistoryFromDisk();
2279
+ (0, agent_tracker_js_1.loadAgentRegistry)();
2155
2280
  // Flush history on shutdown
2156
2281
  const handleShutdown = () => {
2282
+ (0, agent_tracker_js_1.flushAgentRegistry)();
2157
2283
  meshHandle.stop();
2158
2284
  shutdownHistory();
2159
2285
  process.exit(0);
@@ -2162,6 +2288,7 @@ async function startProxy(config = {}) {
2162
2288
  process.on('SIGTERM', handleShutdown);
2163
2289
  const configPath = getProxyConfigPath();
2164
2290
  let proxyConfig = await loadProxyConfig(configPath, log);
2291
+ _activeProxyConfig = proxyConfig;
2165
2292
  const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
2166
2293
  // === Startup config validation (Task 4) ===
2167
2294
  try {
@@ -2565,6 +2692,12 @@ async function startProxy(config = {}) {
2565
2692
  cacheReadTokens: r.cacheReadTokens ?? 0,
2566
2693
  savings: Math.round(perRunSavings * 10000) / 10000,
2567
2694
  escalated: r.escalated,
2695
+ requestContent: r.requestContent ? {
2696
+ systemPrompt: r.requestContent.systemPrompt,
2697
+ userMessage: r.requestContent.userMessage,
2698
+ responsePreview: r.requestContent.responsePreview,
2699
+ // fullResponse excluded from list endpoint to keep payloads small
2700
+ } : undefined,
2568
2701
  };
2569
2702
  });
2570
2703
  res.writeHead(200, { 'Content-Type': 'application/json' });
@@ -2662,6 +2795,63 @@ async function startProxy(config = {}) {
2662
2795
  res.end(JSON.stringify({ error: 'Not found' }));
2663
2796
  return;
2664
2797
  }
2798
+ // === Run detail & agent tracking API ===
2799
+ // === /api/runs/:id — full request/response content for a single run ===
2800
+ const runsIdMatch = pathname.match(/^\/api\/runs\/(.+)$/);
2801
+ if (req.method === 'GET' && runsIdMatch) {
2802
+ const runId = runsIdMatch[1];
2803
+ const run = requestHistory.find(r => r.id === runId);
2804
+ if (!run) {
2805
+ res.writeHead(404, { 'Content-Type': 'application/json' });
2806
+ res.end(JSON.stringify({ error: 'Run not found' }));
2807
+ return;
2808
+ }
2809
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2810
+ res.end(JSON.stringify({
2811
+ id: run.id,
2812
+ model: run.targetModel,
2813
+ provider: run.provider,
2814
+ timestamp: run.timestamp,
2815
+ tokensIn: run.tokensIn,
2816
+ tokensOut: run.tokensOut,
2817
+ costUsd: run.costUsd,
2818
+ latencyMs: run.latencyMs,
2819
+ success: run.success,
2820
+ requestContent: run.requestContent,
2821
+ }));
2822
+ return;
2823
+ }
2824
+ if (req.method === 'GET' && pathname === '/api/agents') {
2825
+ const summaries = (0, agent_tracker_js_1.getAgentSummaries)(requestHistory);
2826
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2827
+ res.end(JSON.stringify({ agents: summaries }));
2828
+ return;
2829
+ }
2830
+ if (req.method === 'POST' && pathname === '/api/agents/rename') {
2831
+ try {
2832
+ const body = await readJsonBody(req);
2833
+ const fingerprint = body['fingerprint'];
2834
+ const name = body['name'];
2835
+ if (!fingerprint || !name) {
2836
+ res.writeHead(400, { 'Content-Type': 'application/json' });
2837
+ res.end(JSON.stringify({ error: 'Missing fingerprint or name' }));
2838
+ return;
2839
+ }
2840
+ const ok = (0, agent_tracker_js_1.renameAgent)(fingerprint, name);
2841
+ if (!ok) {
2842
+ res.writeHead(404, { 'Content-Type': 'application/json' });
2843
+ res.end(JSON.stringify({ error: 'Agent not found' }));
2844
+ return;
2845
+ }
2846
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2847
+ res.end(JSON.stringify({ ok: true }));
2848
+ }
2849
+ catch {
2850
+ res.writeHead(400, { 'Content-Type': 'application/json' });
2851
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
2852
+ }
2853
+ return;
2854
+ }
2665
2855
  // === Dashboard ===
2666
2856
  if (req.method === 'GET' && (pathname === '/' || pathname === '/dashboard')) {
2667
2857
  res.writeHead(200, { 'Content-Type': 'text/html' });
@@ -2742,6 +2932,14 @@ async function startProxy(config = {}) {
2742
2932
  res.end(JSON.stringify({ error: 'Invalid JSON' }));
2743
2933
  return;
2744
2934
  }
2935
+ // Extract agent fingerprint and explicit agent ID
2936
+ const nativeSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(requestBody);
2937
+ const nativeExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
2938
+ let nativeAgentFingerprint;
2939
+ if (nativeSystemPrompt) {
2940
+ const agentResult = (0, agent_tracker_js_1.trackAgent)(nativeSystemPrompt, 0, nativeExplicitAgentId);
2941
+ nativeAgentFingerprint = agentResult.fingerprint;
2942
+ }
2745
2943
  const originalModel = requestBody['model'];
2746
2944
  let requestedModel = headerModelOverride ?? originalModel ?? '';
2747
2945
  if (headerModelOverride) {
@@ -3203,7 +3401,22 @@ async function startProxy(config = {}) {
3203
3401
  const nativeTokIn = nativeBaseTokIn + nativeCacheCreation + nativeCacheRead;
3204
3402
  // Cost calculation expects inputTokens to include cache tokens when cache params are provided
3205
3403
  const nativeCostUsd = (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCacheCreation || undefined, nativeCacheRead || undefined);
3206
- updateLastHistoryEntry(nativeTokIn, nativeTokOut, nativeCostUsd, undefined, nativeCacheCreation || undefined, nativeCacheRead || undefined);
3404
+ // Build request content if logging enabled
3405
+ let nativeContentData;
3406
+ if (isContentLoggingEnabled()) {
3407
+ const extracted = extractRequestContent(requestBody, true);
3408
+ const responseText = nativeResponseData ? extractResponseText(nativeResponseData, true) : '';
3409
+ nativeContentData = {
3410
+ ...extracted,
3411
+ responsePreview: responseText ? responseText.slice(0, 500) : undefined,
3412
+ fullResponse: responseText || undefined,
3413
+ };
3414
+ }
3415
+ updateLastHistoryEntry(nativeTokIn, nativeTokOut, nativeCostUsd, undefined, nativeCacheCreation || undefined, nativeCacheRead || undefined, nativeAgentFingerprint, nativeExplicitAgentId, nativeContentData);
3416
+ // Update agent cost now that we know the actual cost
3417
+ if (nativeAgentFingerprint && nativeAgentFingerprint !== 'unknown') {
3418
+ (0, agent_tracker_js_1.updateAgentCost)(nativeAgentFingerprint, nativeCostUsd);
3419
+ }
3207
3420
  // ── Post-request: budget spend + anomaly detection ──
3208
3421
  postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
3209
3422
  if (recordTelemetry) {
@@ -3301,6 +3514,14 @@ async function startProxy(config = {}) {
3301
3514
  return;
3302
3515
  }
3303
3516
  const isStreaming = request.stream === true;
3517
+ // Extract agent fingerprint for chat/completions
3518
+ const chatSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(request);
3519
+ const chatExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
3520
+ let chatAgentFingerprint;
3521
+ if (chatSystemPrompt) {
3522
+ const agentResult = (0, agent_tracker_js_1.trackAgent)(chatSystemPrompt, 0, chatExplicitAgentId);
3523
+ chatAgentFingerprint = agentResult.fingerprint;
3524
+ }
3304
3525
  // ── Response Cache: check for cached response (chat/completions) ──
3305
3526
  const chatCacheBypass = responseCache.shouldBypass(request);
3306
3527
  let chatCacheHash;
@@ -3601,7 +3822,7 @@ async function startProxy(config = {}) {
3601
3822
  const startTime = Date.now();
3602
3823
  // Handle streaming vs non-streaming
3603
3824
  if (isStreaming) {
3604
- await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass);
3825
+ await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass, chatAgentFingerprint, chatExplicitAgentId);
3605
3826
  }
3606
3827
  else {
3607
3828
  if (useCascade && cascadeConfig) {
@@ -3641,7 +3862,9 @@ async function startProxy(config = {}) {
3641
3862
  const cascadeCacheCreation = cascadeUsage?.cache_creation_input_tokens || undefined;
3642
3863
  const cascadeCacheRead = cascadeUsage?.cache_read_input_tokens || undefined;
3643
3864
  const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut, cascadeCacheCreation, cascadeCacheRead);
3644
- updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel, cascadeCacheCreation, cascadeCacheRead);
3865
+ updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel, cascadeCacheCreation, cascadeCacheRead, chatAgentFingerprint, chatExplicitAgentId);
3866
+ if (chatAgentFingerprint && chatAgentFingerprint !== 'unknown')
3867
+ (0, agent_tracker_js_1.updateAgentCost)(chatAgentFingerprint, cascadeCost);
3645
3868
  if (recordTelemetry) {
3646
3869
  try {
3647
3870
  const runResult = await relay.run({
@@ -3686,7 +3909,7 @@ async function startProxy(config = {}) {
3686
3909
  }
3687
3910
  }
3688
3911
  else {
3689
- await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
3912
+ await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId);
3690
3913
  }
3691
3914
  }
3692
3915
  });
@@ -3829,7 +4052,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
3829
4052
  }
3830
4053
  return { responseData, ok: true, status: 200 };
3831
4054
  }
3832
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass) {
4055
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass, agentFingerprint, agentId) {
3833
4056
  let providerResponse;
3834
4057
  try {
3835
4058
  switch (targetProvider) {
@@ -3988,7 +4211,9 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3988
4211
  logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
3989
4212
  // Update token/cost info on the history entry (with cache token discount)
3990
4213
  const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut, streamCacheCreation || undefined, streamCacheRead || undefined);
3991
- updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost, undefined, streamCacheCreation || undefined, streamCacheRead || undefined);
4214
+ updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost, undefined, streamCacheCreation || undefined, streamCacheRead || undefined, agentFingerprint, agentId);
4215
+ if (agentFingerprint && agentFingerprint !== 'unknown')
4216
+ (0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, streamCost);
3992
4217
  // ── Post-request: budget spend + anomaly detection ──
3993
4218
  try {
3994
4219
  (0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
@@ -4024,7 +4249,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
4024
4249
  /**
4025
4250
  * Handle non-streaming request
4026
4251
  */
4027
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
4252
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId) {
4028
4253
  let responseData;
4029
4254
  try {
4030
4255
  const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
@@ -4066,7 +4291,9 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
4066
4291
  const cacheCreationTokens = usage?.cache_creation_input_tokens ?? 0;
4067
4292
  const cacheReadTokens = usage?.cache_read_input_tokens ?? 0;
4068
4293
  const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut, cacheCreationTokens || undefined, cacheReadTokens || undefined);
4069
- updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel, cacheCreationTokens || undefined, cacheReadTokens || undefined);
4294
+ updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel, cacheCreationTokens || undefined, cacheReadTokens || undefined, agentFingerprint, agentId);
4295
+ if (agentFingerprint && agentFingerprint !== 'unknown')
4296
+ (0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, cost);
4070
4297
  // ── Post-request: budget spend + anomaly detection ──
4071
4298
  try {
4072
4299
  (0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);