@relayplane/proxy 1.7.2 → 1.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -56,6 +56,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
56
56
  exports.SMART_ALIASES = exports.RELAYPLANE_ALIASES = exports.MODEL_MAPPING = exports.DEFAULT_ENDPOINTS = exports.proxyStatsCollector = void 0;
57
57
  exports.getAvailableModelNames = getAvailableModelNames;
58
58
  exports.resolveModelAlias = resolveModelAlias;
59
+ exports.extractRequestContent = extractRequestContent;
60
+ exports.extractResponseText = extractResponseText;
59
61
  exports.parseModelSuffix = parseModelSuffix;
60
62
  exports.classifyComplexity = classifyComplexity;
61
63
  exports.shouldEscalate = shouldEscalate;
@@ -76,6 +78,7 @@ const budget_js_1 = require("./budget.js");
76
78
  const anomaly_js_1 = require("./anomaly.js");
77
79
  const alerts_js_1 = require("./alerts.js");
78
80
  const downgrade_js_1 = require("./downgrade.js");
81
+ const agent_tracker_js_1 = require("./agent-tracker.js");
79
82
  const version_status_js_1 = require("./utils/version-status.js");
80
83
  const PROXY_VERSION = (() => {
81
84
  try {
@@ -488,7 +491,7 @@ function shutdownHistory() {
488
491
  }
489
492
  flushHistoryBuffer();
490
493
  }
491
- function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity) {
494
+ function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity, agentFingerprint, agentId, errorMessage, errorStatusCode) {
492
495
  const timestamp = new Date().toISOString();
493
496
  const status = success ? '✓' : '✗';
494
497
  const escalateTag = escalated ? ' [ESCALATED]' : '';
@@ -531,6 +534,10 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
531
534
  costUsd: 0,
532
535
  taskType: taskType || 'general',
533
536
  complexity: complexity || 'simple',
537
+ agentFingerprint,
538
+ agentId,
539
+ error: errorMessage,
540
+ statusCode: errorStatusCode,
534
541
  };
535
542
  requestHistory.push(entry);
536
543
  if (requestHistory.length > MAX_HISTORY) {
@@ -539,7 +546,7 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
539
546
  bufferHistoryEntry(entry);
540
547
  }
541
548
  /** Update the most recent history entry with token/cost info */
542
- function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cacheCreationTokens, cacheReadTokens) {
549
+ function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cacheCreationTokens, cacheReadTokens, agentFingerprint, agentId, requestContent, errorMessage, errorStatusCode) {
543
550
  if (requestHistory.length > 0) {
544
551
  const last = requestHistory[requestHistory.length - 1];
545
552
  last.tokensIn = tokensIn;
@@ -552,8 +559,86 @@ function updateLastHistoryEntry(tokensIn, tokensOut, costUsd, responseModel, cac
552
559
  last.cacheCreationTokens = cacheCreationTokens;
553
560
  if (cacheReadTokens !== undefined)
554
561
  last.cacheReadTokens = cacheReadTokens;
562
+ if (agentFingerprint !== undefined)
563
+ last.agentFingerprint = agentFingerprint;
564
+ if (agentId !== undefined)
565
+ last.agentId = agentId;
566
+ if (requestContent)
567
+ last.requestContent = requestContent;
568
+ if (errorMessage !== undefined)
569
+ last.error = errorMessage;
570
+ if (errorStatusCode !== undefined)
571
+ last.statusCode = errorStatusCode;
555
572
  }
556
573
  }
574
/**
 * Extract request content for logging. Handles Anthropic and OpenAI formats.
 *
 * Anthropic requests carry the system prompt in a top-level `system` field
 * (a string, or an array of content parts). OpenAI-style requests carry it in
 * the first entry of `messages` whose role is `system`. In both formats the
 * user message is read from the LAST entry of `messages` whose role is `user`.
 *
 * This helper feeds request logging, so it never throws on a malformed body:
 * a missing body, a non-array `messages`, and null / non-object entries are
 * all skipped instead of dereferenced.
 *
 * @param {object|null|undefined} body - Parsed request body.
 * @param {boolean} isAnthropic - When truthy the system prompt is read from
 *   `body.system`; otherwise from the first `system` message.
 * @returns {{systemPrompt: (string|undefined), userMessage: (string|undefined)}}
 *   `systemPrompt` is truncated to 200 characters; `userMessage` is not
 *   truncated. Either field is `undefined` when no text was found.
 */
function extractRequestContent(body, isAnthropic) {
    // Concatenate the text of `{ type: 'text', text }` parts. Bare string
    // entries are kept only when `keepBareStrings` is set (Anthropic `system`).
    const joinTextParts = (parts, keepBareStrings) => parts
        .map((part) => {
            if (typeof part === 'string') {
                return keepBareStrings ? part : '';
            }
            if (part !== null && typeof part === 'object' && part.type === 'text') {
                return part.text ?? '';
            }
            return '';
        })
        .join('');
    // Normalize a string-or-parts content value to plain text.
    const readContent = (content, keepBareStrings) => {
        if (typeof content === 'string') {
            return content;
        }
        if (Array.isArray(content)) {
            return joinTextParts(content, keepBareStrings);
        }
        return '';
    };
    const messages = Array.isArray(body?.messages) ? body.messages : [];
    let systemPrompt = '';
    if (isAnthropic) {
        systemPrompt = readContent(body?.system, true);
    }
    else {
        // Only the first system message is considered, even if its content is empty.
        const systemMessage = messages.find((msg) => msg?.role === 'system');
        systemPrompt = readContent(systemMessage?.content, false);
    }
    let userMessage = '';
    // Walk backwards so the most recent user turn wins.
    for (let i = messages.length - 1; i >= 0; i--) {
        if (messages[i]?.role === 'user') {
            userMessage = readContent(messages[i].content, false);
            break;
        }
    }
    return {
        systemPrompt: systemPrompt ? systemPrompt.slice(0, 200) : undefined,
        userMessage: userMessage || undefined,
    };
}
624
/**
 * Extract assistant response text from response payload.
 *
 * For Anthropic payloads the text of every `{ type: 'text' }` entry in
 * `responseData.content` is concatenated. For OpenAI-style payloads the text
 * comes from `choices[0].message.content`, which may be a plain string or an
 * array of text parts.
 *
 * The result is always a string. A missing payload, null content parts, and
 * non-text content yield `''` rather than an exception or a non-string value.
 *
 * @param {object|null|undefined} responseData - Parsed provider response.
 * @param {boolean} isAnthropic - Selects the Anthropic layout when truthy,
 *   the OpenAI `choices` layout otherwise.
 * @returns {string} The assistant text, or `''` when none is present.
 */
function extractResponseText(responseData, isAnthropic) {
    // Concatenate text parts, ignoring null entries and non-text parts.
    const joinTextParts = (parts) => parts
        .filter((part) => part !== null && typeof part === 'object' && part.type === 'text')
        .map((part) => part.text ?? '')
        .join('');
    if (isAnthropic) {
        const blocks = responseData?.content;
        return Array.isArray(blocks) ? joinTextParts(blocks) : '';
    }
    const choices = responseData?.choices;
    const content = Array.isArray(choices) ? choices[0]?.message?.content : undefined;
    if (typeof content === 'string') {
        return content;
    }
    if (Array.isArray(content)) {
        return joinTextParts(content);
    }
    return '';
}
557
642
  const DEFAULT_PROXY_CONFIG = {
558
643
  enabled: true,
559
644
  modelOverrides: {},
@@ -584,6 +669,11 @@ const DEFAULT_PROXY_CONFIG = {
584
669
  },
585
670
  },
586
671
  };
672
/** Module-level ref to active proxy config (set during startProxy) */
let _activeProxyConfig = {};
/**
 * Report whether request/response content may be captured for the dashboard.
 *
 * Content logging is on by default: it is disabled only when the active
 * config sets `dashboard.showRequestContent` to exactly `false`. A missing
 * (null/undefined) active config is treated like an empty one instead of
 * throwing.
 *
 * @returns {boolean} `false` only when content logging was explicitly disabled.
 */
function isContentLoggingEnabled() {
    return _activeProxyConfig?.dashboard?.showRequestContent !== false;
}
587
677
  function getProxyConfigPath() {
588
678
  const customPath = process.env['RELAYPLANE_CONFIG_PATH'];
589
679
  if (customPath && customPath.trim())
@@ -835,19 +925,16 @@ function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
835
925
  'Content-Type': 'application/json',
836
926
  'anthropic-version': ctx.versionHeader || '2023-06-01',
837
927
  };
838
- // Auth: prefer incoming auth for passthrough, but OAuth doesn't work for all models (e.g. Haiku)
839
- // When we have a regular API key AND incoming auth is OAuth, prefer the API key for rerouted requests
840
- // because OAuth may not be supported on the target model. The API key works for ALL models.
841
- const incomingIsOAuth = !!(ctx.apiKeyHeader?.startsWith('sk-ant-oat') || ctx.authHeader?.includes('sk-ant-oat'));
842
- if (incomingIsOAuth && apiKey && !apiKey.startsWith('sk-ant-oat')) {
843
- headers['x-api-key'] = apiKey;
844
- }
845
- else if (ctx.authHeader) {
928
+ // Auth: ALWAYS prefer incoming auth for passthrough (don't replace it)
929
+ // Incoming auth is from Claude Code/OpenClaw and is already the right token for the request
930
+ if (ctx.authHeader) {
931
+ // Incoming Authorization header takes priority - use it as-is
846
932
  headers['Authorization'] = ctx.authHeader;
847
933
  }
848
934
  else if (ctx.apiKeyHeader) {
849
- // MAX/OAuth tokens (sk-ant-oat*) must use Authorization: Bearer, not x-api-key
935
+ // Incoming x-api-key header
850
936
  if (ctx.apiKeyHeader.startsWith('sk-ant-oat')) {
937
+ // MAX/OAuth tokens must use Authorization: Bearer, not x-api-key
851
938
  headers['Authorization'] = `Bearer ${ctx.apiKeyHeader}`;
852
939
  }
853
940
  else {
@@ -855,7 +942,7 @@ function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
855
942
  }
856
943
  }
857
944
  else if (apiKey) {
858
- // MAX tokens (OAuth) use Authorization: Bearer, API keys use x-api-key
945
+ // Fallback to configured API key (only if no incoming auth)
859
946
  if (isMaxToken || apiKey.startsWith('sk-ant-oat')) {
860
947
  headers['Authorization'] = `Bearer ${apiKey}`;
861
948
  }
@@ -867,6 +954,13 @@ function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
867
954
  if (ctx.betaHeaders) {
868
955
  headers['anthropic-beta'] = ctx.betaHeaders;
869
956
  }
957
+ // Pass through OAuth identity headers (required by Anthropic for OAuth token validation)
958
+ if (ctx.userAgent) {
959
+ headers['user-agent'] = ctx.userAgent;
960
+ }
961
+ if (ctx.xApp) {
962
+ headers['x-app'] = ctx.xApp;
963
+ }
870
964
  return headers;
871
965
  }
872
966
  /**
@@ -907,6 +1001,13 @@ function buildAnthropicHeaders(ctx, envApiKey) {
907
1001
  if (ctx.betaHeaders) {
908
1002
  headers['anthropic-beta'] = ctx.betaHeaders;
909
1003
  }
1004
+ // Pass through OAuth identity headers (required by Anthropic for OAuth token validation)
1005
+ if (ctx.userAgent) {
1006
+ headers['user-agent'] = ctx.userAgent;
1007
+ }
1008
+ if (ctx.xApp) {
1009
+ headers['x-app'] = ctx.xApp;
1010
+ }
910
1011
  return headers;
911
1012
  }
912
1013
  /**
@@ -1772,7 +1873,7 @@ function resolveExplicitModel(modelName) {
1772
1873
  function resolveConfigModel(modelName) {
1773
1874
  return resolveExplicitModel(modelName) ?? parsePreferredModel(modelName);
1774
1875
  }
1775
- function extractResponseText(responseData) {
1876
+ function extractResponseTextAuto(responseData) {
1776
1877
  const openAiChoices = responseData['choices'];
1777
1878
  if (openAiChoices && openAiChoices.length > 0) {
1778
1879
  const first = openAiChoices[0];
@@ -1816,6 +1917,28 @@ function checkResponseModelMismatch(responseData, requestedModel, provider, log)
1816
1917
  }
1817
1918
  return responseModel;
1818
1919
  }
1920
/**
 * Extract a human-readable error message from a provider error payload.
 * Handles Anthropic ({ error: { type, message } }) and OpenAI ({ error: { message } }) formats.
 *
 * Resolution order:
 *   1. `payload.error` when it is a non-empty string;
 *   2. `"<type>: <message>"`, then `message`, then `type` from an error object;
 *   3. `"HTTP <statusCode>"` when a status code was supplied;
 *   4. `"Unknown error"`.
 *
 * Only string `type` / `message` values are used, so the result is always a
 * string. A null/undefined payload (e.g. an error body that was not JSON)
 * falls through to the status-code fallback instead of throwing.
 *
 * @param {object|null|undefined} payload - Parsed provider error body.
 * @param {number} [statusCode] - HTTP status of the provider response.
 * @returns {string} A non-empty description of the failure.
 */
function extractProviderErrorMessage(payload, statusCode) {
    // Non-string fields are treated as absent so callers never receive an object.
    const textOrEmpty = (value) => (typeof value === 'string' ? value : '');
    const err = payload?.error;
    if (typeof err === 'string' && err) {
        return err;
    }
    if (err && typeof err === 'object') {
        const errType = textOrEmpty(err.type);
        const errMsg = textOrEmpty(err.message);
        if (errType && errMsg) {
            return `${errType}: ${errMsg}`;
        }
        if (errMsg) {
            return errMsg;
        }
        if (errType) {
            return errType;
        }
    }
    if (statusCode) {
        return `HTTP ${statusCode}`;
    }
    return 'Unknown error';
}
1819
1942
  class ProviderResponseError extends Error {
1820
1943
  status;
1821
1944
  payload;
@@ -1841,6 +1964,8 @@ function extractRequestContext(req) {
1841
1964
  betaHeaders: req.headers['anthropic-beta'],
1842
1965
  versionHeader: req.headers['anthropic-version'],
1843
1966
  apiKeyHeader: req.headers['x-api-key'],
1967
+ userAgent: req.headers['user-agent'],
1968
+ xApp: req.headers['x-app'],
1844
1969
  };
1845
1970
  }
1846
1971
  const MAX_BODY_SIZE = 10 * 1024 * 1024; // 10MB max request body
@@ -1940,7 +2065,7 @@ async function cascadeRequest(config, makeRequest, log) {
1940
2065
  const isLastModel = i === config.models.length - 1;
1941
2066
  try {
1942
2067
  const { responseData, provider, model: resolvedModel } = await makeRequest(model);
1943
- const text = extractResponseText(responseData);
2068
+ const text = extractResponseTextAuto(responseData);
1944
2069
  if (isLastModel || escalations >= config.maxEscalations) {
1945
2070
  return { responseData, provider, model: resolvedModel, escalations };
1946
2071
  }
@@ -1969,7 +2094,7 @@ async function cascadeRequest(config, makeRequest, log) {
1969
2094
  function getDashboardHTML() {
1970
2095
  return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Dashboard</title>
1971
2096
  <style>
1972
- *{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1200px;margin:0 auto}
2097
+ *{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1600px;margin:0 auto}
1973
2098
  a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
1974
2099
  .header{display:flex;justify-content:space-between;align-items:center;padding:16px 0;border-bottom:1px solid #1e293b;margin-bottom:24px}
1975
2100
  .header .meta{font-size:.8rem;color:#64748b}
@@ -1977,13 +2102,18 @@ a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
1977
2102
  .card{background:#111318;border:1px solid #1e293b;border-radius:12px;padding:20px}
1978
2103
  .card .label{font-size:.75rem;color:#64748b;text-transform:uppercase;letter-spacing:.05em;margin-bottom:6px}
1979
2104
  .card .value{font-size:1.75rem;font-weight:700}.green{color:#34d399}
2105
+ .tooltip-wrap{position:relative;display:inline-block}
2106
+ .tooltip-wrap .tooltip-box{visibility:hidden;opacity:0;background:#1e293b;color:#e2e8f0;font-size:.8rem;font-weight:400;text-transform:none;letter-spacing:0;line-height:1.5;border:1px solid #334155;border-radius:8px;padding:10px 14px;position:absolute;top:calc(100% + 8px);left:50%;transform:translateX(-50%);width:280px;z-index:999;pointer-events:none;transition:opacity .15s;box-shadow:0 4px 16px rgba(0,0,0,.4)}
2107
+ .tooltip-wrap .tooltip-box::after{content:'';position:absolute;bottom:100%;left:50%;transform:translateX(-50%);border:6px solid transparent;border-bottom-color:#334155}
2108
+ .tooltip-wrap:hover .tooltip-box{visibility:visible;opacity:1}
2109
+ .info-icon{cursor:help;color:#64748b;font-size:.75rem;vertical-align:middle;margin-left:4px}
1980
2110
  table{width:100%;border-collapse:collapse;font-size:.85rem}
1981
2111
  th{text-align:left;color:#64748b;font-weight:500;padding:8px 12px;border-bottom:1px solid #1e293b;font-size:.75rem;text-transform:uppercase;letter-spacing:.04em}
1982
2112
  td{padding:8px 12px;border-bottom:1px solid #111318}
1983
2113
  .section{margin-bottom:32px}.section h2{font-size:1rem;font-weight:600;margin-bottom:12px;color:#94a3b8}
1984
2114
  .dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.warn{background:#fbbf24}.dot.down{background:#ef4444}
1985
2115
  .badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
1986
- .badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
2116
+ .badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}.badge.err-auth{background:#2d0a0a;color:#ef4444}.badge.err-rate{background:#2d2a0a;color:#fbbf24}.badge.err-timeout{background:#2d1a0a;color:#fb923c}
1987
2117
  .badge.tt-code{background:#1e3a5f;color:#60a5fa}.badge.tt-analysis{background:#3b1f6e;color:#a78bfa}.badge.tt-summarization{background:#1a3a2a;color:#6ee7b7}.badge.tt-qa{background:#3a2f1e;color:#fbbf24}.badge.tt-general{background:#1e293b;color:#94a3b8}
1988
2118
  .badge.cx-simple{background:#052e1633;color:#34d399}.badge.cx-moderate{background:#2d2a0a;color:#fbbf24}.badge.cx-complex{background:#2d0a0a;color:#ef4444}
1989
2119
  .vstat{display:inline-flex;align-items:center;gap:6px;margin-left:8px;padding:1px 8px;border-radius:999px;border:1px solid #334155;font-size:.72rem}
@@ -1992,19 +2122,22 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
1992
2122
  .vstat.unavailable{color:#a3a3a3;border-color:#52525b66;background:#18181b66}
1993
2123
  @media(max-width:768px){.col-tt,.col-cx{display:none}}
1994
2124
  .prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
2125
+ .rename-btn{background:none;border:none;cursor:pointer;font-size:.75rem;opacity:.5;padding:2px}.rename-btn:hover{opacity:1}
1995
2126
  </style></head><body>
1996
2127
  <div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><a href="/dashboard/config">Config</a> · <span id="ver"></span><span id="vstat" class="vstat unavailable">Unable to check</span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
1997
2128
  <div class="cards">
1998
2129
  <div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
1999
2130
  <div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
2000
- <div class="card"><div class="label">Savings</div><div class="value green" id="savings">—</div></div>
2131
+ <div class="card"><div class="label">Routing Savings <span class="tooltip-wrap"><span class="info-icon">ⓘ</span><span class="tooltip-box" id="savings-tooltip">Loading...</span></span></div><div class="value green" id="savings">—</div><div id="savings-detail" style="font-size:.75rem;color:#64748b;margin-top:4px">—</div></div>
2001
2132
  <div class="card"><div class="label">Avg Latency</div><div class="value" id="avgLat">—</div></div>
2002
2133
  </div>
2003
2134
  <div class="section"><h2>Model Breakdown</h2>
2004
- <table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
2135
+ <table><thead><tr><th>Provider</th><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
2136
+ <div class="section"><h2>Agent Cost Breakdown</h2>
2137
+ <table><thead><tr><th>Agent</th><th>Requests</th><th>Total Cost</th><th>Last Active</th><th></th></tr></thead><tbody id="agents"></tbody></table></div>
2005
2138
  <div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
2006
2139
  <div class="section"><h2>Recent Runs</h2>
2007
- <table><thead><tr><th>Time</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
2140
+ <table><thead><tr><th>Time</th><th>Agent</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
2008
2141
  <script>
2009
2142
  const $ = id => document.getElementById(id);
2010
2143
  function fmt(n,d=2){return typeof n==='number'?n.toFixed(d):'-'}
@@ -2012,12 +2145,13 @@ function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
2012
2145
  function dur(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60);return h?h+'h '+m+'m':m+'m'}
2013
2146
  async function load(){
2014
2147
  try{
2015
- const [health,stats,runsR,sav,provH]=await Promise.all([
2148
+ const [health,stats,runsR,sav,provH,agentsR]=await Promise.all([
2016
2149
  fetch('/health').then(r=>r.json()),
2017
2150
  fetch('/v1/telemetry/stats').then(r=>r.json()),
2018
2151
  fetch('/v1/telemetry/runs?limit=20').then(r=>r.json()),
2019
2152
  fetch('/v1/telemetry/savings').then(r=>r.json()),
2020
- fetch('/v1/telemetry/health').then(r=>r.json())
2153
+ fetch('/v1/telemetry/health').then(r=>r.json()),
2154
+ fetch('/api/agents').then(r=>r.json()).catch(()=>({agents:[]}))
2021
2155
  ]);
2022
2156
  $('ver').textContent='v'+health.version;
2023
2157
  $('uptime').textContent=dur(health.uptime);
@@ -2037,16 +2171,65 @@ async function load(){
2037
2171
  const total=stats.summary?.totalEvents||0;
2038
2172
  $('totalReq').textContent=total;
2039
2173
  $('totalCost').textContent='$'+fmt(stats.summary?.totalCostUsd??0,4);
2040
- $('savings').textContent=(sav.percentage??0)+'%';
2174
+ const savAmt=sav.savedAmount??sav.savings??0;
2175
+ const cacheSav=sav.cacheSavings??0;
2176
+ const routeSav=sav.routingSavings??0;
2177
+ const actual=sav.actualCost??0;
2178
+ const hasAnthropic=sav.hasAnthropicCalls!==false;
2179
+ const baseline=sav.potentialSavings??sav.total??0;
2180
+ // Headline = routing savings % (RelayPlane's actual contribution)
2181
+ const routeBaseline=baseline>0?baseline:1;
2182
+ const routePct=hasAnthropic?Math.round((routeSav/routeBaseline)*100):0;
2183
+ const totalPct=sav.percentage??0;
2184
+ $('savings').textContent='$'+fmt(routeSav,2);
2185
+ // Secondary: show total % including cache as context
2186
+ if(hasAnthropic){
2187
+ $('savings-detail').innerHTML='<span style="color:#60a5fa">routing savings</span> · <span style="color:#64748b" title="Includes Anthropic prompt cache hits which happen regardless of routing">'+totalPct+'% total incl. cache</span>';
2188
+ } else {
2189
+ $('savings-detail').innerHTML='<span style="color:#a78bfa">$'+fmt(cacheSav,2)+' cache</span> · <span style="color:#64748b">'+totalPct+'% total</span>';
2190
+ }
2191
+ const tipEl=$('savings-tooltip');
2192
+ if(tipEl){
2193
+ let tip='<strong>How savings are calculated</strong><br><br>';
2194
+ if(hasAnthropic){
2195
+ tip+='<span style="color:#60a5fa">🔀 Routing savings: $'+fmt(routeSav,2)+'</span><br><small>Requests routed to cheaper models (e.g. Sonnet) vs always using Opus. RelayPlane contribution.</small><br><br>';
2196
+ tip+='<span style="color:#a78bfa">💾 Cache savings: $'+fmt(cacheSav,2)+'</span><br><small>Anthropic prompt cache hits (10× cheaper reads). This would happen without RelayPlane too.</small><br><br>';
2197
+ } else {
2198
+ tip+='<span style="color:#a78bfa">💾 Cache savings: $'+fmt(cacheSav,2)+'</span><br><small>Provider cache hits. Happens automatically, not specific to RelayPlane.</small><br><br>';
2199
+ }
2200
+ tip+='💳 Actual cost: <b>$'+fmt(actual,2)+'</b><br>✅ Total saved: <b>$'+fmt(savAmt,2)+'</b>';
2201
+ tipEl.innerHTML=tip;
2202
+ }
2041
2203
  $('avgLat').textContent=(stats.summary?.avgLatencyMs??0)+'ms';
2042
2204
  $('models').innerHTML=(stats.byModel||[]).map(m=>
2043
- '<tr><td>'+m.model+'</td><td>'+m.count+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(total>0?m.count/total*100:0,1)+'%</td></tr>'
2044
- ).join('')||'<tr><td colspan=4 style="color:#64748b">No data yet</td></tr>';
2205
+ '<tr><td style="color:#94a3b8;font-size:.85rem">'+(m.provider||'—')+'</td><td>'+m.model+'</td><td>'+m.count+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(total>0?m.count/total*100:0,1)+'%</td></tr>'
2206
+ ).join('')||'<tr><td colspan=5 style="color:#64748b">No data yet</td></tr>';
2045
2207
  function ttCls(t){const m={code_generation:'tt-code',analysis:'tt-analysis',summarization:'tt-summarization',question_answering:'tt-qa'};return m[t]||'tt-general'}
2046
2208
  function cxCls(c){const m={simple:'cx-simple',moderate:'cx-moderate',complex:'cx-complex'};return m[c]||'cx-simple'}
2047
- $('runs').innerHTML=(runsR.runs||[]).map(r=>
2048
- '<tr><td>'+fmtTime(r.started_at)+'</td><td>'+r.model+'</td><td class="col-tt"><span class="badge '+ttCls(r.taskType)+'">'+(r.taskType||'general').replace(/_/g,' ')+'</span></td><td class="col-cx"><span class="badge '+cxCls(r.complexity)+'">'+(r.complexity||'simple')+'</span></td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td class="col-cache" style="color:#60a5fa">'+(r.cacheCreationTokens||0)+'</td><td class="col-cache" style="color:#34d399">'+(r.cacheReadTokens||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+r.status+'</span></td></tr>'
2049
- ).join('')||'<tr><td colspan=11 style="color:#64748b">No runs yet</td></tr>';
2209
+ function esc(s){if(!s)return'';return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;')}
2210
+ const agents=(agentsR.agents||[]).sort((a,b)=>(b.totalCost||0)-(a.totalCost||0));
2211
+ $('runs').innerHTML=(runsR.runs||[]).map((r,i)=>{
2212
+ function errBadge(r){if(r.status==='success')return '<span class="badge ok">success</span>';var cls='err';var label=r.error||'error';if(r.statusCode===401||r.statusCode===403||(r.error&&/auth/i.test(r.error)))cls='err-auth';else if(r.statusCode===429||(r.error&&/rate.?limit/i.test(r.error)))cls='err-rate';else if(r.error&&/timeout/i.test(r.error))cls='err-timeout';return '<span class="badge '+cls+'" title="'+esc(r.error||'')+' (HTTP '+( r.statusCode||'?')+')">'+(r.statusCode?r.statusCode+' ':'')+ (label.length>40?label.slice(0,40)+'…':label)+'</span>';}
2213
+ const agentName=agents.find(a=>a.fingerprint===r.agentFingerprint)?.name||(r.agentId||'—');
2214
+ const row='<tr style="cursor:pointer" onclick="toggleDetail('+i+')"><td><span id="arrow-'+i+'" style="color:#64748b;font-size:.7rem;margin-right:6px">▶</span>'+fmtTime(r.started_at)+'</td><td style="font-size:.85rem">'+esc(agentName)+'</td><td>'+r.model+'</td><td class="col-tt"><span class="badge '+ttCls(r.taskType)+'">'+(r.taskType||'general').replace(/_/g,' ')+'</span></td><td class="col-cx"><span class="badge '+cxCls(r.complexity)+'">'+(r.complexity||'simple')+'</span></td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td class="col-cache" style="color:#60a5fa">'+(r.cacheCreationTokens||0)+'</td><td class="col-cache" style="color:#34d399">'+(r.cacheReadTokens||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td>'+errBadge(r)+'</td></tr>';
2215
+ const c=r.requestContent||{};
2216
+ let detail='<tr id="run-detail-'+i+'" style="display:none"><td colspan="12" style="padding:16px;background:#111217;border-bottom:1px solid #1e293b">';
2217
+ if(c.systemPrompt||c.userMessage||c.responsePreview){
2218
+ if(c.systemPrompt) detail+='<div style="color:#64748b;font-size:.85rem;margin-bottom:10px;font-style:italic"><strong style="color:#94a3b8">System:</strong> '+esc(c.systemPrompt)+'</div>';
2219
+ if(c.userMessage) detail+='<div style="background:#1a1c23;border:1px solid #1e293b;border-radius:8px;padding:12px;margin-bottom:10px"><strong style="color:#94a3b8;font-size:.8rem">User Message</strong><div style="margin-top:6px;white-space:pre-wrap">'+esc(c.userMessage)+'</div></div>';
2220
+ if(c.responsePreview) detail+='<div style="background:#1a1c23;border:1px solid #1e293b;border-radius:8px;padding:12px;margin-bottom:10px"><strong style="color:#94a3b8;font-size:.8rem">Response Preview</strong><div style="margin-top:6px;white-space:pre-wrap">'+esc(c.responsePreview)+'</div></div>';
2221
+ const btnAttrs='id="full-btn-'+i+'" style="background:#1e293b;color:#e2e8f0;border:1px solid #334155;padding:6px 12px;border-radius:6px;font-size:.8rem"';
2222
+ detail+=(r.tokensOut>0?'<button onclick="event.stopPropagation();loadFullResponse(&quot;'+r.id+'&quot;,'+i+')" '+btnAttrs+'>Show full response</button>':'<button disabled '+btnAttrs+' style="opacity:.4;cursor:default">Response not available (streaming)</button>')+'<pre id="full-resp-'+i+'" style="display:none;white-space:pre-wrap;margin-top:10px;background:#0d0e11;border:1px solid #1e293b;border-radius:8px;padding:12px;max-height:400px;overflow:auto;font-size:.8rem"></pre>';
2223
+ } else {
2224
+ detail+='<span style="color:#64748b">No content captured for this request</span>';
2225
+ }
2226
+ detail+='</td></tr>';
2227
+ return row+detail;
2228
+ }).join('')||'<tr><td colspan=12 style="color:#64748b">No runs yet</td></tr>';
2229
+ restoreExpanded();
2230
+ $('agents').innerHTML=agents.length?agents.map(a=>
2231
+ '<tr><td><span class="agent-name" data-fp="'+a.fingerprint+'">'+esc(a.name)+'</span> <button class="rename-btn" onclick="renameAgent(&quot;'+a.fingerprint+'&quot;,&quot;'+a.name.replace(/"/g,'')+'&quot;)">✏️</button></td><td>'+a.totalRequests+'</td><td>$'+fmt(a.totalCost,4)+'</td><td>'+fmtTime(a.lastSeen)+'</td><td style="font-size:.7rem;color:#64748b" title="'+esc(a.systemPromptPreview||'')+'">'+a.fingerprint+'</td></tr>'
2232
+ ).join(''):'<tr><td colspan=5 style="color:#64748b">No agents detected yet</td></tr>';
2050
2233
  $('providers').innerHTML=(provH.providers||[]).map(p=>{
2051
2234
  const dotClass = p.status==='healthy'?'up':(p.status==='degraded'?'warn':'down');
2052
2235
  const rate = p.successRate!==undefined?(' '+Math.round(p.successRate*100)+'%'):'';
@@ -2054,13 +2237,34 @@ async function load(){
2054
2237
  }).join('');
2055
2238
  }catch(e){console.error(e)}
2056
2239
  }
2240
+ async function renameAgent(fp,currentName){
2241
+ const name=prompt('Rename agent:',currentName);
2242
+ if(!name||name===currentName)return;
2243
+ await fetch('/api/agents/rename',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({fingerprint:fp,name:name})});
2244
+ load();
2245
+ }
2246
+ const expandedRows=new Set();
2247
+ function toggleDetail(i){var d=document.getElementById('run-detail-'+i);var arrow=document.getElementById('arrow-'+i);if(d.style.display==='none'){d.style.display='table-row';expandedRows.add(i);if(arrow)arrow.textContent='▼'}else{d.style.display='none';expandedRows.delete(i);if(arrow)arrow.textContent='▶'}}
2248
+ function restoreExpanded(){expandedRows.forEach(i=>{var d=document.getElementById('run-detail-'+i);var arrow=document.getElementById('arrow-'+i);if(d)d.style.display='table-row';if(arrow)arrow.textContent='▼'})}
2249
+ async function loadFullResponse(runId,i){
2250
+ const btn=document.getElementById('full-btn-'+i);
2251
+ const pre=document.getElementById('full-resp-'+i);
2252
+ if(pre.style.display!=='none'){pre.style.display='none';btn.textContent='Show full response';return}
2253
+ btn.textContent='Loading...';
2254
+ try{
2255
+ const data=await fetch('/api/runs/'+runId).then(r=>r.json());
2256
+ const full=data.requestContent&&data.requestContent.fullResponse;
2257
+ if(full){pre.textContent=full;pre.style.display='block';btn.textContent='Hide full response'}
2258
+ else{btn.textContent='No full response available'}
2259
+ }catch{btn.textContent='Error loading response'}
2260
+ }
2057
2261
  load();setInterval(load,5000);
2058
- </script></body></html>`;
2262
+ </script><footer style="text-align:center;padding:20px 0;color:#475569;font-size:.75rem;border-top:1px solid #1e293b;margin-top:20px">🔒 Request content stays on your machine. Never sent to cloud.</footer></body></html>`;
2059
2263
  }
2060
2264
  function getConfigDashboardHTML() {
2061
2265
  return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Config</title>
2062
2266
  <style>
2063
- *{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1200px;margin:0 auto}
2267
+ *{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1600px;margin:0 auto}
2064
2268
  a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
2065
2269
  .header{display:flex;justify-content:space-between;align-items:center;padding:16px 0;border-bottom:1px solid #1e293b;margin-bottom:24px}
2066
2270
  .header .meta{font-size:.8rem;color:#64748b}
@@ -2152,8 +2356,10 @@ async function startProxy(config = {}) {
2152
2356
  };
2153
2357
  // Load persistent history from disk
2154
2358
  loadHistoryFromDisk();
2359
+ (0, agent_tracker_js_1.loadAgentRegistry)();
2155
2360
  // Flush history on shutdown
2156
2361
  const handleShutdown = () => {
2362
+ (0, agent_tracker_js_1.flushAgentRegistry)();
2157
2363
  meshHandle.stop();
2158
2364
  shutdownHistory();
2159
2365
  process.exit(0);
@@ -2162,6 +2368,7 @@ async function startProxy(config = {}) {
2162
2368
  process.on('SIGTERM', handleShutdown);
2163
2369
  const configPath = getProxyConfigPath();
2164
2370
  let proxyConfig = await loadProxyConfig(configPath, log);
2371
+ _activeProxyConfig = proxyConfig;
2165
2372
  const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
2166
2373
  // === Startup config validation (Task 4) ===
2167
2374
  try {
@@ -2500,11 +2707,11 @@ async function startProxy(config = {}) {
2500
2707
  const days = parseInt(params.get('days') || '7', 10);
2501
2708
  const cutoff = Date.now() - days * 86400000;
2502
2709
  const recent = requestHistory.filter(r => new Date(r.timestamp).getTime() >= cutoff);
2503
- // Model breakdown
2710
+ // Model breakdown (keyed by provider/model for disambiguation)
2504
2711
  const modelMap = new Map();
2505
2712
  for (const r of recent) {
2506
- const key = r.targetModel;
2507
- const cur = modelMap.get(key) || { count: 0, cost: 0 };
2713
+ const key = `${r.provider || 'unknown'}/${r.targetModel}`;
2714
+ const cur = modelMap.get(key) || { count: 0, cost: 0, provider: r.provider || 'unknown', model: r.targetModel };
2508
2715
  cur.count++;
2509
2716
  cur.cost += r.costUsd;
2510
2717
  modelMap.set(key, cur);
@@ -2527,7 +2734,7 @@ async function startProxy(config = {}) {
2527
2734
  avgLatencyMs: recent.length ? Math.round(totalLatency / recent.length) : 0,
2528
2735
  successRate: recent.length ? recent.filter(r => r.success).length / recent.length : 0,
2529
2736
  },
2530
- byModel: Array.from(modelMap.entries()).map(([model, v]) => ({ model, count: v.count, costUsd: v.cost, savings: 0 })),
2737
+ byModel: Array.from(modelMap.entries()).map(([, v]) => ({ model: v.model, provider: v.provider, count: v.count, costUsd: v.cost, savings: 0 })),
2531
2738
  dailyCosts: Array.from(dailyMap.entries()).map(([date, v]) => ({ date, costUsd: v.cost, requests: v.requests })),
2532
2739
  };
2533
2740
  res.writeHead(200, { 'Content-Type': 'application/json' });
@@ -2565,6 +2772,16 @@ async function startProxy(config = {}) {
2565
2772
  cacheReadTokens: r.cacheReadTokens ?? 0,
2566
2773
  savings: Math.round(perRunSavings * 10000) / 10000,
2567
2774
  escalated: r.escalated,
2775
+ error: r.error ?? null,
2776
+ statusCode: r.statusCode ?? null,
2777
+ agentFingerprint: r.agentFingerprint ?? null,
2778
+ agentId: r.agentId ?? null,
2779
+ requestContent: r.requestContent ? {
2780
+ systemPrompt: r.requestContent.systemPrompt,
2781
+ userMessage: r.requestContent.userMessage,
2782
+ responsePreview: r.requestContent.responsePreview,
2783
+ // fullResponse excluded from list endpoint to keep payloads small
2784
+ } : undefined,
2568
2785
  };
2569
2786
  });
2570
2787
  res.writeHead(200, { 'Content-Type': 'application/json' });
@@ -2572,26 +2789,34 @@ async function startProxy(config = {}) {
2572
2789
  return;
2573
2790
  }
2574
2791
  if (req.method === 'GET' && telemetryPath === 'savings') {
2575
- // Savings = cost if everything ran on Opus - actual cost
2576
- // Always compare against Opus as the baseline
2577
- const OPUS_BASELINE = 'claude-opus-4-6';
2578
- let totalOriginalCost = 0;
2792
+ // Cache savings: same model at full input price (no cache discount) vs actual cost
2793
+ // Routing savings: claude-opus-4-6 price vs the routed model's price, both without cache (Anthropic requests only)
2794
+ // Baseline (reported as `total`): actual cost + cache savings + routing savings
2579
2795
  let totalActualCost = 0;
2580
- let totalSavedAmount = 0;
2796
+ let totalCacheSavings = 0; // savings from cache hits (Anthropic feature)
2797
+ let totalRoutingSavings = 0; // savings from routing to cheaper model
2798
+ let hasAnthropicCalls = false;
2581
2799
  const byDayMap = new Map();
2582
2800
  for (const r of requestHistory) {
2583
- // Pass same cache tokens to baseline so savings only reflect routing decisions,
2584
- // not prompt-cache discounts (those happen regardless of which model is chosen).
2585
- const origCost = (0, telemetry_js_1.estimateCost)(OPUS_BASELINE, r.tokensIn, r.tokensOut, r.cacheCreationTokens || undefined, r.cacheReadTokens || undefined);
2586
2801
  const actualCost = r.costUsd;
2587
- const saved = Math.max(0, origCost - actualCost);
2588
- totalOriginalCost += origCost;
2589
2802
  totalActualCost += actualCost;
2590
- totalSavedAmount += saved;
2803
+ // Cache savings: full input price vs what was paid with cache
2804
+ const fullInputCost = (0, telemetry_js_1.estimateCost)(r.targetModel, r.tokensIn + (r.cacheCreationTokens || 0) + (r.cacheReadTokens || 0), r.tokensOut);
2805
+ const cachedCost = r.costUsd;
2806
+ const cacheSaved = Math.max(0, fullInputCost - cachedCost);
2807
+ totalCacheSavings += cacheSaved;
2808
+ // Routing savings: what would this request cost at full Opus price (no cache)
2809
+ // vs what the routed model cost (no cache). Only meaningful for Anthropic.
2810
+ if (r.provider === 'anthropic') {
2811
+ hasAnthropicCalls = true;
2812
+ const opusCost = (0, telemetry_js_1.estimateCost)('claude-opus-4-6', r.tokensIn, r.tokensOut);
2813
+ const modelCost = (0, telemetry_js_1.estimateCost)(r.targetModel, r.tokensIn, r.tokensOut);
2814
+ const routingSaved = Math.max(0, opusCost - modelCost);
2815
+ totalRoutingSavings += routingSaved;
2816
+ }
2591
2817
  const date = r.timestamp.slice(0, 10);
2592
2818
  const day = byDayMap.get(date) || { savedAmount: 0, originalCost: 0, actualCost: 0 };
2593
- day.savedAmount += saved;
2594
- day.originalCost += origCost;
2819
+ day.savedAmount += Math.max(0, totalCacheSavings + totalRoutingSavings);
2595
2820
  day.actualCost += actualCost;
2596
2821
  byDayMap.set(date, day);
2597
2822
  }
@@ -2603,16 +2828,19 @@ async function startProxy(config = {}) {
2603
2828
  originalCost: Math.round(v.originalCost * 10000) / 10000,
2604
2829
  actualCost: Math.round(v.actualCost * 10000) / 10000,
2605
2830
  }));
2831
+ const totalSaved = totalCacheSavings + totalRoutingSavings;
2832
+ const baseline = totalActualCost + totalSaved;
2606
2833
  res.writeHead(200, { 'Content-Type': 'application/json' });
2607
2834
  res.end(JSON.stringify({
2608
- total: Math.round(totalOriginalCost * 10000) / 10000,
2609
2835
  actualCost: Math.round(totalActualCost * 10000) / 10000,
2610
- savings: Math.round(totalSavedAmount * 10000) / 10000,
2611
- savedAmount: Math.round(totalSavedAmount * 10000) / 10000,
2612
- potentialSavings: Math.round(totalOriginalCost * 10000) / 10000,
2613
- percentage: totalOriginalCost > 0
2614
- ? Math.round((totalSavedAmount / totalOriginalCost) * 100)
2615
- : 0,
2836
+ savedAmount: Math.round(totalSaved * 10000) / 10000,
2837
+ savings: Math.round(totalSaved * 10000) / 10000,
2838
+ cacheSavings: Math.round(totalCacheSavings * 10000) / 10000,
2839
+ routingSavings: Math.round(totalRoutingSavings * 10000) / 10000,
2840
+ hasAnthropicCalls,
2841
+ potentialSavings: Math.round(baseline * 10000) / 10000,
2842
+ total: Math.round(baseline * 10000) / 10000,
2843
+ percentage: baseline > 0 ? Math.round((totalSaved / baseline) * 100) : 0,
2616
2844
  byDay,
2617
2845
  }));
2618
2846
  return;
@@ -2662,6 +2890,63 @@ async function startProxy(config = {}) {
2662
2890
  res.end(JSON.stringify({ error: 'Not found' }));
2663
2891
  return;
2664
2892
  }
2893
+ // === Run detail and agent tracking API ===
2894
+ // === /api/runs/:id — full request/response content for a single run ===
2895
+ const runsIdMatch = pathname.match(/^\/api\/runs\/(.+)$/);
2896
+ if (req.method === 'GET' && runsIdMatch) {
2897
+ const runId = runsIdMatch[1];
2898
+ const run = requestHistory.find(r => r.id === runId);
2899
+ if (!run) {
2900
+ res.writeHead(404, { 'Content-Type': 'application/json' });
2901
+ res.end(JSON.stringify({ error: 'Run not found' }));
2902
+ return;
2903
+ }
2904
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2905
+ res.end(JSON.stringify({
2906
+ id: run.id,
2907
+ model: run.targetModel,
2908
+ provider: run.provider,
2909
+ timestamp: run.timestamp,
2910
+ tokensIn: run.tokensIn,
2911
+ tokensOut: run.tokensOut,
2912
+ costUsd: run.costUsd,
2913
+ latencyMs: run.latencyMs,
2914
+ success: run.success,
2915
+ requestContent: run.requestContent,
2916
+ }));
2917
+ return;
2918
+ }
2919
+ if (req.method === 'GET' && pathname === '/api/agents') {
2920
+ const summaries = (0, agent_tracker_js_1.getAgentSummaries)(requestHistory);
2921
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2922
+ res.end(JSON.stringify({ agents: summaries }));
2923
+ return;
2924
+ }
2925
+ if (req.method === 'POST' && pathname === '/api/agents/rename') {
2926
+ try {
2927
+ const body = await readJsonBody(req);
2928
+ const fingerprint = body['fingerprint'];
2929
+ const name = body['name'];
2930
+ if (!fingerprint || !name) {
2931
+ res.writeHead(400, { 'Content-Type': 'application/json' });
2932
+ res.end(JSON.stringify({ error: 'Missing fingerprint or name' }));
2933
+ return;
2934
+ }
2935
+ const ok = (0, agent_tracker_js_1.renameAgent)(fingerprint, name);
2936
+ if (!ok) {
2937
+ res.writeHead(404, { 'Content-Type': 'application/json' });
2938
+ res.end(JSON.stringify({ error: 'Agent not found' }));
2939
+ return;
2940
+ }
2941
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2942
+ res.end(JSON.stringify({ ok: true }));
2943
+ }
2944
+ catch {
2945
+ res.writeHead(400, { 'Content-Type': 'application/json' });
2946
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
2947
+ }
2948
+ return;
2949
+ }
2665
2950
  // === Dashboard ===
2666
2951
  if (req.method === 'GET' && (pathname === '/' || pathname === '/dashboard')) {
2667
2952
  res.writeHead(200, { 'Content-Type': 'text/html' });
@@ -2742,6 +3027,14 @@ async function startProxy(config = {}) {
2742
3027
  res.end(JSON.stringify({ error: 'Invalid JSON' }));
2743
3028
  return;
2744
3029
  }
3030
+ // Extract agent fingerprint and explicit agent ID
3031
+ const nativeSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(requestBody);
3032
+ const nativeExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
3033
+ let nativeAgentFingerprint;
3034
+ if (nativeSystemPrompt) {
3035
+ const agentResult = (0, agent_tracker_js_1.trackAgent)(nativeSystemPrompt, 0, nativeExplicitAgentId);
3036
+ nativeAgentFingerprint = agentResult.fingerprint;
3037
+ }
2745
3038
  const originalModel = requestBody['model'];
2746
3039
  let requestedModel = headerModelOverride ?? originalModel ?? '';
2747
3040
  if (headerModelOverride) {
@@ -3076,7 +3369,8 @@ async function startProxy(config = {}) {
3076
3369
  cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
3077
3370
  }
3078
3371
  const durationMs = Date.now() - startTime;
3079
- logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
3372
+ const errMsg = extractProviderErrorMessage(errorPayload, providerResponse.status);
3373
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errMsg, providerResponse.status);
3080
3374
  res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
3081
3375
  res.end(JSON.stringify(errorPayload));
3082
3376
  return;
@@ -3203,7 +3497,22 @@ async function startProxy(config = {}) {
3203
3497
  const nativeTokIn = nativeBaseTokIn + nativeCacheCreation + nativeCacheRead;
3204
3498
  // Cost calculation expects inputTokens to include cache tokens when cache params are provided
3205
3499
  const nativeCostUsd = (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCacheCreation || undefined, nativeCacheRead || undefined);
3206
- updateLastHistoryEntry(nativeTokIn, nativeTokOut, nativeCostUsd, undefined, nativeCacheCreation || undefined, nativeCacheRead || undefined);
3500
+ // Build request content if logging enabled
3501
+ let nativeContentData;
3502
+ if (isContentLoggingEnabled()) {
3503
+ const extracted = extractRequestContent(requestBody, true);
3504
+ const responseText = nativeResponseData ? extractResponseText(nativeResponseData, true) : '';
3505
+ nativeContentData = {
3506
+ ...extracted,
3507
+ responsePreview: responseText ? responseText.slice(0, 500) : undefined,
3508
+ fullResponse: responseText || undefined,
3509
+ };
3510
+ }
3511
+ updateLastHistoryEntry(nativeTokIn, nativeTokOut, nativeCostUsd, undefined, nativeCacheCreation || undefined, nativeCacheRead || undefined, nativeAgentFingerprint, nativeExplicitAgentId, nativeContentData);
3512
+ // Update agent cost now that we know the actual cost
3513
+ if (nativeAgentFingerprint && nativeAgentFingerprint !== 'unknown') {
3514
+ (0, agent_tracker_js_1.updateAgentCost)(nativeAgentFingerprint, nativeCostUsd);
3515
+ }
3207
3516
  // ── Post-request: budget spend + anomaly detection ──
3208
3517
  postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
3209
3518
  if (recordTelemetry) {
@@ -3224,7 +3533,17 @@ async function startProxy(config = {}) {
3224
3533
  }
3225
3534
  catch (err) {
3226
3535
  const durationMs = Date.now() - startTime;
3227
- logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
3536
+ let catchErrMsg;
3537
+ let catchErrStatus;
3538
+ if (err instanceof ProviderResponseError) {
3539
+ catchErrMsg = extractProviderErrorMessage(err.payload, err.status);
3540
+ catchErrStatus = err.status;
3541
+ }
3542
+ else {
3543
+ catchErrMsg = err instanceof Error ? err.message : String(err);
3544
+ catchErrStatus = 500;
3545
+ }
3546
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, catchErrMsg, catchErrStatus);
3228
3547
  if (err instanceof ProviderResponseError) {
3229
3548
  res.writeHead(err.status, { 'Content-Type': 'application/json' });
3230
3549
  res.end(JSON.stringify(err.payload));
@@ -3301,6 +3620,14 @@ async function startProxy(config = {}) {
3301
3620
  return;
3302
3621
  }
3303
3622
  const isStreaming = request.stream === true;
3623
+ // Extract agent fingerprint for chat/completions
3624
+ const chatSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(request);
3625
+ const chatExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
3626
+ let chatAgentFingerprint;
3627
+ if (chatSystemPrompt) {
3628
+ const agentResult = (0, agent_tracker_js_1.trackAgent)(chatSystemPrompt, 0, chatExplicitAgentId);
3629
+ chatAgentFingerprint = agentResult.fingerprint;
3630
+ }
3304
3631
  // ── Response Cache: check for cached response (chat/completions) ──
3305
3632
  const chatCacheBypass = responseCache.shouldBypass(request);
3306
3633
  let chatCacheHash;
@@ -3601,7 +3928,7 @@ async function startProxy(config = {}) {
3601
3928
  const startTime = Date.now();
3602
3929
  // Handle streaming vs non-streaming
3603
3930
  if (isStreaming) {
3604
- await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass);
3931
+ await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass, chatAgentFingerprint, chatExplicitAgentId);
3605
3932
  }
3606
3933
  else {
3607
3934
  if (useCascade && cascadeConfig) {
@@ -3641,7 +3968,9 @@ async function startProxy(config = {}) {
3641
3968
  const cascadeCacheCreation = cascadeUsage?.cache_creation_input_tokens || undefined;
3642
3969
  const cascadeCacheRead = cascadeUsage?.cache_read_input_tokens || undefined;
3643
3970
  const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut, cascadeCacheCreation, cascadeCacheRead);
3644
- updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel, cascadeCacheCreation, cascadeCacheRead);
3971
+ updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel, cascadeCacheCreation, cascadeCacheRead, chatAgentFingerprint, chatExplicitAgentId);
3972
+ if (chatAgentFingerprint && chatAgentFingerprint !== 'unknown')
3973
+ (0, agent_tracker_js_1.updateAgentCost)(chatAgentFingerprint, cascadeCost);
3645
3974
  if (recordTelemetry) {
3646
3975
  try {
3647
3976
  const runResult = await relay.run({
@@ -3674,7 +4003,17 @@ async function startProxy(config = {}) {
3674
4003
  }
3675
4004
  catch (err) {
3676
4005
  const durationMs = Date.now() - startTime;
3677
- logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity);
4006
+ let cascadeErrMsg;
4007
+ let cascadeErrStatus;
4008
+ if (err instanceof ProviderResponseError) {
4009
+ cascadeErrMsg = extractProviderErrorMessage(err.payload, err.status);
4010
+ cascadeErrStatus = err.status;
4011
+ }
4012
+ else {
4013
+ cascadeErrMsg = err instanceof Error ? err.message : String(err);
4014
+ cascadeErrStatus = 500;
4015
+ }
4016
+ logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity, undefined, undefined, cascadeErrMsg, cascadeErrStatus);
3678
4017
  if (err instanceof ProviderResponseError) {
3679
4018
  res.writeHead(err.status, { 'Content-Type': 'application/json' });
3680
4019
  res.end(JSON.stringify(err.payload));
@@ -3686,7 +4025,7 @@ async function startProxy(config = {}) {
3686
4025
  }
3687
4026
  }
3688
4027
  else {
3689
- await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
4028
+ await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId);
3690
4029
  }
3691
4030
  }
3692
4031
  });
@@ -3829,7 +4168,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
3829
4168
  }
3830
4169
  return { responseData, ok: true, status: 200 };
3831
4170
  }
3832
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass) {
4171
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass, agentFingerprint, agentId) {
3833
4172
  let providerResponse;
3834
4173
  try {
3835
4174
  switch (targetProvider) {
@@ -3857,7 +4196,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3857
4196
  cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
3858
4197
  }
3859
4198
  const durationMs = Date.now() - startTime;
3860
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
4199
+ const streamErrMsg = extractProviderErrorMessage(errorData, providerResponse.status);
4200
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, streamErrMsg, providerResponse.status); // pass the agent identity so provider-error entries stay attributed to the calling agent
3861
4201
  res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
3862
4202
  res.end(JSON.stringify(errorData));
3863
4203
  return;
@@ -3869,7 +4209,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3869
4209
  cooldownManager.recordFailure(targetProvider, errorMsg);
3870
4210
  }
3871
4211
  const durationMs = Date.now() - startTime;
3872
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
4212
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errorMsg, 500); // pass the agent identity so transport-failure entries stay attributed to the calling agent
3873
4213
  res.writeHead(500, { 'Content-Type': 'application/json' });
3874
4214
  res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
3875
4215
  return;
@@ -3988,7 +4328,9 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3988
4328
  logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
3989
4329
  // Update token/cost info on the history entry (with cache token discount)
3990
4330
  const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut, streamCacheCreation || undefined, streamCacheRead || undefined);
3991
- updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost, undefined, streamCacheCreation || undefined, streamCacheRead || undefined);
4331
+ updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost, undefined, streamCacheCreation || undefined, streamCacheRead || undefined, agentFingerprint, agentId);
4332
+ if (agentFingerprint && agentFingerprint !== 'unknown')
4333
+ (0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, streamCost);
3992
4334
  // ── Post-request: budget spend + anomaly detection ──
3993
4335
  try {
3994
4336
  (0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
@@ -4024,7 +4366,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
4024
4366
  /**
4025
4367
  * Handle non-streaming request
4026
4368
  */
4027
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
4369
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId) {
4028
4370
  let responseData;
4029
4371
  try {
4030
4372
  const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
@@ -4034,7 +4376,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
4034
4376
  cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
4035
4377
  }
4036
4378
  const durationMs = Date.now() - startTime;
4037
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
4379
+ const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
4380
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, nsErrMsg, result.status); // pass the agent identity so provider-error entries stay attributed to the calling agent
4038
4381
  res.writeHead(result.status, { 'Content-Type': 'application/json' });
4039
4382
  res.end(JSON.stringify(responseData));
4040
4383
  return;
@@ -4046,7 +4389,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
4046
4389
  cooldownManager.recordFailure(targetProvider, errorMsg);
4047
4390
  }
4048
4391
  const durationMs = Date.now() - startTime;
4049
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
4392
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errorMsg, 500); // pass the agent identity so transport-failure entries stay attributed to the calling agent
4050
4393
  res.writeHead(500, { 'Content-Type': 'application/json' });
4051
4394
  res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
4052
4395
  return;
@@ -4066,7 +4409,9 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
4066
4409
  const cacheCreationTokens = usage?.cache_creation_input_tokens ?? 0;
4067
4410
  const cacheReadTokens = usage?.cache_read_input_tokens ?? 0;
4068
4411
  const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut, cacheCreationTokens || undefined, cacheReadTokens || undefined);
4069
- updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel, cacheCreationTokens || undefined, cacheReadTokens || undefined);
4412
+ updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel, cacheCreationTokens || undefined, cacheReadTokens || undefined, agentFingerprint, agentId);
4413
+ if (agentFingerprint && agentFingerprint !== 'unknown')
4414
+ (0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, cost);
4070
4415
  // ── Post-request: budget spend + anomaly detection ──
4071
4416
  try {
4072
4417
  (0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);