@relayplane/proxy 1.5.44 → 1.5.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,6 +5,13 @@
 
 An open-source LLM proxy that sits between your AI agents and providers. Tracks every request, shows where the money goes, and offers configurable task-aware routing — all running locally.
 
+ **Key features:**
+ - 📊 Per-request cost tracking across 11+ providers
+ - 💰 **Cache-aware cost tracking** — accurately tracks Anthropic prompt caching with cache read savings, creation costs, and true per-request costs including cached tokens
+ - 🔀 Configurable task-aware routing (complexity-based, cascade, model overrides)
+ - 🛡️ Circuit breaker architecture — if the proxy fails, your agent doesn't notice
+ - 📈 Local dashboard with cost breakdown, savings analysis, and provider health
+
 ## Quick Start
 
 ```bash
@@ -1 +1 @@
- {"version":3,"file":"standalone-proxy.d.ts","sourceRoot":"","sources":["../src/standalone-proxy.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAKlC,OAAO,KAAK,EAAE,QAAQ,EAAY,MAAM,kBAAkB,CAAC;AAG3D,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAU5C,2DAA2D;AAC3D,eAAO,MAAM,mBAAmB,gBAAuB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CA6C9D,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAc/E,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAGrD,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAQ/E,CAAC;AAsCF;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,MAAM,EAAE,CAWjD;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMvD;AAkBD,KAAK,aAAa,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;AAEjD,UAAU,WAAW;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,aAAa,GAAG,IAAI,CAAC;CAC9B;AAcD,UAAU,aAAa;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,UAAU,EAAE,aAAa,GAAG,SAAS,GAAG,OAAO,CAAC;IAChD,cAAc,EAAE,MAAM,CAAC;CACxB;AAmBD,KAAK,UAAU,GAAG,QAAQ,GAAG,UAAU,GAAG,SAAS,CAAC;AA6EpD;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,aAAa,CAAC,EAAE,aAAa,GAAG,KAAK,GAAG,MAAM,CAAC;CAChD;AAuZD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW,CAe3D;AAuDD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC,GAAG,UAAU,CAuCpG;AAED,wBAAgB,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,CAAC,YAAY,CAAC,GAAG,OAAO,CAIlG;AAuoDD;;GAEG;AACH,wBAAsB,UAAU,CAAC,MAAM,GAAE,WAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAu5C/E"}
+ {"version":3,"file":"standalone-proxy.d.ts","sourceRoot":"","sources":["../src/standalone-proxy.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAKlC,OAAO,KAAK,EAAE,QAAQ,EAAY,MAAM,kBAAkB,CAAC;AAG3D,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAU5C,2DAA2D;AAC3D,eAAO,MAAM,mBAAmB,gBAAuB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CA6C9D,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAc/E,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAGrD,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAQ/E,CAAC;AAiCF;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,MAAM,EAAE,CAWjD;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMvD;AAkBD,KAAK,aAAa,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;AAEjD,UAAU,WAAW;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,aAAa,GAAG,IAAI,CAAC;CAC9B;AAcD,UAAU,aAAa;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,UAAU,EAAE,aAAa,GAAG,SAAS,GAAG,OAAO,CAAC;IAChD,cAAc,EAAE,MAAM,CAAC;CACxB;AAmBD,KAAK,UAAU,GAAG,QAAQ,GAAG,UAAU,GAAG,SAAS,CAAC;AA6EpD;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,aAAa,CAAC,EAAE,aAAa,GAAG,KAAK,GAAG,MAAM,CAAC;CAChD;AAuZD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW,CAe3D;AAuDD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC,GAAG,UAAU,CAuCpG;AAED,wBAAgB,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,CAAC,YAAY,CAAC,GAAG,OAAO,CAIlG;AAulDD;;GAEG;AACH,wBAAsB,UAAU,CAAC,MAAM,GAAE,WAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CA2xC/E"}
@@ -133,14 +133,14 @@ exports.DEFAULT_ENDPOINTS = {
  */
 exports.MODEL_MAPPING = {
 // Anthropic models (using correct API model IDs)
- 'claude-opus-4-5': { provider: 'anthropic', model: 'claude-opus-4-20250514' },
- 'claude-sonnet-4': { provider: 'anthropic', model: 'claude-sonnet-4-20250514' },
- 'claude-3-5-sonnet': { provider: 'anthropic', model: 'claude-3-5-sonnet-20241022' },
- 'claude-3-5-haiku': { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' },
- 'claude-haiku-4-5': { provider: 'anthropic', model: 'claude-haiku-4-5-20250514' },
- haiku: { provider: 'anthropic', model: 'claude-haiku-4-5-20250514' },
- sonnet: { provider: 'anthropic', model: 'claude-sonnet-4-20250514' },
- opus: { provider: 'anthropic', model: 'claude-opus-4-20250514' },
+ 'claude-opus-4-5': { provider: 'anthropic', model: 'claude-opus-4-6' },
+ 'claude-sonnet-4': { provider: 'anthropic', model: 'claude-sonnet-4-6' },
+ 'claude-3-5-sonnet': { provider: 'anthropic', model: 'claude-3-5-sonnet-latest' },
+ 'claude-3-5-haiku': { provider: 'anthropic', model: 'claude-haiku-4-5' },
+ 'claude-haiku-4-5': { provider: 'anthropic', model: 'claude-haiku-4-5' },
+ haiku: { provider: 'anthropic', model: 'claude-haiku-4-5' },
+ sonnet: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
+ opus: { provider: 'anthropic', model: 'claude-opus-4-6' },
 // OpenAI models
 'gpt-4o': { provider: 'openai', model: 'gpt-4o' },
 'gpt-4o-mini': { provider: 'openai', model: 'gpt-4o-mini' },
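For orientation, MODEL_MAPPING is a flat alias table: the proxy looks up the requested name and substitutes the provider's concrete model ID, so this hunk retargets every Claude alias from dated snapshot IDs to the newer family names. A minimal sketch of how such a lookup behaves (the `resolveModel` helper and its passthrough fallback are illustrative assumptions, not code from the package):

```js
// Illustrative alias lookup over the mapping shown above (resolveModel is hypothetical).
const MODEL_MAPPING = {
  haiku: { provider: 'anthropic', model: 'claude-haiku-4-5' },
  sonnet: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
  opus: { provider: 'anthropic', model: 'claude-opus-4-6' },
};

function resolveModel(requested) {
  // Unknown names pass through unchanged (assumed behavior for this sketch).
  return MODEL_MAPPING[requested] ?? { provider: 'anthropic', model: requested };
}

console.log(resolveModel('sonnet').model); // "claude-sonnet-4-6"
```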
@@ -171,10 +171,10 @@ exports.SMART_ALIASES = {
  * Send a telemetry event to the cloud (anonymous or authenticated).
  * Non-blocking — errors are silently swallowed.
  */
- function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, success, costUsd, requestedModel, cacheCreationTokens, cacheReadTokens) {
+ function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, success, costUsd, requestedModel) {
 try {
- const cost = costUsd ?? (0, telemetry_js_1.estimateCost)(model, tokensIn, tokensOut, cacheCreationTokens, cacheReadTokens);
- const event = {
+ const cost = costUsd ?? (0, telemetry_js_1.estimateCost)(model, tokensIn, tokensOut);
+ (0, telemetry_js_1.recordTelemetry)({
 task_type: taskType,
 model,
 tokens_in: tokensIn,
@@ -183,12 +183,7 @@ function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, suc
 success,
 cost_usd: cost,
 requested_model: requestedModel,
- };
- if (cacheCreationTokens)
- event.cache_creation_tokens = cacheCreationTokens;
- if (cacheReadTokens)
- event.cache_read_tokens = cacheReadTokens;
- (0, telemetry_js_1.recordTelemetry)(event);
+ });
 }
 catch {
 // Telemetry should never break the proxy
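Note that the removed cache parameters are what made these cost estimates cache-aware. Anthropic bills cache writes above the base input rate and cache reads at a small fraction of it (roughly 1.25x and 0.1x at the time of writing), so folding all token classes into one input count can misstate cost. A back-of-envelope sketch with invented prices, not the package's actual `estimateCost`:

```js
// Invented rates for illustration only; not the package's pricing table.
const BASE_IN = 3.0;        // USD per 1M input tokens (hypothetical)
const WRITE_MULT = 1.25;    // cache creation billed above the base rate
const READ_MULT = 0.1;      // cache reads billed at a fraction of the base rate

function cacheAwareInputCost(rawIn, cacheCreation, cacheRead) {
  return (rawIn * BASE_IN
    + cacheCreation * BASE_IN * WRITE_MULT
    + cacheRead * BASE_IN * READ_MULT) / 1e6;
}

// 1,000 fresh tokens plus 10,000 cache-read tokens:
console.log(cacheAwareInputCost(1000, 0, 10000)); // 0.006
// Billing all 11,000 tokens at the base rate instead would give 0.033.
```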
@@ -494,18 +489,18 @@ const DEFAULT_PROXY_CONFIG = {
 cascade: {
 enabled: true,
 models: [
- 'claude-3-5-haiku-20241022',
- 'claude-sonnet-4-20250514',
- 'claude-opus-4-20250514',
+ 'claude-haiku-4-5',
+ 'claude-sonnet-4-6',
+ 'claude-opus-4-6',
 ],
 escalateOn: 'uncertainty',
 maxEscalations: 1,
 },
 complexity: {
 enabled: true,
- simple: 'claude-3-5-haiku-20241022',
- moderate: 'claude-sonnet-4-20250514',
- complex: 'claude-opus-4-20250514',
+ simple: 'claude-haiku-4-5',
+ moderate: 'claude-sonnet-4-6',
+ complex: 'claude-opus-4-6',
 },
 },
 reliability: {
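Read together, the new defaults form a three-tier ladder: Haiku for simple tasks, Sonnet for moderate, Opus for complex, with cascade escalation capped at one step. As a standalone object (shape copied from the hunk above):

```js
// The updated routing defaults, extracted verbatim from DEFAULT_PROXY_CONFIG above.
const routingDefaults = {
  cascade: {
    enabled: true,
    models: ['claude-haiku-4-5', 'claude-sonnet-4-6', 'claude-opus-4-6'],
    escalateOn: 'uncertainty',
    maxEscalations: 1,
  },
  complexity: {
    enabled: true,
    simple: 'claude-haiku-4-5',
    moderate: 'claude-sonnet-4-6',
    complex: 'claude-opus-4-6',
  },
};
```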
@@ -746,24 +741,29 @@ function getAuthForModel(model, authConfig, envApiKey) {
  * MAX tokens (sk-ant-oat*) use Authorization: Bearer header
  * API keys (sk-ant-api*) use x-api-key header
  */
- function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken, isRerouted) {
+ function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
 const headers = {
 'Content-Type': 'application/json',
 'anthropic-version': ctx.versionHeader || '2023-06-01',
 };
- // Detect if incoming auth is OAuth
- const incomingIsOAuth = ctx.apiKeyHeader?.startsWith('sk-ant-oat') || ctx.authHeader?.includes('sk-ant-oat');
- const apiKeyIsRegular = apiKey && apiKey.startsWith('sk-ant-api');
- // When rerouted (auto mode changed the model) and incoming is OAuth,
- // prefer the regular API key — OAuth doesn't work for all models (e.g. Haiku)
- if (isRerouted && incomingIsOAuth && apiKeyIsRegular) {
+ // Auth: prefer incoming auth for passthrough, but OAuth doesn't work for all models (e.g. Haiku)
+ // When we have a regular API key AND incoming auth is OAuth, prefer the API key for rerouted requests
+ // because OAuth may not be supported on the target model. The API key works for ALL models.
+ const incomingIsOAuth = !!(ctx.apiKeyHeader?.startsWith('sk-ant-oat') || ctx.authHeader?.includes('sk-ant-oat'));
+ if (incomingIsOAuth && apiKey && !apiKey.startsWith('sk-ant-oat')) {
 headers['x-api-key'] = apiKey;
 }
 else if (ctx.authHeader) {
 headers['Authorization'] = ctx.authHeader;
 }
 else if (ctx.apiKeyHeader) {
- headers['x-api-key'] = ctx.apiKeyHeader;
+ // MAX/OAuth tokens (sk-ant-oat*) must use Authorization: Bearer, not x-api-key
+ if (ctx.apiKeyHeader.startsWith('sk-ant-oat')) {
+ headers['Authorization'] = `Bearer ${ctx.apiKeyHeader}`;
+ }
+ else {
+ headers['x-api-key'] = ctx.apiKeyHeader;
+ }
 }
 else if (apiKey) {
 // MAX tokens (OAuth) use Authorization: Bearer, API keys use x-api-key
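The rewrite changes when the OAuth-to-API-key preference applies: previously it required the `isRerouted` flag, now it fires whenever a non-OAuth key is available alongside incoming OAuth auth. The decision table, condensed into a standalone sketch (a simplified restatement of the function above, not a drop-in replacement):

```js
// Condensed restatement of the header-selection branches above (illustrative).
function pickAnthropicAuth(ctx, apiKey) {
  const incomingIsOAuth = !!(ctx.apiKeyHeader?.startsWith('sk-ant-oat') ||
    ctx.authHeader?.includes('sk-ant-oat'));
  if (incomingIsOAuth && apiKey && !apiKey.startsWith('sk-ant-oat')) {
    return { 'x-api-key': apiKey };   // API key preferred: it works on every model
  }
  if (ctx.authHeader) return { Authorization: ctx.authHeader };
  if (ctx.apiKeyHeader) {
    return ctx.apiKeyHeader.startsWith('sk-ant-oat')
      ? { Authorization: `Bearer ${ctx.apiKeyHeader}` }  // OAuth tokens need Bearer
      : { 'x-api-key': ctx.apiKeyHeader };
  }
  if (apiKey) {
    return apiKey.startsWith('sk-ant-oat')
      ? { Authorization: `Bearer ${apiKey}` }
      : { 'x-api-key': apiKey };
  }
  return {};
}
```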
@@ -850,8 +850,8 @@ async function forwardToAnthropicStream(request, targetModel, ctx, envApiKey) {
  * Forward native Anthropic /v1/messages request (passthrough with routing)
  * Used for Claude Code direct integration
  */
- async function forwardNativeAnthropicRequest(body, ctx, envApiKey, isMaxToken, isRerouted) {
- const headers = buildAnthropicHeadersWithAuth(ctx, envApiKey, isMaxToken, isRerouted);
+ async function forwardNativeAnthropicRequest(body, ctx, envApiKey, isMaxToken) {
+ const headers = buildAnthropicHeadersWithAuth(ctx, envApiKey, isMaxToken);
 const response = await fetch('https://api.anthropic.com/v1/messages', {
 method: 'POST',
 headers,
@@ -1412,11 +1412,9 @@ function convertAnthropicResponse(anthropicData) {
 },
 ],
 usage: {
- prompt_tokens: (anthropicData.usage?.input_tokens ?? 0) + (anthropicData.usage?.cache_creation_input_tokens ?? 0) + (anthropicData.usage?.cache_read_input_tokens ?? 0),
+ prompt_tokens: anthropicData.usage?.input_tokens ?? 0,
 completion_tokens: anthropicData.usage?.output_tokens ?? 0,
- total_tokens: (anthropicData.usage?.input_tokens ?? 0) + (anthropicData.usage?.cache_creation_input_tokens ?? 0) + (anthropicData.usage?.cache_read_input_tokens ?? 0) + (anthropicData.usage?.output_tokens ?? 0),
- cache_creation_input_tokens: anthropicData.usage?.cache_creation_input_tokens ?? 0,
- cache_read_input_tokens: anthropicData.usage?.cache_read_input_tokens ?? 0,
+ total_tokens: (anthropicData.usage?.input_tokens ?? 0) + (anthropicData.usage?.output_tokens ?? 0),
 },
 };
 }
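The practical effect: cache token fields no longer inflate the OpenAI-style usage block. With invented numbers, a response carrying 200 fresh input tokens and 10,000 cache-read tokens now converts as:

```js
// Invented usage numbers; shape follows the conversion code above.
const anthropicUsage = {
  input_tokens: 200,
  cache_read_input_tokens: 10000,  // ignored by the new conversion
  output_tokens: 350,
};

const openAiUsage = {
  prompt_tokens: 200,      // previously 10200 (input plus cache fields)
  completion_tokens: 350,
  total_tokens: 550,       // previously 10550
};
```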
@@ -1439,16 +1437,11 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
 const msg = eventData['message'];
 baseChunk.id = msg?.['id'] || messageId;
 choice.delta = { role: 'assistant', content: '' };
- // Pass through input token count from message_start (including cache tokens)
+ // Pass through input token count from message_start
 const msgUsage = msg?.['usage'];
 if (msgUsage) {
- const cacheCreation = msgUsage['cache_creation_input_tokens'] ?? 0;
- const cacheRead = msgUsage['cache_read_input_tokens'] ?? 0;
- const inputTokens = msgUsage['input_tokens'] ?? 0;
 baseChunk['usage'] = {
- prompt_tokens: inputTokens + cacheCreation + cacheRead,
- cache_creation_input_tokens: cacheCreation,
- cache_read_input_tokens: cacheRead,
+ prompt_tokens: msgUsage['input_tokens'] ?? 0,
 };
 }
 return `data: ${JSON.stringify(baseChunk)}\n\n`;
@@ -1819,7 +1812,7 @@ function getCascadeConfig(config) {
 const c = config.routing?.cascade;
 return {
 enabled: c?.enabled ?? true,
- models: c?.models ?? ['claude-3-5-haiku-20241022', 'claude-sonnet-4-20250514', 'claude-opus-4-20250514'],
+ models: c?.models ?? ['claude-haiku-4-5', 'claude-sonnet-4-6', 'claude-opus-4-6'],
 escalateOn: c?.escalateOn ?? 'uncertainty',
 maxEscalations: c?.maxEscalations ?? 1,
 };
@@ -1836,18 +1829,18 @@ function getCooldownConfig(config) {
 function getCostModel(config) {
 return (config.routing?.complexity?.simple ||
 config.routing?.cascade?.models?.[0] ||
- 'claude-3-5-haiku-20241022');
+ 'claude-haiku-4-5');
 }
 function getFastModel(config) {
 return (config.routing?.complexity?.simple ||
 config.routing?.cascade?.models?.[0] ||
- 'claude-3-5-haiku-20241022');
+ 'claude-haiku-4-5');
 }
 function getQualityModel(config) {
 return (config.routing?.complexity?.complex ||
 config.routing?.cascade?.models?.[config.routing?.cascade?.models?.length ? config.routing.cascade.models.length - 1 : 0] ||
 process.env['RELAYPLANE_QUALITY_MODEL'] ||
- 'claude-sonnet-4-20250514');
+ 'claude-sonnet-4-6');
 }
 async function cascadeRequest(config, makeRequest, log) {
 let escalations = 0;
@@ -1909,18 +1902,9 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
 <div class="cards">
 <div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
 <div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
- <div class="card"><div class="label">Savings (vs Opus)</div><div class="value green" id="savings">—</div></div>
+ <div class="card"><div class="label">Savings</div><div class="value green" id="savings">—</div></div>
 <div class="card"><div class="label">Avg Latency</div><div class="value" id="avgLat">—</div></div>
 </div>
- <div class="section"><h2>Auth & Routing</h2>
- <div id="routingDetails" style="display:flex;gap:16px;flex-wrap:wrap;margin-bottom:12px">
- <div class="prov-item"><span class="dot" id="authDot"></span> <strong>Auth:</strong>&nbsp;<span id="authInfo">—</span></div>
- <div class="prov-item"><strong>Routing:</strong>&nbsp;<span id="routingMode">—</span></div>
- <div class="prov-item"><strong>Simple→</strong>&nbsp;<span id="routeSimple">—</span></div>
- <div class="prov-item"><strong>Moderate→</strong>&nbsp;<span id="routeModerate">—</span></div>
- <div class="prov-item"><strong>Complex→</strong>&nbsp;<span id="routeComplex">—</span></div>
- </div>
- </div>
 <div class="section"><h2>Model Breakdown</h2>
 <table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
 <div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
@@ -1941,39 +1925,6 @@ async function load(){
 fetch('/v1/telemetry/health').then(r=>r.json())
 ]);
 $('ver').textContent='v'+health.version;
- const authDot=$('authDot'),authInfo=$('authInfo');
- if(health.auth){
- if(health.auth.anthropicApiKey){authDot.className='dot up';authInfo.textContent='API key ('+health.auth.anthropicApiKeyPrefix+')';}
- else{authDot.className='dot warn';authInfo.textContent='OAuth only (no API key)';}
- }
- if(health.routing){
- const mode=health.routing.mode||'passthrough';
- $('routingMode').textContent=mode;
- const routingSection=document.getElementById('routingDetails');
- const hasApiKey=health.auth&&health.auth.anthropicApiKey;
- if(mode==='passthrough'){
- if(routingSection)routingSection.innerHTML='<div class="prov-item">Routing: passthrough → model from incoming requests</div>';
- }else{
- if(health.routing.complexity){
- const cx=health.routing.complexity;
- const authLabel=function(model){
- if(hasApiKey)return '<span style="color:#34d399">● API key</span>';
- const isHaiku=model&&model.toLowerCase().includes('haiku');
- if(isHaiku)return '<span style="color:#ef4444">⚠️ OAuth - may fail</span>';
- return '<span style="color:#fbbf24">● OAuth</span>';
- };
- $('routeSimple').innerHTML=(cx.simple||'—')+' <small>'+authLabel(cx.simple)+'</small>';
- $('routeModerate').innerHTML=(cx.moderate||'—')+' <small>'+authLabel(cx.moderate)+'</small>';
- $('routeComplex').innerHTML=(cx.complex||'—')+' <small>'+authLabel(cx.complex)+'</small>';
- if(!hasApiKey&&cx.simple&&cx.simple.toLowerCase().includes('haiku')){
- const warn=document.createElement('div');
- warn.className='prov-item';warn.style.borderColor='#ef4444';warn.style.color='#ef4444';
- warn.innerHTML='⚠️ Haiku requires ANTHROPIC_API_KEY — OAuth not supported';
- if(routingSection)routingSection.appendChild(warn);
- }
- }
- }
- }
 $('uptime').textContent=dur(health.uptime);
 const total=stats.summary?.totalEvents||0;
 $('totalReq').textContent=total;
@@ -2102,60 +2053,6 @@ async function startProxy(config = {}) {
 process.on('SIGTERM', handleShutdown);
 const configPath = getProxyConfigPath();
 let proxyConfig = await loadProxyConfig(configPath, log);
- // Auto-config on startup: detect available auth and set optimal routing
- const configExists = await fs.promises.access(configPath).then(() => true).catch(() => false);
- if (!configExists || proxyConfig.routing?.mode === 'auto') {
- const envAnthropicKey = process.env['ANTHROPIC_API_KEY'];
- const hasRegularApiKey = !!envAnthropicKey && envAnthropicKey.startsWith('sk-ant-api');
- if (hasRegularApiKey) {
- // Full 3-tier routing with API key
- log('Auto-config: ANTHROPIC_API_KEY detected — enabling 3-tier routing (haiku/sonnet/opus)');
- if (!configExists) {
- const autoConfig = {
- enabled: true,
- modelOverrides: {},
- routing: {
- mode: 'auto',
- cascade: { enabled: false, models: [], escalateOn: 'uncertainty', maxEscalations: 1 },
- complexity: {
- enabled: true,
- simple: 'claude-haiku-4-5',
- moderate: 'claude-sonnet-4-6',
- complex: 'claude-opus-4-6',
- },
- },
- reliability: proxyConfig.reliability,
- };
- await saveProxyConfig(configPath, autoConfig);
- proxyConfig = await loadProxyConfig(configPath, log);
- log('Auto-config: wrote config to ' + configPath);
- }
- }
- else {
- // No regular API key — OAuth only, skip Haiku
- console.warn('[relayplane] ⚠️ No ANTHROPIC_API_KEY set — Haiku routing disabled (OAuth not supported). Set ANTHROPIC_API_KEY to enable 3-tier routing.');
- if (!configExists) {
- const autoConfig = {
- enabled: true,
- modelOverrides: {},
- routing: {
- mode: 'auto',
- cascade: { enabled: false, models: [], escalateOn: 'uncertainty', maxEscalations: 1 },
- complexity: {
- enabled: true,
- simple: 'claude-sonnet-4-6',
- moderate: 'claude-sonnet-4-6',
- complex: 'claude-opus-4-6',
- },
- },
- reliability: proxyConfig.reliability,
- };
- await saveProxyConfig(configPath, autoConfig);
- proxyConfig = await loadProxyConfig(configPath, log);
- log('Auto-config: wrote OAuth-safe config to ' + configPath + ' (no Haiku)');
- }
- }
- }
 const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
 let configWatcher = null;
 let configReloadTimer = null;
@@ -2171,8 +2068,6 @@ async function startProxy(config = {}) {
 reloadConfig().catch(() => { });
 }, 50);
 };
- let credentialsWatcher = null;
- const credentialsPath = path.join(path.dirname(configPath), 'credentials.json');
 const startConfigWatcher = () => {
 if (configWatcher)
 return;
@@ -2184,45 +2079,7 @@ async function startProxy(config = {}) {
 log(`Config watch error: ${error.message}`);
 }
 };
- const startCredentialsWatcher = () => {
- if (credentialsWatcher)
- return;
- try {
- // Watch credentials.json so login triggers a reload without proxy restart
- if (fs.existsSync(credentialsPath)) {
- credentialsWatcher = fs.watch(credentialsPath, () => {
- log('Credentials changed — reloading config');
- scheduleConfigReload();
- });
- }
- else {
- // Watch the directory for credentials.json creation
- const dir = path.dirname(credentialsPath);
- const dirWatcher = fs.watch(dir, (_, filename) => {
- if (filename === 'credentials.json') {
- log('Credentials file created — reloading config');
- scheduleConfigReload();
- // Now watch the file directly
- dirWatcher.close();
- try {
- credentialsWatcher = fs.watch(credentialsPath, () => {
- log('Credentials changed — reloading config');
- scheduleConfigReload();
- });
- }
- catch { }
- }
- });
- credentialsWatcher = dirWatcher;
- }
- }
- catch (err) {
- const error = err;
- log(`Credentials watch error: ${error.message}`);
- }
- };
 startConfigWatcher();
- startCredentialsWatcher();
 // Initialize RelayPlane
 const relay = new core_1.RelayPlane({ dbPath: config.dbPath });
 // Startup migration: clear default routing rules so complexity config takes priority
@@ -2253,10 +2110,6 @@ async function startProxy(config = {}) {
 if (req.method === 'GET' && (pathname === '/health' || pathname === '/healthz')) {
 const uptimeMs = Date.now() - globalStats.startedAt;
 res.writeHead(200, { 'Content-Type': 'application/json' });
- const anthropicEnvKeySet = !!process.env['ANTHROPIC_API_KEY'];
- const anthropicEnvKeyPrefix = anthropicEnvKeySet ? process.env['ANTHROPIC_API_KEY'].slice(0, 12) + '...' : null;
- const routingMode = proxyConfig.routing?.mode || 'passthrough';
- const complexityConfig = proxyConfig.routing?.complexity;
 res.end(JSON.stringify({
 status: 'ok',
 version: PROXY_VERSION,
@@ -2266,15 +2119,6 @@ async function startProxy(config = {}) {
 successRate: globalStats.totalRequests > 0
 ? parseFloat(((globalStats.successfulRequests / globalStats.totalRequests) * 100).toFixed(1))
 : null,
- auth: {
- anthropicApiKey: anthropicEnvKeySet,
- anthropicApiKeyPrefix: anthropicEnvKeyPrefix,
- note: anthropicEnvKeySet ? 'API key available for models that don\'t support OAuth' : 'No API key — OAuth passthrough only',
- },
- routing: {
- mode: routingMode,
- complexity: complexityConfig,
- },
 stats: {
 totalRequests: globalStats.totalRequests,
 successfulRequests: globalStats.successfulRequests,
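After this trim, the /health payload carries only liveness and counters; any dashboard reading health.auth or health.routing (removed above, matching the dashboard hunks earlier) will now find them undefined. Approximate shape, with invented values:

```js
// Approximate /health response after the change (values invented).
const health = {
  status: 'ok',
  version: '1.5.46',
  successRate: 99.1,   // percent, or null before any requests
  stats: {
    totalRequests: 420,
    successfulRequests: 416,
    // remaining counters continue as before
  },
};
```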
@@ -2400,7 +2244,7 @@ async function startProxy(config = {}) {
 const offset = parseInt(params.get('offset') || '0', 10);
 const sorted = [...requestHistory].reverse();
 const runs = sorted.slice(offset, offset + limit).map(r => {
- const origCost = (0, telemetry_js_1.estimateCost)('claude-opus-4-20250514', r.tokensIn, r.tokensOut);
+ const origCost = (0, telemetry_js_1.estimateCost)('claude-opus-4-6', r.tokensIn, r.tokensOut);
 const perRunSavings = Math.max(0, origCost - r.costUsd);
 return {
 id: r.id,
@@ -2431,7 +2275,7 @@ async function startProxy(config = {}) {
 if (req.method === 'GET' && telemetryPath === 'savings') {
 // Savings = cost if everything ran on Opus - actual cost
 // Always compare against Opus as the baseline
- const OPUS_BASELINE = 'claude-opus-4-20250514';
+ const OPUS_BASELINE = 'claude-opus-4-6';
 let totalOriginalCost = 0;
 let totalActualCost = 0;
 let totalSavedAmount = 0;
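The savings math itself is a counterfactual: price the same tokens as if they had run on the Opus baseline, subtract what the routed model actually cost, and floor at zero. With invented costs:

```js
// Mirrors the per-run savings logic above; costs are invented.
const origCost = 0.045;   // estimateCost('claude-opus-4-6', tokensIn, tokensOut)
const actualCost = 0.006; // what the routed model actually cost
const perRunSavings = Math.max(0, origCost - actualCost); // 0.039
```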
@@ -2556,7 +2400,8 @@ async function startProxy(config = {}) {
 useAnthropicEnvKey = undefined; // Only use incoming auth
 }
 else {
- // 'auto': Always keep env key available — needed for OAuth→API key fallback on rerouted requests
+ // 'auto': Use incoming auth if present, fallback to env
+ // ALWAYS keep env key available — OAuth (sk-ant-oat) doesn't work for all models (e.g. Haiku)
 useAnthropicEnvKey = anthropicEnvKey;
 }
 // === Native Anthropic /v1/messages endpoint (for Claude Code) ===
@@ -2795,8 +2640,7 @@ async function startProxy(config = {}) {
 if (modelAuth.isMax) {
 log(`Using MAX token for ${resolved.model}`);
 }
- const isCascadeRerouted = resolved.model !== originalModel;
- const providerResponse = await forwardNativeAnthropicRequest(attemptBody, ctx, modelAuth.apiKey, modelAuth.isMax, isCascadeRerouted);
+ const providerResponse = await forwardNativeAnthropicRequest(attemptBody, ctx, modelAuth.apiKey, modelAuth.isMax);
 const responseData = (await providerResponse.json());
 if (!providerResponse.ok) {
 if (proxyConfig.reliability?.cooldowns?.enabled) {
@@ -2823,12 +2667,7 @@ async function startProxy(config = {}) {
 if (modelAuth.isMax) {
 log(`Using MAX token for ${finalModel}`);
 }
- // isRerouted: true when auto-routing changed the model from what the user requested
- const isRerouted = routingMode !== 'passthrough' && finalModel !== originalModel;
- if (isRerouted) {
- log(`Rerouted: ${originalModel} → ${finalModel} (auth fallback enabled)`);
- }
- const providerResponse = await forwardNativeAnthropicRequest({ ...requestBody, model: finalModel }, ctx, modelAuth.apiKey, modelAuth.isMax, isRerouted);
+ const providerResponse = await forwardNativeAnthropicRequest({ ...requestBody, model: finalModel }, ctx, modelAuth.apiKey, modelAuth.isMax);
 if (!providerResponse.ok) {
 const errorPayload = (await providerResponse.json());
 if (proxyConfig.reliability?.cooldowns?.enabled) {
@@ -2854,8 +2693,6 @@ async function startProxy(config = {}) {
 const reader = providerResponse.body?.getReader();
 let streamTokensIn = 0;
 let streamTokensOut = 0;
- let streamCacheCreation = 0;
- let streamCacheRead = 0;
 if (reader) {
 const decoder = new TextDecoder();
 let sseBuffer = '';
@@ -2878,12 +2715,9 @@ async function startProxy(config = {}) {
 if (evt.type === 'message_delta' && evt.usage) {
 streamTokensOut = evt.usage.output_tokens ?? streamTokensOut;
 }
- // Anthropic: message_start has usage.input_tokens + cache token fields
+ // Anthropic: message_start has usage.input_tokens
 if (evt.type === 'message_start' && evt.message?.usage) {
- const u = evt.message.usage;
- streamCacheCreation = u.cache_creation_input_tokens ?? 0;
- streamCacheRead = u.cache_read_input_tokens ?? 0;
- streamTokensIn = (u.input_tokens ?? 0) + streamCacheCreation + streamCacheRead;
+ streamTokensIn = evt.message.usage.input_tokens ?? streamTokensIn;
 }
 // OpenAI format: choices with usage
 if (evt.usage) {
@@ -2903,7 +2737,7 @@ async function startProxy(config = {}) {
 }
 }
 // Store streaming token counts so telemetry can use them
- nativeResponseData = { usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut, cache_creation_input_tokens: streamCacheCreation, cache_read_input_tokens: streamCacheRead } };
+ nativeResponseData = { usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut } };
 res.end();
 }
 else {
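Streaming never delivers usage in one place: Anthropic reports input_tokens on message_start and a cumulative output_tokens on each message_delta, which is why the proxy accumulates both while relaying chunks. A self-contained sketch over parsed SSE events (sample events invented):

```js
// Accumulating token usage from parsed Anthropic SSE events (samples invented).
let streamTokensIn = 0;
let streamTokensOut = 0;

const events = [
  { type: 'message_start', message: { usage: { input_tokens: 1200 } } },
  { type: 'message_delta', usage: { output_tokens: 80 } },
  { type: 'message_delta', usage: { output_tokens: 210 } }, // cumulative total
];

for (const evt of events) {
  if (evt.type === 'message_start' && evt.message?.usage) {
    streamTokensIn = evt.message.usage.input_tokens ?? streamTokensIn;
  }
  if (evt.type === 'message_delta' && evt.usage) {
    streamTokensOut = evt.usage.output_tokens ?? streamTokensOut; // latest wins
  }
}

console.log(streamTokensIn, streamTokensOut); // 1200 210
```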
@@ -2920,12 +2754,9 @@ async function startProxy(config = {}) {
 // nativeResponseData holds response JSON for non-streaming, or { usage: { input_tokens, output_tokens } }
 // synthesised from SSE events for streaming
 const nativeUsageData = nativeResponseData?.usage;
- const nativeCacheCreation = nativeUsageData?.cache_creation_input_tokens ?? 0;
- const nativeCacheRead = nativeUsageData?.cache_read_input_tokens ?? 0;
- const nativeRawIn = nativeUsageData?.input_tokens ?? nativeUsageData?.prompt_tokens ?? 0;
- const nativeTokIn = nativeRawIn + nativeCacheCreation + nativeCacheRead;
+ const nativeTokIn = nativeUsageData?.input_tokens ?? nativeUsageData?.prompt_tokens ?? 0;
 const nativeTokOut = nativeUsageData?.output_tokens ?? nativeUsageData?.completion_tokens ?? 0;
- updateLastHistoryEntry(nativeTokIn, nativeTokOut, (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeRawIn, nativeTokOut, nativeCacheCreation, nativeCacheRead));
+ updateLastHistoryEntry(nativeTokIn, nativeTokOut, (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut));
 if (recordTelemetry) {
 relay
 .run({
@@ -2934,7 +2765,7 @@ async function startProxy(config = {}) {
 model: `${targetProvider}:${targetModel || requestedModel}`,
 })
 .catch(() => { });
- sendCloudTelemetry(taskType, targetModel || requestedModel, nativeTokIn, nativeTokOut, durationMs, true, undefined, originalModel ?? undefined, nativeCacheCreation, nativeCacheRead);
+ sendCloudTelemetry(taskType, targetModel || requestedModel, nativeTokIn, nativeTokOut, durationMs, true, undefined, originalModel ?? undefined);
 }
 }
 catch (err) {
@@ -3272,12 +3103,9 @@ async function startProxy(config = {}) {
 // Log cascade request for stats tracking
 logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0, taskType, complexity);
 const cascadeUsage = responseData?.usage;
- const cascadeCacheCreation = cascadeUsage?.cache_creation_input_tokens ?? 0;
- const cascadeCacheRead = cascadeUsage?.cache_read_input_tokens ?? 0;
- const cascadeRawIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
- const cascadeTokensIn = cascadeRawIn + cascadeCacheCreation + cascadeCacheRead;
+ const cascadeTokensIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
 const cascadeTokensOut = cascadeUsage?.output_tokens ?? cascadeUsage?.completion_tokens ?? 0;
- const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeRawIn, cascadeTokensOut, cascadeCacheCreation, cascadeCacheRead);
+ const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut);
 updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel);
 if (recordTelemetry) {
 try {
@@ -3300,7 +3128,7 @@ async function startProxy(config = {}) {
 catch (err) {
 log(`Failed to record run: ${err}`);
 }
- sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined, cascadeCacheCreation, cascadeCacheRead);
+ sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
 }
 const chatCascadeRpHeaders = buildRelayPlaneResponseHeaders(cascadeResult.model, originalRequestedModel ?? 'unknown', complexity, cascadeResult.provider, 'cascade');
 res.writeHead(200, { 'Content-Type': 'application/json', ...chatCascadeRpHeaders });
@@ -3449,8 +3277,6 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
 // Track token usage from streaming events
 let streamTokensIn = 0;
 let streamTokensOut = 0;
- let streamCacheCreation = 0;
- let streamCacheRead = 0;
 try {
 // Stream the response based on provider format
 switch (targetProvider) {
@@ -3467,8 +3293,6 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
 if (evt.usage) {
 streamTokensIn = evt.usage.prompt_tokens ?? streamTokensIn;
 streamTokensOut = evt.usage.completion_tokens ?? streamTokensOut;
- streamCacheCreation = evt.usage.cache_creation_input_tokens ?? streamCacheCreation;
- streamCacheRead = evt.usage.cache_read_input_tokens ?? streamCacheRead;
 }
 }
@@ -3525,9 +3349,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
 // Always log the request for stats/telemetry tracking
 logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
 // Update token/cost info on the history entry
- // For cost calculation with cache breakdown, pass raw input (total minus cache) separately
- const streamRawIn = streamCacheCreation || streamCacheRead ? streamTokensIn - streamCacheCreation - streamCacheRead : streamTokensIn;
- const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamRawIn, streamTokensOut, streamCacheCreation || undefined, streamCacheRead || undefined);
+ const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
 updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
 if (recordTelemetry) {
 // Record the run (non-blocking)
@@ -3543,7 +3365,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
 .catch((err) => {
 log(`Failed to record run: ${err}`);
 });
- sendCloudTelemetry(taskType, targetModel, streamTokensIn, streamTokensOut, durationMs, true, undefined, request.model ?? undefined, streamCacheCreation || undefined, streamCacheRead || undefined);
+ sendCloudTelemetry(taskType, targetModel, streamTokensIn, streamTokensOut, durationMs, true, undefined, request.model ?? undefined);
 }
 res.end();
 }
@@ -3587,12 +3409,9 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
 logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
 // Update token/cost info
 const usage = responseData?.usage;
- const cacheCreation = usage?.cache_creation_input_tokens ?? 0;
- const cacheRead = usage?.cache_read_input_tokens ?? 0;
- const rawIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
- const tokensIn = rawIn + cacheCreation + cacheRead;
+ const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
 const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
- const cost = (0, telemetry_js_1.estimateCost)(targetModel, rawIn, tokensOut, cacheCreation || undefined, cacheRead || undefined);
+ const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut);
 updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel);
 if (recordTelemetry) {
 // Record the run in RelayPlane
@@ -3617,13 +3436,10 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
 log(`Failed to record run: ${err}`);
 }
 // Extract token counts from response if available (Anthropic/OpenAI format)
- const usage2 = responseData?.usage;
- const cc2 = usage2?.cache_creation_input_tokens ?? 0;
- const cr2 = usage2?.cache_read_input_tokens ?? 0;
- const rawIn2 = usage2?.input_tokens ?? usage2?.prompt_tokens ?? 0;
- const tokensIn2 = rawIn2 + cc2 + cr2;
- const tokensOut2 = usage2?.output_tokens ?? usage2?.completion_tokens ?? 0;
- sendCloudTelemetry(taskType, targetModel, tokensIn2, tokensOut2, durationMs, true, undefined, undefined, cc2 || undefined, cr2 || undefined);
+ const usage = responseData?.usage;
+ const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
+ const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
+ sendCloudTelemetry(taskType, targetModel, tokensIn, tokensOut, durationMs, true);
 }
 // Send response with RelayPlane routing headers
 const nonStreamRpHeaders = buildRelayPlaneResponseHeaders(targetModel, request.model ?? 'unknown', complexity, targetProvider, routingMode);