@relayplane/proxy 1.5.44 → 1.5.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +65 -249
- package/dist/standalone-proxy.js.map +1 -1
- package/dist/swarm-client.d.ts +8 -8
- package/dist/swarm-client.d.ts.map +1 -1
- package/dist/swarm-client.js +24 -24
- package/dist/swarm-client.js.map +1 -1
- package/dist/telemetry.js +3 -3
- package/dist/telemetry.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,6 +5,13 @@
|
|
|
5
5
|
|
|
6
6
|
An open-source LLM proxy that sits between your AI agents and providers. Tracks every request, shows where the money goes, and offers configurable task-aware routing — all running locally.
|
|
7
7
|
|
|
8
|
+
**Key features:**
|
|
9
|
+
- 📊 Per-request cost tracking across 11+ providers
|
|
10
|
+
- 💰 **Cache-aware cost tracking** — accurately tracks Anthropic prompt caching with cache read savings, creation costs, and true per-request costs including cached tokens
|
|
11
|
+
- 🔀 Configurable task-aware routing (complexity-based, cascade, model overrides)
|
|
12
|
+
- 🛡️ Circuit breaker architecture — if the proxy fails, your agent doesn't notice
|
|
13
|
+
- 📈 Local dashboard with cost breakdown, savings analysis, and provider health
|
|
14
|
+
|
|
8
15
|
## Quick Start
|
|
9
16
|
|
|
10
17
|
```bash
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"standalone-proxy.d.ts","sourceRoot":"","sources":["../src/standalone-proxy.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAKlC,OAAO,KAAK,EAAE,QAAQ,EAAY,MAAM,kBAAkB,CAAC;AAG3D,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAU5C,2DAA2D;AAC3D,eAAO,MAAM,mBAAmB,gBAAuB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CA6C9D,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAc/E,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAGrD,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAQ/E,CAAC;
|
|
1
|
+
{"version":3,"file":"standalone-proxy.d.ts","sourceRoot":"","sources":["../src/standalone-proxy.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAKlC,OAAO,KAAK,EAAE,QAAQ,EAAY,MAAM,kBAAkB,CAAC;AAG3D,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAU5C,2DAA2D;AAC3D,eAAO,MAAM,mBAAmB,gBAAuB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CA6C9D,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAc/E,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAGrD,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAQ/E,CAAC;AAiCF;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,MAAM,EAAE,CAWjD;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMvD;AAkBD,KAAK,aAAa,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;AAEjD,UAAU,WAAW;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,aAAa,GAAG,IAAI,CAAC;CAC9B;AAcD,UAAU,aAAa;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,UAAU,EAAE,aAAa,GAAG,SAAS,GAAG,OAAO,CAAC;IAChD,cAAc,EAAE,MAAM,CAAC;CACxB;AAmBD,KAAK,UAAU,GAAG,QAAQ,GAAG,UAAU,GAAG,SAAS,CAAC;AA6EpD;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,aAAa,CAAC,EAAE,aAAa,GAAG,KAAK,GAAG,MAAM,CAAC;CAChD;AAuZD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW,CAe3D;AAuDD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC,GAAG,UAAU,CAuCpG;AAED,wBAAgB,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,CAAC,YAAY,CAAC,GAAG,OAAO,CAIlG;AAulDD;;GAEG;AACH,wBAAsB,UAAU,CAAC,MAAM,GAAE,WAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CA2xC/E"}
|
package/dist/standalone-proxy.js
CHANGED
|
@@ -133,14 +133,14 @@ exports.DEFAULT_ENDPOINTS = {
|
|
|
133
133
|
*/
|
|
134
134
|
exports.MODEL_MAPPING = {
|
|
135
135
|
// Anthropic models (using correct API model IDs)
|
|
136
|
-
'claude-opus-4-5': { provider: 'anthropic', model: 'claude-opus-4-
|
|
137
|
-
'claude-sonnet-4': { provider: 'anthropic', model: 'claude-sonnet-4-
|
|
138
|
-
'claude-3-5-sonnet': { provider: 'anthropic', model: 'claude-3-5-sonnet-
|
|
139
|
-
'claude-3-5-haiku': { provider: 'anthropic', model: 'claude-
|
|
140
|
-
'claude-haiku-4-5': { provider: 'anthropic', model: 'claude-haiku-4-5
|
|
141
|
-
haiku: { provider: 'anthropic', model: 'claude-haiku-4-5
|
|
142
|
-
sonnet: { provider: 'anthropic', model: 'claude-sonnet-4-
|
|
143
|
-
opus: { provider: 'anthropic', model: 'claude-opus-4-
|
|
136
|
+
'claude-opus-4-5': { provider: 'anthropic', model: 'claude-opus-4-6' },
|
|
137
|
+
'claude-sonnet-4': { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
138
|
+
'claude-3-5-sonnet': { provider: 'anthropic', model: 'claude-3-5-sonnet-latest' },
|
|
139
|
+
'claude-3-5-haiku': { provider: 'anthropic', model: 'claude-haiku-4-5' },
|
|
140
|
+
'claude-haiku-4-5': { provider: 'anthropic', model: 'claude-haiku-4-5' },
|
|
141
|
+
haiku: { provider: 'anthropic', model: 'claude-haiku-4-5' },
|
|
142
|
+
sonnet: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
143
|
+
opus: { provider: 'anthropic', model: 'claude-opus-4-6' },
|
|
144
144
|
// OpenAI models
|
|
145
145
|
'gpt-4o': { provider: 'openai', model: 'gpt-4o' },
|
|
146
146
|
'gpt-4o-mini': { provider: 'openai', model: 'gpt-4o-mini' },
|
|
@@ -171,10 +171,10 @@ exports.SMART_ALIASES = {
|
|
|
171
171
|
* Send a telemetry event to the cloud (anonymous or authenticated).
|
|
172
172
|
* Non-blocking — errors are silently swallowed.
|
|
173
173
|
*/
|
|
174
|
-
function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, success, costUsd, requestedModel
|
|
174
|
+
function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, success, costUsd, requestedModel) {
|
|
175
175
|
try {
|
|
176
|
-
const cost = costUsd ?? (0, telemetry_js_1.estimateCost)(model, tokensIn, tokensOut
|
|
177
|
-
|
|
176
|
+
const cost = costUsd ?? (0, telemetry_js_1.estimateCost)(model, tokensIn, tokensOut);
|
|
177
|
+
(0, telemetry_js_1.recordTelemetry)({
|
|
178
178
|
task_type: taskType,
|
|
179
179
|
model,
|
|
180
180
|
tokens_in: tokensIn,
|
|
@@ -183,12 +183,7 @@ function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, suc
|
|
|
183
183
|
success,
|
|
184
184
|
cost_usd: cost,
|
|
185
185
|
requested_model: requestedModel,
|
|
186
|
-
};
|
|
187
|
-
if (cacheCreationTokens)
|
|
188
|
-
event.cache_creation_tokens = cacheCreationTokens;
|
|
189
|
-
if (cacheReadTokens)
|
|
190
|
-
event.cache_read_tokens = cacheReadTokens;
|
|
191
|
-
(0, telemetry_js_1.recordTelemetry)(event);
|
|
186
|
+
});
|
|
192
187
|
}
|
|
193
188
|
catch {
|
|
194
189
|
// Telemetry should never break the proxy
|
|
@@ -494,18 +489,18 @@ const DEFAULT_PROXY_CONFIG = {
|
|
|
494
489
|
cascade: {
|
|
495
490
|
enabled: true,
|
|
496
491
|
models: [
|
|
497
|
-
'claude-
|
|
498
|
-
'claude-sonnet-4-
|
|
499
|
-
'claude-opus-4-
|
|
492
|
+
'claude-haiku-4-5',
|
|
493
|
+
'claude-sonnet-4-6',
|
|
494
|
+
'claude-opus-4-6',
|
|
500
495
|
],
|
|
501
496
|
escalateOn: 'uncertainty',
|
|
502
497
|
maxEscalations: 1,
|
|
503
498
|
},
|
|
504
499
|
complexity: {
|
|
505
500
|
enabled: true,
|
|
506
|
-
simple: 'claude-
|
|
507
|
-
moderate: 'claude-sonnet-4-
|
|
508
|
-
complex: 'claude-opus-4-
|
|
501
|
+
simple: 'claude-haiku-4-5',
|
|
502
|
+
moderate: 'claude-sonnet-4-6',
|
|
503
|
+
complex: 'claude-opus-4-6',
|
|
509
504
|
},
|
|
510
505
|
},
|
|
511
506
|
reliability: {
|
|
@@ -746,24 +741,29 @@ function getAuthForModel(model, authConfig, envApiKey) {
|
|
|
746
741
|
* MAX tokens (sk-ant-oat*) use Authorization: Bearer header
|
|
747
742
|
* API keys (sk-ant-api*) use x-api-key header
|
|
748
743
|
*/
|
|
749
|
-
function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken
|
|
744
|
+
function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
|
|
750
745
|
const headers = {
|
|
751
746
|
'Content-Type': 'application/json',
|
|
752
747
|
'anthropic-version': ctx.versionHeader || '2023-06-01',
|
|
753
748
|
};
|
|
754
|
-
//
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
if (isRerouted && incomingIsOAuth && apiKeyIsRegular) {
|
|
749
|
+
// Auth: prefer incoming auth for passthrough, but OAuth doesn't work for all models (e.g. Haiku)
|
|
750
|
+
// When we have a regular API key AND incoming auth is OAuth, prefer the API key for rerouted requests
|
|
751
|
+
// because OAuth may not be supported on the target model. The API key works for ALL models.
|
|
752
|
+
const incomingIsOAuth = !!(ctx.apiKeyHeader?.startsWith('sk-ant-oat') || ctx.authHeader?.includes('sk-ant-oat'));
|
|
753
|
+
if (incomingIsOAuth && apiKey && !apiKey.startsWith('sk-ant-oat')) {
|
|
760
754
|
headers['x-api-key'] = apiKey;
|
|
761
755
|
}
|
|
762
756
|
else if (ctx.authHeader) {
|
|
763
757
|
headers['Authorization'] = ctx.authHeader;
|
|
764
758
|
}
|
|
765
759
|
else if (ctx.apiKeyHeader) {
|
|
766
|
-
|
|
760
|
+
// MAX/OAuth tokens (sk-ant-oat*) must use Authorization: Bearer, not x-api-key
|
|
761
|
+
if (ctx.apiKeyHeader.startsWith('sk-ant-oat')) {
|
|
762
|
+
headers['Authorization'] = `Bearer ${ctx.apiKeyHeader}`;
|
|
763
|
+
}
|
|
764
|
+
else {
|
|
765
|
+
headers['x-api-key'] = ctx.apiKeyHeader;
|
|
766
|
+
}
|
|
767
767
|
}
|
|
768
768
|
else if (apiKey) {
|
|
769
769
|
// MAX tokens (OAuth) use Authorization: Bearer, API keys use x-api-key
|
|
@@ -850,8 +850,8 @@ async function forwardToAnthropicStream(request, targetModel, ctx, envApiKey) {
|
|
|
850
850
|
* Forward native Anthropic /v1/messages request (passthrough with routing)
|
|
851
851
|
* Used for Claude Code direct integration
|
|
852
852
|
*/
|
|
853
|
-
async function forwardNativeAnthropicRequest(body, ctx, envApiKey, isMaxToken
|
|
854
|
-
const headers = buildAnthropicHeadersWithAuth(ctx, envApiKey, isMaxToken
|
|
853
|
+
async function forwardNativeAnthropicRequest(body, ctx, envApiKey, isMaxToken) {
|
|
854
|
+
const headers = buildAnthropicHeadersWithAuth(ctx, envApiKey, isMaxToken);
|
|
855
855
|
const response = await fetch('https://api.anthropic.com/v1/messages', {
|
|
856
856
|
method: 'POST',
|
|
857
857
|
headers,
|
|
@@ -1412,11 +1412,9 @@ function convertAnthropicResponse(anthropicData) {
|
|
|
1412
1412
|
},
|
|
1413
1413
|
],
|
|
1414
1414
|
usage: {
|
|
1415
|
-
prompt_tokens:
|
|
1415
|
+
prompt_tokens: anthropicData.usage?.input_tokens ?? 0,
|
|
1416
1416
|
completion_tokens: anthropicData.usage?.output_tokens ?? 0,
|
|
1417
|
-
total_tokens: (anthropicData.usage?.input_tokens ?? 0) + (anthropicData.usage?.
|
|
1418
|
-
cache_creation_input_tokens: anthropicData.usage?.cache_creation_input_tokens ?? 0,
|
|
1419
|
-
cache_read_input_tokens: anthropicData.usage?.cache_read_input_tokens ?? 0,
|
|
1417
|
+
total_tokens: (anthropicData.usage?.input_tokens ?? 0) + (anthropicData.usage?.output_tokens ?? 0),
|
|
1420
1418
|
},
|
|
1421
1419
|
};
|
|
1422
1420
|
}
|
|
@@ -1439,16 +1437,11 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
1439
1437
|
const msg = eventData['message'];
|
|
1440
1438
|
baseChunk.id = msg?.['id'] || messageId;
|
|
1441
1439
|
choice.delta = { role: 'assistant', content: '' };
|
|
1442
|
-
// Pass through input token count from message_start
|
|
1440
|
+
// Pass through input token count from message_start
|
|
1443
1441
|
const msgUsage = msg?.['usage'];
|
|
1444
1442
|
if (msgUsage) {
|
|
1445
|
-
const cacheCreation = msgUsage['cache_creation_input_tokens'] ?? 0;
|
|
1446
|
-
const cacheRead = msgUsage['cache_read_input_tokens'] ?? 0;
|
|
1447
|
-
const inputTokens = msgUsage['input_tokens'] ?? 0;
|
|
1448
1443
|
baseChunk['usage'] = {
|
|
1449
|
-
prompt_tokens:
|
|
1450
|
-
cache_creation_input_tokens: cacheCreation,
|
|
1451
|
-
cache_read_input_tokens: cacheRead,
|
|
1444
|
+
prompt_tokens: msgUsage['input_tokens'] ?? 0,
|
|
1452
1445
|
};
|
|
1453
1446
|
}
|
|
1454
1447
|
return `data: ${JSON.stringify(baseChunk)}\n\n`;
|
|
@@ -1819,7 +1812,7 @@ function getCascadeConfig(config) {
|
|
|
1819
1812
|
const c = config.routing?.cascade;
|
|
1820
1813
|
return {
|
|
1821
1814
|
enabled: c?.enabled ?? true,
|
|
1822
|
-
models: c?.models ?? ['claude-
|
|
1815
|
+
models: c?.models ?? ['claude-haiku-4-5', 'claude-sonnet-4-6', 'claude-opus-4-6'],
|
|
1823
1816
|
escalateOn: c?.escalateOn ?? 'uncertainty',
|
|
1824
1817
|
maxEscalations: c?.maxEscalations ?? 1,
|
|
1825
1818
|
};
|
|
@@ -1836,18 +1829,18 @@ function getCooldownConfig(config) {
|
|
|
1836
1829
|
function getCostModel(config) {
|
|
1837
1830
|
return (config.routing?.complexity?.simple ||
|
|
1838
1831
|
config.routing?.cascade?.models?.[0] ||
|
|
1839
|
-
'claude-
|
|
1832
|
+
'claude-haiku-4-5');
|
|
1840
1833
|
}
|
|
1841
1834
|
function getFastModel(config) {
|
|
1842
1835
|
return (config.routing?.complexity?.simple ||
|
|
1843
1836
|
config.routing?.cascade?.models?.[0] ||
|
|
1844
|
-
'claude-
|
|
1837
|
+
'claude-haiku-4-5');
|
|
1845
1838
|
}
|
|
1846
1839
|
function getQualityModel(config) {
|
|
1847
1840
|
return (config.routing?.complexity?.complex ||
|
|
1848
1841
|
config.routing?.cascade?.models?.[config.routing?.cascade?.models?.length ? config.routing.cascade.models.length - 1 : 0] ||
|
|
1849
1842
|
process.env['RELAYPLANE_QUALITY_MODEL'] ||
|
|
1850
|
-
'claude-sonnet-4-
|
|
1843
|
+
'claude-sonnet-4-6');
|
|
1851
1844
|
}
|
|
1852
1845
|
async function cascadeRequest(config, makeRequest, log) {
|
|
1853
1846
|
let escalations = 0;
|
|
@@ -1909,18 +1902,9 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
|
1909
1902
|
<div class="cards">
|
|
1910
1903
|
<div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
|
|
1911
1904
|
<div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
|
|
1912
|
-
<div class="card"><div class="label">Savings
|
|
1905
|
+
<div class="card"><div class="label">Savings</div><div class="value green" id="savings">—</div></div>
|
|
1913
1906
|
<div class="card"><div class="label">Avg Latency</div><div class="value" id="avgLat">—</div></div>
|
|
1914
1907
|
</div>
|
|
1915
|
-
<div class="section"><h2>Auth & Routing</h2>
|
|
1916
|
-
<div id="routingDetails" style="display:flex;gap:16px;flex-wrap:wrap;margin-bottom:12px">
|
|
1917
|
-
<div class="prov-item"><span class="dot" id="authDot"></span> <strong>Auth:</strong> <span id="authInfo">—</span></div>
|
|
1918
|
-
<div class="prov-item"><strong>Routing:</strong> <span id="routingMode">—</span></div>
|
|
1919
|
-
<div class="prov-item"><strong>Simple→</strong> <span id="routeSimple">—</span></div>
|
|
1920
|
-
<div class="prov-item"><strong>Moderate→</strong> <span id="routeModerate">—</span></div>
|
|
1921
|
-
<div class="prov-item"><strong>Complex→</strong> <span id="routeComplex">—</span></div>
|
|
1922
|
-
</div>
|
|
1923
|
-
</div>
|
|
1924
1908
|
<div class="section"><h2>Model Breakdown</h2>
|
|
1925
1909
|
<table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
|
|
1926
1910
|
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
|
|
@@ -1941,39 +1925,6 @@ async function load(){
|
|
|
1941
1925
|
fetch('/v1/telemetry/health').then(r=>r.json())
|
|
1942
1926
|
]);
|
|
1943
1927
|
$('ver').textContent='v'+health.version;
|
|
1944
|
-
const authDot=$('authDot'),authInfo=$('authInfo');
|
|
1945
|
-
if(health.auth){
|
|
1946
|
-
if(health.auth.anthropicApiKey){authDot.className='dot up';authInfo.textContent='API key ('+health.auth.anthropicApiKeyPrefix+')';}
|
|
1947
|
-
else{authDot.className='dot warn';authInfo.textContent='OAuth only (no API key)';}
|
|
1948
|
-
}
|
|
1949
|
-
if(health.routing){
|
|
1950
|
-
const mode=health.routing.mode||'passthrough';
|
|
1951
|
-
$('routingMode').textContent=mode;
|
|
1952
|
-
const routingSection=document.getElementById('routingDetails');
|
|
1953
|
-
const hasApiKey=health.auth&&health.auth.anthropicApiKey;
|
|
1954
|
-
if(mode==='passthrough'){
|
|
1955
|
-
if(routingSection)routingSection.innerHTML='<div class="prov-item">Routing: passthrough → model from incoming requests</div>';
|
|
1956
|
-
}else{
|
|
1957
|
-
if(health.routing.complexity){
|
|
1958
|
-
const cx=health.routing.complexity;
|
|
1959
|
-
const authLabel=function(model){
|
|
1960
|
-
if(hasApiKey)return '<span style="color:#34d399">● API key</span>';
|
|
1961
|
-
const isHaiku=model&&model.toLowerCase().includes('haiku');
|
|
1962
|
-
if(isHaiku)return '<span style="color:#ef4444">⚠️ OAuth - may fail</span>';
|
|
1963
|
-
return '<span style="color:#fbbf24">● OAuth</span>';
|
|
1964
|
-
};
|
|
1965
|
-
$('routeSimple').innerHTML=(cx.simple||'—')+' <small>'+authLabel(cx.simple)+'</small>';
|
|
1966
|
-
$('routeModerate').innerHTML=(cx.moderate||'—')+' <small>'+authLabel(cx.moderate)+'</small>';
|
|
1967
|
-
$('routeComplex').innerHTML=(cx.complex||'—')+' <small>'+authLabel(cx.complex)+'</small>';
|
|
1968
|
-
if(!hasApiKey&&cx.simple&&cx.simple.toLowerCase().includes('haiku')){
|
|
1969
|
-
const warn=document.createElement('div');
|
|
1970
|
-
warn.className='prov-item';warn.style.borderColor='#ef4444';warn.style.color='#ef4444';
|
|
1971
|
-
warn.innerHTML='⚠️ Haiku requires ANTHROPIC_API_KEY — OAuth not supported';
|
|
1972
|
-
if(routingSection)routingSection.appendChild(warn);
|
|
1973
|
-
}
|
|
1974
|
-
}
|
|
1975
|
-
}
|
|
1976
|
-
}
|
|
1977
1928
|
$('uptime').textContent=dur(health.uptime);
|
|
1978
1929
|
const total=stats.summary?.totalEvents||0;
|
|
1979
1930
|
$('totalReq').textContent=total;
|
|
@@ -2102,60 +2053,6 @@ async function startProxy(config = {}) {
|
|
|
2102
2053
|
process.on('SIGTERM', handleShutdown);
|
|
2103
2054
|
const configPath = getProxyConfigPath();
|
|
2104
2055
|
let proxyConfig = await loadProxyConfig(configPath, log);
|
|
2105
|
-
// Auto-config on startup: detect available auth and set optimal routing
|
|
2106
|
-
const configExists = await fs.promises.access(configPath).then(() => true).catch(() => false);
|
|
2107
|
-
if (!configExists || proxyConfig.routing?.mode === 'auto') {
|
|
2108
|
-
const envAnthropicKey = process.env['ANTHROPIC_API_KEY'];
|
|
2109
|
-
const hasRegularApiKey = !!envAnthropicKey && envAnthropicKey.startsWith('sk-ant-api');
|
|
2110
|
-
if (hasRegularApiKey) {
|
|
2111
|
-
// Full 3-tier routing with API key
|
|
2112
|
-
log('Auto-config: ANTHROPIC_API_KEY detected — enabling 3-tier routing (haiku/sonnet/opus)');
|
|
2113
|
-
if (!configExists) {
|
|
2114
|
-
const autoConfig = {
|
|
2115
|
-
enabled: true,
|
|
2116
|
-
modelOverrides: {},
|
|
2117
|
-
routing: {
|
|
2118
|
-
mode: 'auto',
|
|
2119
|
-
cascade: { enabled: false, models: [], escalateOn: 'uncertainty', maxEscalations: 1 },
|
|
2120
|
-
complexity: {
|
|
2121
|
-
enabled: true,
|
|
2122
|
-
simple: 'claude-haiku-4-5',
|
|
2123
|
-
moderate: 'claude-sonnet-4-6',
|
|
2124
|
-
complex: 'claude-opus-4-6',
|
|
2125
|
-
},
|
|
2126
|
-
},
|
|
2127
|
-
reliability: proxyConfig.reliability,
|
|
2128
|
-
};
|
|
2129
|
-
await saveProxyConfig(configPath, autoConfig);
|
|
2130
|
-
proxyConfig = await loadProxyConfig(configPath, log);
|
|
2131
|
-
log('Auto-config: wrote config to ' + configPath);
|
|
2132
|
-
}
|
|
2133
|
-
}
|
|
2134
|
-
else {
|
|
2135
|
-
// No regular API key — OAuth only, skip Haiku
|
|
2136
|
-
console.warn('[relayplane] ⚠️ No ANTHROPIC_API_KEY set — Haiku routing disabled (OAuth not supported). Set ANTHROPIC_API_KEY to enable 3-tier routing.');
|
|
2137
|
-
if (!configExists) {
|
|
2138
|
-
const autoConfig = {
|
|
2139
|
-
enabled: true,
|
|
2140
|
-
modelOverrides: {},
|
|
2141
|
-
routing: {
|
|
2142
|
-
mode: 'auto',
|
|
2143
|
-
cascade: { enabled: false, models: [], escalateOn: 'uncertainty', maxEscalations: 1 },
|
|
2144
|
-
complexity: {
|
|
2145
|
-
enabled: true,
|
|
2146
|
-
simple: 'claude-sonnet-4-6',
|
|
2147
|
-
moderate: 'claude-sonnet-4-6',
|
|
2148
|
-
complex: 'claude-opus-4-6',
|
|
2149
|
-
},
|
|
2150
|
-
},
|
|
2151
|
-
reliability: proxyConfig.reliability,
|
|
2152
|
-
};
|
|
2153
|
-
await saveProxyConfig(configPath, autoConfig);
|
|
2154
|
-
proxyConfig = await loadProxyConfig(configPath, log);
|
|
2155
|
-
log('Auto-config: wrote OAuth-safe config to ' + configPath + ' (no Haiku)');
|
|
2156
|
-
}
|
|
2157
|
-
}
|
|
2158
|
-
}
|
|
2159
2056
|
const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
|
|
2160
2057
|
let configWatcher = null;
|
|
2161
2058
|
let configReloadTimer = null;
|
|
@@ -2171,8 +2068,6 @@ async function startProxy(config = {}) {
|
|
|
2171
2068
|
reloadConfig().catch(() => { });
|
|
2172
2069
|
}, 50);
|
|
2173
2070
|
};
|
|
2174
|
-
let credentialsWatcher = null;
|
|
2175
|
-
const credentialsPath = path.join(path.dirname(configPath), 'credentials.json');
|
|
2176
2071
|
const startConfigWatcher = () => {
|
|
2177
2072
|
if (configWatcher)
|
|
2178
2073
|
return;
|
|
@@ -2184,45 +2079,7 @@ async function startProxy(config = {}) {
|
|
|
2184
2079
|
log(`Config watch error: ${error.message}`);
|
|
2185
2080
|
}
|
|
2186
2081
|
};
|
|
2187
|
-
const startCredentialsWatcher = () => {
|
|
2188
|
-
if (credentialsWatcher)
|
|
2189
|
-
return;
|
|
2190
|
-
try {
|
|
2191
|
-
// Watch credentials.json so login triggers a reload without proxy restart
|
|
2192
|
-
if (fs.existsSync(credentialsPath)) {
|
|
2193
|
-
credentialsWatcher = fs.watch(credentialsPath, () => {
|
|
2194
|
-
log('Credentials changed — reloading config');
|
|
2195
|
-
scheduleConfigReload();
|
|
2196
|
-
});
|
|
2197
|
-
}
|
|
2198
|
-
else {
|
|
2199
|
-
// Watch the directory for credentials.json creation
|
|
2200
|
-
const dir = path.dirname(credentialsPath);
|
|
2201
|
-
const dirWatcher = fs.watch(dir, (_, filename) => {
|
|
2202
|
-
if (filename === 'credentials.json') {
|
|
2203
|
-
log('Credentials file created — reloading config');
|
|
2204
|
-
scheduleConfigReload();
|
|
2205
|
-
// Now watch the file directly
|
|
2206
|
-
dirWatcher.close();
|
|
2207
|
-
try {
|
|
2208
|
-
credentialsWatcher = fs.watch(credentialsPath, () => {
|
|
2209
|
-
log('Credentials changed — reloading config');
|
|
2210
|
-
scheduleConfigReload();
|
|
2211
|
-
});
|
|
2212
|
-
}
|
|
2213
|
-
catch { }
|
|
2214
|
-
}
|
|
2215
|
-
});
|
|
2216
|
-
credentialsWatcher = dirWatcher;
|
|
2217
|
-
}
|
|
2218
|
-
}
|
|
2219
|
-
catch (err) {
|
|
2220
|
-
const error = err;
|
|
2221
|
-
log(`Credentials watch error: ${error.message}`);
|
|
2222
|
-
}
|
|
2223
|
-
};
|
|
2224
2082
|
startConfigWatcher();
|
|
2225
|
-
startCredentialsWatcher();
|
|
2226
2083
|
// Initialize RelayPlane
|
|
2227
2084
|
const relay = new core_1.RelayPlane({ dbPath: config.dbPath });
|
|
2228
2085
|
// Startup migration: clear default routing rules so complexity config takes priority
|
|
@@ -2253,10 +2110,6 @@ async function startProxy(config = {}) {
|
|
|
2253
2110
|
if (req.method === 'GET' && (pathname === '/health' || pathname === '/healthz')) {
|
|
2254
2111
|
const uptimeMs = Date.now() - globalStats.startedAt;
|
|
2255
2112
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2256
|
-
const anthropicEnvKeySet = !!process.env['ANTHROPIC_API_KEY'];
|
|
2257
|
-
const anthropicEnvKeyPrefix = anthropicEnvKeySet ? process.env['ANTHROPIC_API_KEY'].slice(0, 12) + '...' : null;
|
|
2258
|
-
const routingMode = proxyConfig.routing?.mode || 'passthrough';
|
|
2259
|
-
const complexityConfig = proxyConfig.routing?.complexity;
|
|
2260
2113
|
res.end(JSON.stringify({
|
|
2261
2114
|
status: 'ok',
|
|
2262
2115
|
version: PROXY_VERSION,
|
|
@@ -2266,15 +2119,6 @@ async function startProxy(config = {}) {
|
|
|
2266
2119
|
successRate: globalStats.totalRequests > 0
|
|
2267
2120
|
? parseFloat(((globalStats.successfulRequests / globalStats.totalRequests) * 100).toFixed(1))
|
|
2268
2121
|
: null,
|
|
2269
|
-
auth: {
|
|
2270
|
-
anthropicApiKey: anthropicEnvKeySet,
|
|
2271
|
-
anthropicApiKeyPrefix: anthropicEnvKeyPrefix,
|
|
2272
|
-
note: anthropicEnvKeySet ? 'API key available for models that don\'t support OAuth' : 'No API key — OAuth passthrough only',
|
|
2273
|
-
},
|
|
2274
|
-
routing: {
|
|
2275
|
-
mode: routingMode,
|
|
2276
|
-
complexity: complexityConfig,
|
|
2277
|
-
},
|
|
2278
2122
|
stats: {
|
|
2279
2123
|
totalRequests: globalStats.totalRequests,
|
|
2280
2124
|
successfulRequests: globalStats.successfulRequests,
|
|
@@ -2400,7 +2244,7 @@ async function startProxy(config = {}) {
|
|
|
2400
2244
|
const offset = parseInt(params.get('offset') || '0', 10);
|
|
2401
2245
|
const sorted = [...requestHistory].reverse();
|
|
2402
2246
|
const runs = sorted.slice(offset, offset + limit).map(r => {
|
|
2403
|
-
const origCost = (0, telemetry_js_1.estimateCost)('claude-opus-4-
|
|
2247
|
+
const origCost = (0, telemetry_js_1.estimateCost)('claude-opus-4-6', r.tokensIn, r.tokensOut);
|
|
2404
2248
|
const perRunSavings = Math.max(0, origCost - r.costUsd);
|
|
2405
2249
|
return {
|
|
2406
2250
|
id: r.id,
|
|
@@ -2431,7 +2275,7 @@ async function startProxy(config = {}) {
|
|
|
2431
2275
|
if (req.method === 'GET' && telemetryPath === 'savings') {
|
|
2432
2276
|
// Savings = cost if everything ran on Opus - actual cost
|
|
2433
2277
|
// Always compare against Opus as the baseline
|
|
2434
|
-
const OPUS_BASELINE = 'claude-opus-4-
|
|
2278
|
+
const OPUS_BASELINE = 'claude-opus-4-6';
|
|
2435
2279
|
let totalOriginalCost = 0;
|
|
2436
2280
|
let totalActualCost = 0;
|
|
2437
2281
|
let totalSavedAmount = 0;
|
|
@@ -2556,7 +2400,8 @@ async function startProxy(config = {}) {
|
|
|
2556
2400
|
useAnthropicEnvKey = undefined; // Only use incoming auth
|
|
2557
2401
|
}
|
|
2558
2402
|
else {
|
|
2559
|
-
// 'auto':
|
|
2403
|
+
// 'auto': Use incoming auth if present, fallback to env
|
|
2404
|
+
// ALWAYS keep env key available — OAuth (sk-ant-oat) doesn't work for all models (e.g. Haiku)
|
|
2560
2405
|
useAnthropicEnvKey = anthropicEnvKey;
|
|
2561
2406
|
}
|
|
2562
2407
|
// === Native Anthropic /v1/messages endpoint (for Claude Code) ===
|
|
@@ -2795,8 +2640,7 @@ async function startProxy(config = {}) {
|
|
|
2795
2640
|
if (modelAuth.isMax) {
|
|
2796
2641
|
log(`Using MAX token for ${resolved.model}`);
|
|
2797
2642
|
}
|
|
2798
|
-
const
|
|
2799
|
-
const providerResponse = await forwardNativeAnthropicRequest(attemptBody, ctx, modelAuth.apiKey, modelAuth.isMax, isCascadeRerouted);
|
|
2643
|
+
const providerResponse = await forwardNativeAnthropicRequest(attemptBody, ctx, modelAuth.apiKey, modelAuth.isMax);
|
|
2800
2644
|
const responseData = (await providerResponse.json());
|
|
2801
2645
|
if (!providerResponse.ok) {
|
|
2802
2646
|
if (proxyConfig.reliability?.cooldowns?.enabled) {
|
|
@@ -2823,12 +2667,7 @@ async function startProxy(config = {}) {
|
|
|
2823
2667
|
if (modelAuth.isMax) {
|
|
2824
2668
|
log(`Using MAX token for ${finalModel}`);
|
|
2825
2669
|
}
|
|
2826
|
-
|
|
2827
|
-
const isRerouted = routingMode !== 'passthrough' && finalModel !== originalModel;
|
|
2828
|
-
if (isRerouted) {
|
|
2829
|
-
log(`Rerouted: ${originalModel} → ${finalModel} (auth fallback enabled)`);
|
|
2830
|
-
}
|
|
2831
|
-
const providerResponse = await forwardNativeAnthropicRequest({ ...requestBody, model: finalModel }, ctx, modelAuth.apiKey, modelAuth.isMax, isRerouted);
|
|
2670
|
+
const providerResponse = await forwardNativeAnthropicRequest({ ...requestBody, model: finalModel }, ctx, modelAuth.apiKey, modelAuth.isMax);
|
|
2832
2671
|
if (!providerResponse.ok) {
|
|
2833
2672
|
const errorPayload = (await providerResponse.json());
|
|
2834
2673
|
if (proxyConfig.reliability?.cooldowns?.enabled) {
|
|
@@ -2854,8 +2693,6 @@ async function startProxy(config = {}) {
|
|
|
2854
2693
|
const reader = providerResponse.body?.getReader();
|
|
2855
2694
|
let streamTokensIn = 0;
|
|
2856
2695
|
let streamTokensOut = 0;
|
|
2857
|
-
let streamCacheCreation = 0;
|
|
2858
|
-
let streamCacheRead = 0;
|
|
2859
2696
|
if (reader) {
|
|
2860
2697
|
const decoder = new TextDecoder();
|
|
2861
2698
|
let sseBuffer = '';
|
|
@@ -2878,12 +2715,9 @@ async function startProxy(config = {}) {
|
|
|
2878
2715
|
if (evt.type === 'message_delta' && evt.usage) {
|
|
2879
2716
|
streamTokensOut = evt.usage.output_tokens ?? streamTokensOut;
|
|
2880
2717
|
}
|
|
2881
|
-
// Anthropic: message_start has usage.input_tokens
|
|
2718
|
+
// Anthropic: message_start has usage.input_tokens
|
|
2882
2719
|
if (evt.type === 'message_start' && evt.message?.usage) {
|
|
2883
|
-
|
|
2884
|
-
streamCacheCreation = u.cache_creation_input_tokens ?? 0;
|
|
2885
|
-
streamCacheRead = u.cache_read_input_tokens ?? 0;
|
|
2886
|
-
streamTokensIn = (u.input_tokens ?? 0) + streamCacheCreation + streamCacheRead;
|
|
2720
|
+
streamTokensIn = evt.message.usage.input_tokens ?? streamTokensIn;
|
|
2887
2721
|
}
|
|
2888
2722
|
// OpenAI format: choices with usage
|
|
2889
2723
|
if (evt.usage) {
|
|
@@ -2903,7 +2737,7 @@ async function startProxy(config = {}) {
|
|
|
2903
2737
|
}
|
|
2904
2738
|
}
|
|
2905
2739
|
// Store streaming token counts so telemetry can use them
|
|
2906
|
-
nativeResponseData = { usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut
|
|
2740
|
+
nativeResponseData = { usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut } };
|
|
2907
2741
|
res.end();
|
|
2908
2742
|
}
|
|
2909
2743
|
else {
|
|
@@ -2920,12 +2754,9 @@ async function startProxy(config = {}) {
|
|
|
2920
2754
|
// nativeResponseData holds response JSON for non-streaming, or { usage: { input_tokens, output_tokens } }
|
|
2921
2755
|
// synthesised from SSE events for streaming
|
|
2922
2756
|
const nativeUsageData = nativeResponseData?.usage;
|
|
2923
|
-
const
|
|
2924
|
-
const nativeCacheRead = nativeUsageData?.cache_read_input_tokens ?? 0;
|
|
2925
|
-
const nativeRawIn = nativeUsageData?.input_tokens ?? nativeUsageData?.prompt_tokens ?? 0;
|
|
2926
|
-
const nativeTokIn = nativeRawIn + nativeCacheCreation + nativeCacheRead;
|
|
2757
|
+
const nativeTokIn = nativeUsageData?.input_tokens ?? nativeUsageData?.prompt_tokens ?? 0;
|
|
2927
2758
|
const nativeTokOut = nativeUsageData?.output_tokens ?? nativeUsageData?.completion_tokens ?? 0;
|
|
2928
|
-
updateLastHistoryEntry(nativeTokIn, nativeTokOut, (0, telemetry_js_1.estimateCost)(targetModel || requestedModel,
|
|
2759
|
+
updateLastHistoryEntry(nativeTokIn, nativeTokOut, (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut));
|
|
2929
2760
|
if (recordTelemetry) {
|
|
2930
2761
|
relay
|
|
2931
2762
|
.run({
|
|
@@ -2934,7 +2765,7 @@ async function startProxy(config = {}) {
|
|
|
2934
2765
|
model: `${targetProvider}:${targetModel || requestedModel}`,
|
|
2935
2766
|
})
|
|
2936
2767
|
.catch(() => { });
|
|
2937
|
-
sendCloudTelemetry(taskType, targetModel || requestedModel, nativeTokIn, nativeTokOut, durationMs, true, undefined, originalModel ?? undefined
|
|
2768
|
+
sendCloudTelemetry(taskType, targetModel || requestedModel, nativeTokIn, nativeTokOut, durationMs, true, undefined, originalModel ?? undefined);
|
|
2938
2769
|
}
|
|
2939
2770
|
}
|
|
2940
2771
|
catch (err) {
|
|
@@ -3272,12 +3103,9 @@ async function startProxy(config = {}) {
|
|
|
3272
3103
|
// Log cascade request for stats tracking
|
|
3273
3104
|
logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0, taskType, complexity);
|
|
3274
3105
|
const cascadeUsage = responseData?.usage;
|
|
3275
|
-
const
|
|
3276
|
-
const cascadeCacheRead = cascadeUsage?.cache_read_input_tokens ?? 0;
|
|
3277
|
-
const cascadeRawIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
|
|
3278
|
-
const cascadeTokensIn = cascadeRawIn + cascadeCacheCreation + cascadeCacheRead;
|
|
3106
|
+
const cascadeTokensIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
|
|
3279
3107
|
const cascadeTokensOut = cascadeUsage?.output_tokens ?? cascadeUsage?.completion_tokens ?? 0;
|
|
3280
|
-
const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model,
|
|
3108
|
+
const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut);
|
|
3281
3109
|
updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel);
|
|
3282
3110
|
if (recordTelemetry) {
|
|
3283
3111
|
try {
|
|
@@ -3300,7 +3128,7 @@ async function startProxy(config = {}) {
|
|
|
3300
3128
|
catch (err) {
|
|
3301
3129
|
log(`Failed to record run: ${err}`);
|
|
3302
3130
|
}
|
|
3303
|
-
sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined
|
|
3131
|
+
sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
|
|
3304
3132
|
}
|
|
3305
3133
|
const chatCascadeRpHeaders = buildRelayPlaneResponseHeaders(cascadeResult.model, originalRequestedModel ?? 'unknown', complexity, cascadeResult.provider, 'cascade');
|
|
3306
3134
|
res.writeHead(200, { 'Content-Type': 'application/json', ...chatCascadeRpHeaders });
|
|
@@ -3449,8 +3277,6 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3449
3277
|
// Track token usage from streaming events
|
|
3450
3278
|
let streamTokensIn = 0;
|
|
3451
3279
|
let streamTokensOut = 0;
|
|
3452
|
-
let streamCacheCreation = 0;
|
|
3453
|
-
let streamCacheRead = 0;
|
|
3454
3280
|
try {
|
|
3455
3281
|
// Stream the response based on provider format
|
|
3456
3282
|
switch (targetProvider) {
|
|
@@ -3467,8 +3293,6 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3467
3293
|
if (evt.usage) {
|
|
3468
3294
|
streamTokensIn = evt.usage.prompt_tokens ?? streamTokensIn;
|
|
3469
3295
|
streamTokensOut = evt.usage.completion_tokens ?? streamTokensOut;
|
|
3470
|
-
streamCacheCreation = evt.usage.cache_creation_input_tokens ?? streamCacheCreation;
|
|
3471
|
-
streamCacheRead = evt.usage.cache_read_input_tokens ?? streamCacheRead;
|
|
3472
3296
|
}
|
|
3473
3297
|
}
|
|
3474
3298
|
}
|
|
@@ -3525,9 +3349,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3525
3349
|
// Always log the request for stats/telemetry tracking
|
|
3526
3350
|
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
|
|
3527
3351
|
// Update token/cost info on the history entry
|
|
3528
|
-
|
|
3529
|
-
const streamRawIn = streamCacheCreation || streamCacheRead ? streamTokensIn - streamCacheCreation - streamCacheRead : streamTokensIn;
|
|
3530
|
-
const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamRawIn, streamTokensOut, streamCacheCreation || undefined, streamCacheRead || undefined);
|
|
3352
|
+
const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
|
|
3531
3353
|
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
|
|
3532
3354
|
if (recordTelemetry) {
|
|
3533
3355
|
// Record the run (non-blocking)
|
|
@@ -3543,7 +3365,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3543
3365
|
.catch((err) => {
|
|
3544
3366
|
log(`Failed to record run: ${err}`);
|
|
3545
3367
|
});
|
|
3546
|
-
sendCloudTelemetry(taskType, targetModel, streamTokensIn, streamTokensOut, durationMs, true, undefined, request.model ?? undefined
|
|
3368
|
+
sendCloudTelemetry(taskType, targetModel, streamTokensIn, streamTokensOut, durationMs, true, undefined, request.model ?? undefined);
|
|
3547
3369
|
}
|
|
3548
3370
|
res.end();
|
|
3549
3371
|
}
|
|
@@ -3587,12 +3409,9 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
3587
3409
|
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
|
|
3588
3410
|
// Update token/cost info
|
|
3589
3411
|
const usage = responseData?.usage;
|
|
3590
|
-
const
|
|
3591
|
-
const cacheRead = usage?.cache_read_input_tokens ?? 0;
|
|
3592
|
-
const rawIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|
|
3593
|
-
const tokensIn = rawIn + cacheCreation + cacheRead;
|
|
3412
|
+
const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|
|
3594
3413
|
const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
|
|
3595
|
-
const cost = (0, telemetry_js_1.estimateCost)(targetModel,
|
|
3414
|
+
const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut);
|
|
3596
3415
|
updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel);
|
|
3597
3416
|
if (recordTelemetry) {
|
|
3598
3417
|
// Record the run in RelayPlane
|
|
@@ -3617,13 +3436,10 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
3617
3436
|
log(`Failed to record run: ${err}`);
|
|
3618
3437
|
}
|
|
3619
3438
|
// Extract token counts from response if available (Anthropic/OpenAI format)
|
|
3620
|
-
const
|
|
3621
|
-
const
|
|
3622
|
-
const
|
|
3623
|
-
|
|
3624
|
-
const tokensIn2 = rawIn2 + cc2 + cr2;
|
|
3625
|
-
const tokensOut2 = usage2?.output_tokens ?? usage2?.completion_tokens ?? 0;
|
|
3626
|
-
sendCloudTelemetry(taskType, targetModel, tokensIn2, tokensOut2, durationMs, true, undefined, undefined, cc2 || undefined, cr2 || undefined);
|
|
3439
|
+
const usage = responseData?.usage;
|
|
3440
|
+
const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|
|
3441
|
+
const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
|
|
3442
|
+
sendCloudTelemetry(taskType, targetModel, tokensIn, tokensOut, durationMs, true);
|
|
3627
3443
|
}
|
|
3628
3444
|
// Send response with RelayPlane routing headers
|
|
3629
3445
|
const nonStreamRpHeaders = buildRelayPlaneResponseHeaders(targetModel, request.model ?? 'unknown', complexity, targetProvider, routingMode);
|