@chenpu17/cc-gw 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/server/dist/index.js +242 -104
- package/src/web/dist/assets/{About-CfWemXks.js → About-DK242vw9.js} +2 -2
- package/src/web/dist/assets/{ApiKeys-jGq-kKf8.js → ApiKeys-BGROxK6-.js} +1 -1
- package/src/web/dist/assets/{Button-DdVyALkb.js → Button-Bnnxe9ep.js} +1 -1
- package/src/web/dist/assets/Dashboard-B7zokimB.js +16 -0
- package/src/web/dist/assets/{FormField-Dyi1Kr0k.js → FormField-BzT4FGj8.js} +1 -1
- package/src/web/dist/assets/{Help-iZXiqNPj.js → Help-CJooSMdJ.js} +1 -1
- package/src/web/dist/assets/{Input-DACzKnnk.js → Input-B-P-J4xQ.js} +1 -1
- package/src/web/dist/assets/{Login-mm-NJUOP.js → Login-B9RgxiYX.js} +1 -1
- package/src/web/dist/assets/Logs-CsJCTftU.js +1 -0
- package/src/web/dist/assets/{ModelManagement-B4f8RUk8.js → ModelManagement-dDhNa_5z.js} +1 -1
- package/src/web/dist/assets/{PageSection-BfMkaweN.js → PageSection-Dzvd3cKD.js} +1 -1
- package/src/web/dist/assets/{Settings-DRLUoVLq.js → Settings-BcMQ79b0.js} +1 -1
- package/src/web/dist/assets/{StatusBadge-OH4R_aKr.js → StatusBadge-CAkVtC--.js} +1 -1
- package/src/web/dist/assets/{copy-BVgnqUsP.js → copy-D6cuJHzh.js} +1 -1
- package/src/web/dist/assets/{index-C2xoexDc.js → index-Cm-hZvRJ.js} +1 -1
- package/src/web/dist/assets/{index-CAbsGgAq.js → index-agm-2asf.js} +5 -5
- package/src/web/dist/assets/{info-CpmSkfUl.js → info-CfAuBePJ.js} +1 -1
- package/src/web/dist/assets/{useApiQuery-CNbY7eTZ.js → useApiQuery-ns68sM2H.js} +1 -1
- package/src/web/dist/index.html +1 -1
- package/src/web/dist/assets/Dashboard-uoI9oSjK.js +0 -16
- package/src/web/dist/assets/Logs-DNxTsrk3.js +0 -1
package/src/server/dist/index.js
CHANGED
|
@@ -11103,7 +11103,11 @@ async function migrateDailyMetricsTable(db) {
|
|
|
11103
11103
|
if (!hasEndpointColumn || !hasCompositePrimaryKey) {
|
|
11104
11104
|
const endpointSelector = hasEndpointColumn ? "COALESCE(endpoint, 'anthropic')" : "'anthropic'";
|
|
11105
11105
|
const hasCachedTokensColumn = columns.some((column) => column.name === "total_cached_tokens");
|
|
11106
|
+
const hasCacheReadColumn = columns.some((column) => column.name === "total_cache_read_tokens");
|
|
11107
|
+
const hasCacheCreationColumn = columns.some((column) => column.name === "total_cache_creation_tokens");
|
|
11106
11108
|
const cachedTokensSelector = hasCachedTokensColumn ? "COALESCE(total_cached_tokens, 0)" : "0";
|
|
11109
|
+
const cacheReadSelector = hasCacheReadColumn ? "COALESCE(total_cache_read_tokens, 0)" : "0";
|
|
11110
|
+
const cacheCreationSelector = hasCacheCreationColumn ? "COALESCE(total_cache_creation_tokens, 0)" : "0";
|
|
11107
11111
|
await exec(
|
|
11108
11112
|
db,
|
|
11109
11113
|
`ALTER TABLE daily_metrics RENAME TO daily_metrics_old;
|
|
@@ -11114,16 +11118,20 @@ async function migrateDailyMetricsTable(db) {
|
|
|
11114
11118
|
total_input_tokens INTEGER DEFAULT 0,
|
|
11115
11119
|
total_output_tokens INTEGER DEFAULT 0,
|
|
11116
11120
|
total_cached_tokens INTEGER DEFAULT 0,
|
|
11121
|
+
total_cache_read_tokens INTEGER DEFAULT 0,
|
|
11122
|
+
total_cache_creation_tokens INTEGER DEFAULT 0,
|
|
11117
11123
|
total_latency_ms INTEGER DEFAULT 0,
|
|
11118
11124
|
PRIMARY KEY (date, endpoint)
|
|
11119
11125
|
);
|
|
11120
|
-
INSERT INTO daily_metrics (date, endpoint, request_count, total_input_tokens, total_output_tokens, total_cached_tokens, total_latency_ms)
|
|
11126
|
+
INSERT INTO daily_metrics (date, endpoint, request_count, total_input_tokens, total_output_tokens, total_cached_tokens, total_cache_read_tokens, total_cache_creation_tokens, total_latency_ms)
|
|
11121
11127
|
SELECT date,
|
|
11122
11128
|
${endpointSelector},
|
|
11123
11129
|
request_count,
|
|
11124
11130
|
total_input_tokens,
|
|
11125
11131
|
total_output_tokens,
|
|
11126
11132
|
${cachedTokensSelector},
|
|
11133
|
+
${cacheReadSelector},
|
|
11134
|
+
${cacheCreationSelector},
|
|
11127
11135
|
total_latency_ms
|
|
11128
11136
|
FROM daily_metrics_old;
|
|
11129
11137
|
DROP TABLE daily_metrics_old;`
|
|
@@ -11181,6 +11189,8 @@ async function ensureSchema(db) {
|
|
|
11181
11189
|
total_input_tokens INTEGER DEFAULT 0,
|
|
11182
11190
|
total_output_tokens INTEGER DEFAULT 0,
|
|
11183
11191
|
total_cached_tokens INTEGER DEFAULT 0,
|
|
11192
|
+
total_cache_read_tokens INTEGER DEFAULT 0,
|
|
11193
|
+
total_cache_creation_tokens INTEGER DEFAULT 0,
|
|
11184
11194
|
total_latency_ms INTEGER DEFAULT 0,
|
|
11185
11195
|
PRIMARY KEY (date, endpoint)
|
|
11186
11196
|
);
|
|
@@ -11217,6 +11227,8 @@ async function ensureSchema(db) {
|
|
|
11217
11227
|
);
|
|
11218
11228
|
await maybeAddColumn(db, "request_logs", "client_model", "TEXT");
|
|
11219
11229
|
await maybeAddColumn(db, "request_logs", "cached_tokens", "INTEGER");
|
|
11230
|
+
await maybeAddColumn(db, "request_logs", "cache_read_tokens", "INTEGER DEFAULT 0");
|
|
11231
|
+
await maybeAddColumn(db, "request_logs", "cache_creation_tokens", "INTEGER DEFAULT 0");
|
|
11220
11232
|
await maybeAddColumn(db, "request_logs", "ttft_ms", "INTEGER");
|
|
11221
11233
|
await maybeAddColumn(db, "request_logs", "tpot_ms", "REAL");
|
|
11222
11234
|
await maybeAddColumn(db, "request_logs", "stream", "INTEGER");
|
|
@@ -11239,6 +11251,8 @@ async function ensureSchema(db) {
|
|
|
11239
11251
|
await maybeAddColumn(db, "api_keys", "total_output_tokens", "INTEGER DEFAULT 0");
|
|
11240
11252
|
await migrateDailyMetricsTable(db);
|
|
11241
11253
|
await maybeAddColumn(db, "daily_metrics", "total_cached_tokens", "INTEGER DEFAULT 0");
|
|
11254
|
+
await maybeAddColumn(db, "daily_metrics", "total_cache_read_tokens", "INTEGER DEFAULT 0");
|
|
11255
|
+
await maybeAddColumn(db, "daily_metrics", "total_cache_creation_tokens", "INTEGER DEFAULT 0");
|
|
11242
11256
|
await run(db, "CREATE UNIQUE INDEX IF NOT EXISTS idx_api_keys_hash ON api_keys(key_hash) WHERE key_hash IS NOT NULL");
|
|
11243
11257
|
await run(db, "UPDATE api_keys SET key_hash = '*' WHERE is_wildcard = 1 AND (key_hash IS NULL OR key_hash = '')");
|
|
11244
11258
|
await run(db, "UPDATE api_keys SET updated_at = created_at WHERE updated_at IS NULL");
|
|
@@ -11404,6 +11418,14 @@ async function updateLogTokens(requestId, values) {
|
|
|
11404
11418
|
values.outputTokens,
|
|
11405
11419
|
values.cachedTokens ?? null
|
|
11406
11420
|
];
|
|
11421
|
+
if (values.cacheReadTokens !== void 0) {
|
|
11422
|
+
setters.push("cache_read_tokens = ?");
|
|
11423
|
+
params.push(values.cacheReadTokens ?? null);
|
|
11424
|
+
}
|
|
11425
|
+
if (values.cacheCreationTokens !== void 0) {
|
|
11426
|
+
setters.push("cache_creation_tokens = ?");
|
|
11427
|
+
params.push(values.cacheCreationTokens ?? null);
|
|
11428
|
+
}
|
|
11407
11429
|
if (values.ttftMs !== void 0) {
|
|
11408
11430
|
setters.push("ttft_ms = ?");
|
|
11409
11431
|
params.push(values.ttftMs ?? null);
|
|
@@ -11455,25 +11477,33 @@ async function upsertLogPayload(requestId, payload) {
|
|
|
11455
11477
|
);
|
|
11456
11478
|
}
|
|
11457
11479
|
async function updateMetrics(date, endpoint, delta) {
|
|
11458
|
-
|
|
11459
|
-
|
|
11460
|
-
|
|
11461
|
-
|
|
11462
|
-
|
|
11463
|
-
|
|
11464
|
-
|
|
11465
|
-
|
|
11466
|
-
|
|
11467
|
-
|
|
11468
|
-
|
|
11469
|
-
|
|
11470
|
-
|
|
11471
|
-
|
|
11472
|
-
|
|
11473
|
-
|
|
11474
|
-
|
|
11475
|
-
|
|
11476
|
-
|
|
11480
|
+
try {
|
|
11481
|
+
await runQuery(
|
|
11482
|
+
`INSERT INTO daily_metrics (date, endpoint, request_count, total_input_tokens, total_output_tokens, total_cached_tokens, total_cache_read_tokens, total_cache_creation_tokens, total_latency_ms)
|
|
11483
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
11484
|
+
ON CONFLICT(date, endpoint) DO UPDATE SET
|
|
11485
|
+
request_count = daily_metrics.request_count + excluded.request_count,
|
|
11486
|
+
total_input_tokens = daily_metrics.total_input_tokens + excluded.total_input_tokens,
|
|
11487
|
+
total_output_tokens = daily_metrics.total_output_tokens + excluded.total_output_tokens,
|
|
11488
|
+
total_cached_tokens = daily_metrics.total_cached_tokens + excluded.total_cached_tokens,
|
|
11489
|
+
total_cache_read_tokens = daily_metrics.total_cache_read_tokens + excluded.total_cache_read_tokens,
|
|
11490
|
+
total_cache_creation_tokens = daily_metrics.total_cache_creation_tokens + excluded.total_cache_creation_tokens,
|
|
11491
|
+
total_latency_ms = daily_metrics.total_latency_ms + excluded.total_latency_ms`,
|
|
11492
|
+
[
|
|
11493
|
+
date,
|
|
11494
|
+
endpoint,
|
|
11495
|
+
delta.requests,
|
|
11496
|
+
delta.inputTokens,
|
|
11497
|
+
delta.outputTokens,
|
|
11498
|
+
delta.cachedTokens ?? 0,
|
|
11499
|
+
delta.cacheReadTokens ?? 0,
|
|
11500
|
+
delta.cacheCreationTokens ?? 0,
|
|
11501
|
+
delta.latencyMs
|
|
11502
|
+
]
|
|
11503
|
+
);
|
|
11504
|
+
} catch (err) {
|
|
11505
|
+
console.error("[updateMetrics] Failed to update metrics:", err);
|
|
11506
|
+
}
|
|
11477
11507
|
}
|
|
11478
11508
|
|
|
11479
11509
|
// metrics/activity.ts
|
|
@@ -11943,23 +11973,24 @@ function computeTpot(totalLatencyMs, outputTokens, options) {
|
|
|
11943
11973
|
return Number.isFinite(raw) ? roundTwoDecimals(raw) : null;
|
|
11944
11974
|
}
|
|
11945
11975
|
function resolveCachedTokens(usage) {
|
|
11976
|
+
const result = { read: 0, creation: 0 };
|
|
11946
11977
|
if (!usage || typeof usage !== "object") {
|
|
11947
|
-
return
|
|
11978
|
+
return result;
|
|
11979
|
+
}
|
|
11980
|
+
if (typeof usage.cache_read_input_tokens === "number") {
|
|
11981
|
+
result.read = usage.cache_read_input_tokens;
|
|
11982
|
+
}
|
|
11983
|
+
if (typeof usage.cache_creation_input_tokens === "number") {
|
|
11984
|
+
result.creation = usage.cache_creation_input_tokens;
|
|
11948
11985
|
}
|
|
11949
11986
|
if (typeof usage.cached_tokens === "number") {
|
|
11950
|
-
|
|
11987
|
+
result.read = usage.cached_tokens;
|
|
11951
11988
|
}
|
|
11952
11989
|
const promptDetails = usage.prompt_tokens_details;
|
|
11953
11990
|
if (promptDetails && typeof promptDetails.cached_tokens === "number") {
|
|
11954
|
-
|
|
11955
|
-
}
|
|
11956
|
-
if (typeof usage.cache_read_input_tokens === "number") {
|
|
11957
|
-
return usage.cache_read_input_tokens;
|
|
11958
|
-
}
|
|
11959
|
-
if (typeof usage.cache_creation_input_tokens === "number") {
|
|
11960
|
-
return usage.cache_creation_input_tokens;
|
|
11991
|
+
result.read = promptDetails.cached_tokens;
|
|
11961
11992
|
}
|
|
11962
|
-
return
|
|
11993
|
+
return result;
|
|
11963
11994
|
}
|
|
11964
11995
|
function cloneOriginalPayload(value) {
|
|
11965
11996
|
const structuredCloneFn = globalThis.structuredClone;
|
|
@@ -12222,7 +12253,8 @@ async function registerMessagesRoute(app) {
|
|
|
12222
12253
|
if (providerType === "anthropic") {
|
|
12223
12254
|
let inputTokens2 = json.usage?.input_tokens ?? 0;
|
|
12224
12255
|
let outputTokens2 = json.usage?.output_tokens ?? 0;
|
|
12225
|
-
const
|
|
12256
|
+
const cached2 = resolveCachedTokens(json.usage);
|
|
12257
|
+
const cachedTokens2 = cached2.read + cached2.creation;
|
|
12226
12258
|
if (!inputTokens2) {
|
|
12227
12259
|
inputTokens2 = target.tokenEstimate || estimateTokens(normalized, target.modelId);
|
|
12228
12260
|
}
|
|
@@ -12233,13 +12265,13 @@ async function registerMessagesRoute(app) {
|
|
|
12233
12265
|
logUsage("non_stream.anthropic", {
|
|
12234
12266
|
input: inputTokens2,
|
|
12235
12267
|
output: outputTokens2,
|
|
12236
|
-
cached:
|
|
12268
|
+
cached: cachedTokens2
|
|
12237
12269
|
});
|
|
12238
12270
|
const latencyMs2 = Date.now() - requestStart;
|
|
12239
12271
|
await updateLogTokens(logId, {
|
|
12240
12272
|
inputTokens: inputTokens2,
|
|
12241
12273
|
outputTokens: outputTokens2,
|
|
12242
|
-
cachedTokens:
|
|
12274
|
+
cachedTokens: cachedTokens2,
|
|
12243
12275
|
ttftMs: latencyMs2,
|
|
12244
12276
|
tpotMs: computeTpot(latencyMs2, outputTokens2, { streaming: false })
|
|
12245
12277
|
});
|
|
@@ -12248,7 +12280,9 @@ async function registerMessagesRoute(app) {
|
|
|
12248
12280
|
requests: 1,
|
|
12249
12281
|
inputTokens: inputTokens2,
|
|
12250
12282
|
outputTokens: outputTokens2,
|
|
12251
|
-
cachedTokens:
|
|
12283
|
+
cachedTokens: cachedTokens2,
|
|
12284
|
+
cacheReadTokens: cached2.read,
|
|
12285
|
+
cacheCreationTokens: cached2.creation,
|
|
12252
12286
|
latencyMs: latencyMs2
|
|
12253
12287
|
});
|
|
12254
12288
|
if (storeResponsePayloads) {
|
|
@@ -12269,7 +12303,8 @@ async function registerMessagesRoute(app) {
|
|
|
12269
12303
|
const claudeResponse = buildClaudeResponse(json, target.modelId);
|
|
12270
12304
|
let inputTokens = json.usage?.prompt_tokens ?? 0;
|
|
12271
12305
|
let outputTokens = json.usage?.completion_tokens ?? 0;
|
|
12272
|
-
const
|
|
12306
|
+
const cached = resolveCachedTokens(json.usage);
|
|
12307
|
+
const cachedTokens = cached.read + cached.creation;
|
|
12273
12308
|
if (!inputTokens) {
|
|
12274
12309
|
inputTokens = target.tokenEstimate || estimateTokens(normalized, target.modelId);
|
|
12275
12310
|
}
|
|
@@ -12280,13 +12315,13 @@ async function registerMessagesRoute(app) {
|
|
|
12280
12315
|
logUsage("non_stream.openai", {
|
|
12281
12316
|
input: inputTokens,
|
|
12282
12317
|
output: outputTokens,
|
|
12283
|
-
cached:
|
|
12318
|
+
cached: cachedTokens
|
|
12284
12319
|
});
|
|
12285
12320
|
const latencyMs = Date.now() - requestStart;
|
|
12286
12321
|
await updateLogTokens(logId, {
|
|
12287
12322
|
inputTokens,
|
|
12288
12323
|
outputTokens,
|
|
12289
|
-
cachedTokens
|
|
12324
|
+
cachedTokens,
|
|
12290
12325
|
ttftMs: latencyMs,
|
|
12291
12326
|
tpotMs: computeTpot(latencyMs, outputTokens, { streaming: false })
|
|
12292
12327
|
});
|
|
@@ -12295,6 +12330,9 @@ async function registerMessagesRoute(app) {
|
|
|
12295
12330
|
requests: 1,
|
|
12296
12331
|
inputTokens,
|
|
12297
12332
|
outputTokens,
|
|
12333
|
+
cachedTokens,
|
|
12334
|
+
cacheReadTokens: cached.read,
|
|
12335
|
+
cacheCreationTokens: cached.creation,
|
|
12298
12336
|
latencyMs
|
|
12299
12337
|
});
|
|
12300
12338
|
if (storeResponsePayloads) {
|
|
@@ -12466,9 +12504,7 @@ async function registerMessagesRoute(app) {
|
|
|
12466
12504
|
usagePrompt2 = payload2.usage.input_tokens ?? usagePrompt2;
|
|
12467
12505
|
usageCompletion2 = payload2.usage.output_tokens ?? usageCompletion2;
|
|
12468
12506
|
const maybeCached = resolveCachedTokens(payload2.usage);
|
|
12469
|
-
|
|
12470
|
-
usageCached2 = maybeCached;
|
|
12471
|
-
}
|
|
12507
|
+
usageCached2 = maybeCached.read + maybeCached.creation;
|
|
12472
12508
|
lastUsagePayload = payload2.usage;
|
|
12473
12509
|
}
|
|
12474
12510
|
if (payload2?.delta) {
|
|
@@ -12495,9 +12531,7 @@ async function registerMessagesRoute(app) {
|
|
|
12495
12531
|
usagePrompt2 = payload2.usage.input_tokens ?? usagePrompt2;
|
|
12496
12532
|
usageCompletion2 = payload2.usage.output_tokens ?? usageCompletion2;
|
|
12497
12533
|
const maybeCached = resolveCachedTokens(payload2.usage);
|
|
12498
|
-
|
|
12499
|
-
usageCached2 = maybeCached;
|
|
12500
|
-
}
|
|
12534
|
+
usageCached2 = maybeCached.read + maybeCached.creation;
|
|
12501
12535
|
lastUsagePayload = payload2.usage;
|
|
12502
12536
|
}
|
|
12503
12537
|
if (payload2?.stop_reason) {
|
|
@@ -12549,8 +12583,9 @@ async function registerMessagesRoute(app) {
|
|
|
12549
12583
|
}
|
|
12550
12584
|
const totalLatencyMs = Date.now() - requestStart;
|
|
12551
12585
|
const ttftMs = firstTokenAt2 ? firstTokenAt2 - requestStart : null;
|
|
12586
|
+
const cached = resolveCachedTokens(lastUsagePayload);
|
|
12552
12587
|
if (usageCached2 === null) {
|
|
12553
|
-
usageCached2 =
|
|
12588
|
+
usageCached2 = cached.read + cached.creation;
|
|
12554
12589
|
}
|
|
12555
12590
|
logUsage("stream.anthropic.final", {
|
|
12556
12591
|
input: usagePrompt2,
|
|
@@ -12561,6 +12596,8 @@ async function registerMessagesRoute(app) {
|
|
|
12561
12596
|
inputTokens: usagePrompt2,
|
|
12562
12597
|
outputTokens: usageCompletion2,
|
|
12563
12598
|
cachedTokens: usageCached2,
|
|
12599
|
+
cacheReadTokens: cached.read,
|
|
12600
|
+
cacheCreationTokens: cached.creation,
|
|
12564
12601
|
ttftMs,
|
|
12565
12602
|
tpotMs: computeTpot(totalLatencyMs, usageCompletion2, {
|
|
12566
12603
|
streaming: true,
|
|
@@ -12573,6 +12610,8 @@ async function registerMessagesRoute(app) {
|
|
|
12573
12610
|
inputTokens: usagePrompt2,
|
|
12574
12611
|
outputTokens: usageCompletion2,
|
|
12575
12612
|
cachedTokens: usageCached2,
|
|
12613
|
+
cacheReadTokens: cached.read,
|
|
12614
|
+
cacheCreationTokens: cached.creation,
|
|
12576
12615
|
latencyMs: totalLatencyMs
|
|
12577
12616
|
});
|
|
12578
12617
|
if (storeResponsePayloads) {
|
|
@@ -12732,6 +12771,8 @@ data: ${JSON.stringify(data)}
|
|
|
12732
12771
|
inputTokens: finalPromptTokens,
|
|
12733
12772
|
outputTokens: finalCompletionTokens,
|
|
12734
12773
|
cachedTokens: usageCached,
|
|
12774
|
+
cacheReadTokens: 0,
|
|
12775
|
+
cacheCreationTokens: 0,
|
|
12735
12776
|
ttftMs,
|
|
12736
12777
|
tpotMs: computeTpot(totalLatencyMs, finalCompletionTokens, {
|
|
12737
12778
|
streaming: true,
|
|
@@ -12743,7 +12784,9 @@ data: ${JSON.stringify(data)}
|
|
|
12743
12784
|
requests: 1,
|
|
12744
12785
|
inputTokens: finalPromptTokens,
|
|
12745
12786
|
outputTokens: finalCompletionTokens,
|
|
12746
|
-
cachedTokens: usageCached,
|
|
12787
|
+
cachedTokens: usageCached ?? 0,
|
|
12788
|
+
cacheReadTokens: 0,
|
|
12789
|
+
cacheCreationTokens: 0,
|
|
12747
12790
|
latencyMs: totalLatencyMs
|
|
12748
12791
|
});
|
|
12749
12792
|
if (storeResponsePayloads) {
|
|
@@ -12894,6 +12937,8 @@ data: ${JSON.stringify(data)}
|
|
|
12894
12937
|
inputTokens: fallbackPrompt,
|
|
12895
12938
|
outputTokens: fallbackCompletion,
|
|
12896
12939
|
cachedTokens: usageCached,
|
|
12940
|
+
cacheReadTokens: 0,
|
|
12941
|
+
cacheCreationTokens: 0,
|
|
12897
12942
|
ttftMs,
|
|
12898
12943
|
tpotMs: computeTpot(totalLatencyMs, fallbackCompletion, {
|
|
12899
12944
|
streaming: true,
|
|
@@ -12905,7 +12950,9 @@ data: ${JSON.stringify(data)}
|
|
|
12905
12950
|
requests: 1,
|
|
12906
12951
|
inputTokens: fallbackPrompt,
|
|
12907
12952
|
outputTokens: fallbackCompletion,
|
|
12908
|
-
cachedTokens: usageCached,
|
|
12953
|
+
cachedTokens: usageCached ?? 0,
|
|
12954
|
+
cacheReadTokens: 0,
|
|
12955
|
+
cacheCreationTokens: 0,
|
|
12909
12956
|
latencyMs: totalLatencyMs
|
|
12910
12957
|
});
|
|
12911
12958
|
if (storeResponsePayloads) {
|
|
@@ -13306,27 +13353,28 @@ function computeTpot2(totalLatencyMs, outputTokens, options) {
|
|
|
13306
13353
|
return Number.isFinite(raw) ? roundTwoDecimals2(raw) : null;
|
|
13307
13354
|
}
|
|
13308
13355
|
function resolveCachedTokens2(usage) {
|
|
13356
|
+
const result = { read: 0, creation: 0 };
|
|
13309
13357
|
if (!usage || typeof usage !== "object") {
|
|
13310
|
-
return
|
|
13358
|
+
return result;
|
|
13359
|
+
}
|
|
13360
|
+
if (typeof usage.cache_read_input_tokens === "number") {
|
|
13361
|
+
result.read = usage.cache_read_input_tokens;
|
|
13362
|
+
}
|
|
13363
|
+
if (typeof usage.cache_creation_input_tokens === "number") {
|
|
13364
|
+
result.creation = usage.cache_creation_input_tokens;
|
|
13311
13365
|
}
|
|
13312
13366
|
if (typeof usage.cached_tokens === "number") {
|
|
13313
|
-
|
|
13367
|
+
result.read = usage.cached_tokens;
|
|
13314
13368
|
}
|
|
13315
13369
|
const promptDetails = usage.prompt_tokens_details;
|
|
13316
13370
|
if (promptDetails && typeof promptDetails.cached_tokens === "number") {
|
|
13317
|
-
|
|
13371
|
+
result.read = promptDetails.cached_tokens;
|
|
13318
13372
|
}
|
|
13319
13373
|
const inputDetails = usage.input_tokens_details;
|
|
13320
13374
|
if (inputDetails && typeof inputDetails.cached_tokens === "number") {
|
|
13321
|
-
|
|
13322
|
-
}
|
|
13323
|
-
if (typeof usage.cache_read_input_tokens === "number") {
|
|
13324
|
-
return usage.cache_read_input_tokens;
|
|
13325
|
-
}
|
|
13326
|
-
if (typeof usage.cache_creation_input_tokens === "number") {
|
|
13327
|
-
return usage.cache_creation_input_tokens;
|
|
13375
|
+
result.read = inputDetails.cached_tokens;
|
|
13328
13376
|
}
|
|
13329
|
-
return
|
|
13377
|
+
return result;
|
|
13330
13378
|
}
|
|
13331
13379
|
var generateId = (prefix) => `${prefix}_${Math.random().toString(36).slice(2, 10)}`;
|
|
13332
13380
|
var isText = (input) => typeof input === "string" && input.length > 0;
|
|
@@ -13848,17 +13896,20 @@ async function registerOpenAiRoutes(app) {
|
|
|
13848
13896
|
if (!Number.isFinite(inputTokens3) || inputTokens3 <= 0) {
|
|
13849
13897
|
inputTokens3 = target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
|
|
13850
13898
|
}
|
|
13851
|
-
const
|
|
13899
|
+
const cached2 = resolveCachedTokens2(usagePayload2);
|
|
13900
|
+
const cachedTokens2 = cached2.read + cached2.creation;
|
|
13852
13901
|
const latencyMs3 = Date.now() - requestStart;
|
|
13853
13902
|
const openAIResponse = buildOpenAIResponseFromClaude(parsed, target.modelId, converted, {
|
|
13854
13903
|
inputTokens: inputTokens3,
|
|
13855
13904
|
outputTokens: outputTokens3,
|
|
13856
|
-
cachedTokens:
|
|
13905
|
+
cachedTokens: cachedTokens2
|
|
13857
13906
|
});
|
|
13858
13907
|
await updateLogTokens(logId, {
|
|
13859
13908
|
inputTokens: inputTokens3,
|
|
13860
13909
|
outputTokens: outputTokens3,
|
|
13861
|
-
cachedTokens:
|
|
13910
|
+
cachedTokens: usageCached,
|
|
13911
|
+
cacheReadTokens: cached2.read,
|
|
13912
|
+
cacheCreationTokens: cached2.creation,
|
|
13862
13913
|
ttftMs: latencyMs3,
|
|
13863
13914
|
tpotMs: computeTpot2(latencyMs3, outputTokens3, { streaming: false })
|
|
13864
13915
|
});
|
|
@@ -13867,7 +13918,9 @@ async function registerOpenAiRoutes(app) {
|
|
|
13867
13918
|
requests: 1,
|
|
13868
13919
|
inputTokens: inputTokens3,
|
|
13869
13920
|
outputTokens: outputTokens3,
|
|
13870
|
-
cachedTokens:
|
|
13921
|
+
cachedTokens: usageCached,
|
|
13922
|
+
cacheReadTokens: usageCacheRead,
|
|
13923
|
+
cacheCreationTokens: usageCacheCreation,
|
|
13871
13924
|
latencyMs: latencyMs3
|
|
13872
13925
|
});
|
|
13873
13926
|
if (storeResponsePayloads) {
|
|
@@ -13902,12 +13955,15 @@ async function registerOpenAiRoutes(app) {
|
|
|
13902
13955
|
return 0;
|
|
13903
13956
|
})();
|
|
13904
13957
|
const outputTokens2 = baseOutputTokens + reasoningTokens2;
|
|
13905
|
-
const
|
|
13958
|
+
const cached = resolveCachedTokens2(usagePayload);
|
|
13959
|
+
const cachedTokens = cached.read + cached.creation;
|
|
13906
13960
|
const latencyMs2 = Date.now() - requestStart;
|
|
13907
13961
|
await updateLogTokens(logId, {
|
|
13908
13962
|
inputTokens: inputTokens2,
|
|
13909
13963
|
outputTokens: outputTokens2,
|
|
13910
|
-
cachedTokens:
|
|
13964
|
+
cachedTokens: usageCached,
|
|
13965
|
+
cacheReadTokens: cached.read,
|
|
13966
|
+
cacheCreationTokens: cached.creation,
|
|
13911
13967
|
ttftMs: usagePayload?.first_token_latency_ms ?? latencyMs2,
|
|
13912
13968
|
tpotMs: usagePayload?.tokens_per_second ? computeTpot2(latencyMs2, outputTokens2, { streaming: false, reasoningTokens: reasoningTokens2 }) : null
|
|
13913
13969
|
});
|
|
@@ -13949,6 +14005,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
13949
14005
|
let usagePrompt2 = null;
|
|
13950
14006
|
let usageCompletion2 = null;
|
|
13951
14007
|
let usageCached2 = null;
|
|
14008
|
+
let usageCacheRead2 = 0;
|
|
14009
|
+
let usageCacheCreation2 = 0;
|
|
13952
14010
|
let lastUsagePayload = null;
|
|
13953
14011
|
let firstTokenAt2 = null;
|
|
13954
14012
|
let claudeMessageId = null;
|
|
@@ -13990,9 +14048,9 @@ async function registerOpenAiRoutes(app) {
|
|
|
13990
14048
|
);
|
|
13991
14049
|
if (usageCached2 == null) {
|
|
13992
14050
|
const candidate = resolveCachedTokens2(usagePayload);
|
|
13993
|
-
|
|
13994
|
-
|
|
13995
|
-
|
|
14051
|
+
usageCacheRead2 = candidate.read;
|
|
14052
|
+
usageCacheCreation2 = candidate.creation;
|
|
14053
|
+
usageCached2 = candidate.read + candidate.creation;
|
|
13996
14054
|
}
|
|
13997
14055
|
lastUsagePayload = usagePayload;
|
|
13998
14056
|
};
|
|
@@ -14263,7 +14321,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
14263
14321
|
ensureCreatedSent();
|
|
14264
14322
|
let finalPromptTokens = typeof usagePrompt2 === "number" && usagePrompt2 > 0 ? usagePrompt2 : target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
|
|
14265
14323
|
let finalCompletionTokens = typeof usageCompletion2 === "number" && usageCompletion2 > 0 ? usageCompletion2 : aggregatedText ? estimateTextTokens(aggregatedText, target.modelId) : 0;
|
|
14266
|
-
const
|
|
14324
|
+
const finalCachedResult = usageCached2 != null ? { read: usageCacheRead2, creation: usageCacheCreation2 } : resolveCachedTokens2(lastUsagePayload);
|
|
14325
|
+
const finalCachedTokens = finalCachedResult.read + finalCachedResult.creation;
|
|
14267
14326
|
const totalLatencyMs = Date.now() - requestStart;
|
|
14268
14327
|
const ttftMs = firstTokenAt2 ? firstTokenAt2 - requestStart : null;
|
|
14269
14328
|
const openAIResponse = buildOpenAIResponseFromClaude(claudeMessage, target.modelId, converted, {
|
|
@@ -14294,6 +14353,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
14294
14353
|
inputTokens: finalPromptTokens,
|
|
14295
14354
|
outputTokens: finalCompletionTokens,
|
|
14296
14355
|
cachedTokens: finalCachedTokens ?? null,
|
|
14356
|
+
cacheReadTokens: 0,
|
|
14357
|
+
cacheCreationTokens: 0,
|
|
14297
14358
|
ttftMs,
|
|
14298
14359
|
tpotMs: computeTpot2(totalLatencyMs, finalCompletionTokens, {
|
|
14299
14360
|
streaming: true,
|
|
@@ -14305,7 +14366,9 @@ async function registerOpenAiRoutes(app) {
|
|
|
14305
14366
|
requests: 1,
|
|
14306
14367
|
inputTokens: finalPromptTokens,
|
|
14307
14368
|
outputTokens: finalCompletionTokens,
|
|
14308
|
-
cachedTokens:
|
|
14369
|
+
cachedTokens: finalCachedTokens,
|
|
14370
|
+
cacheReadTokens: finalCachedResult.read,
|
|
14371
|
+
cacheCreationTokens: finalCachedResult.creation,
|
|
14309
14372
|
latencyMs: totalLatencyMs
|
|
14310
14373
|
});
|
|
14311
14374
|
if (storeResponsePayloads && capturedResponseChunks2) {
|
|
@@ -14325,6 +14388,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
14325
14388
|
let usageCompletion = null;
|
|
14326
14389
|
let usageReasoning = null;
|
|
14327
14390
|
let usageCached = null;
|
|
14391
|
+
let usageCacheRead = 0;
|
|
14392
|
+
let usageCacheCreation = 0;
|
|
14328
14393
|
let firstTokenAt = null;
|
|
14329
14394
|
let chunkCount = 0;
|
|
14330
14395
|
const capturedResponseChunks = storeResponsePayloads ? [] : null;
|
|
@@ -14364,7 +14429,10 @@ async function registerOpenAiRoutes(app) {
|
|
|
14364
14429
|
usageReasoning
|
|
14365
14430
|
);
|
|
14366
14431
|
if (usageCached == null) {
|
|
14367
|
-
|
|
14432
|
+
const cachedResult = resolveCachedTokens2(usagePayload);
|
|
14433
|
+
usageCacheRead = cachedResult.read;
|
|
14434
|
+
usageCacheCreation = cachedResult.creation;
|
|
14435
|
+
usageCached = cachedResult.read + cachedResult.creation;
|
|
14368
14436
|
}
|
|
14369
14437
|
if (OPENAI_DEBUG) {
|
|
14370
14438
|
debugLog("usage payload received", usagePayload);
|
|
@@ -14456,6 +14524,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
14456
14524
|
inputTokens,
|
|
14457
14525
|
outputTokens,
|
|
14458
14526
|
cachedTokens: usageCached,
|
|
14527
|
+
cacheReadTokens: 0,
|
|
14528
|
+
cacheCreationTokens: 0,
|
|
14459
14529
|
ttftMs: firstTokenAt ? firstTokenAt - requestStart : null,
|
|
14460
14530
|
tpotMs: computeTpot2(latencyMs, outputTokens, {
|
|
14461
14531
|
streaming: true,
|
|
@@ -14713,12 +14783,15 @@ async function registerOpenAiRoutes(app) {
|
|
|
14713
14783
|
inputTokens: inputTokens3,
|
|
14714
14784
|
outputTokens: outputTokens3
|
|
14715
14785
|
});
|
|
14716
|
-
const
|
|
14786
|
+
const cached2 = resolveCachedTokens2(usagePayload2);
|
|
14787
|
+
const cachedTokens2 = cached2.read + cached2.creation;
|
|
14717
14788
|
const latencyMs3 = Date.now() - requestStart;
|
|
14718
14789
|
await updateLogTokens(logId, {
|
|
14719
14790
|
inputTokens: inputTokens3,
|
|
14720
14791
|
outputTokens: outputTokens3,
|
|
14721
|
-
cachedTokens:
|
|
14792
|
+
cachedTokens: usageCached,
|
|
14793
|
+
cacheReadTokens: cached2.read,
|
|
14794
|
+
cacheCreationTokens: cached2.creation,
|
|
14722
14795
|
ttftMs: latencyMs3,
|
|
14723
14796
|
tpotMs: computeTpot2(latencyMs3, outputTokens3, { streaming: false })
|
|
14724
14797
|
});
|
|
@@ -14727,7 +14800,9 @@ async function registerOpenAiRoutes(app) {
|
|
|
14727
14800
|
requests: 1,
|
|
14728
14801
|
inputTokens: inputTokens3,
|
|
14729
14802
|
outputTokens: outputTokens3,
|
|
14730
|
-
cachedTokens:
|
|
14803
|
+
cachedTokens: usageCached,
|
|
14804
|
+
cacheReadTokens: usageCacheRead,
|
|
14805
|
+
cacheCreationTokens: usageCacheCreation,
|
|
14731
14806
|
latencyMs: latencyMs3
|
|
14732
14807
|
});
|
|
14733
14808
|
if (storeResponsePayloads) {
|
|
@@ -14757,12 +14832,15 @@ async function registerOpenAiRoutes(app) {
|
|
|
14757
14832
|
})(),
|
|
14758
14833
|
target.modelId
|
|
14759
14834
|
);
|
|
14760
|
-
const
|
|
14835
|
+
const cached = resolveCachedTokens2(usagePayload);
|
|
14836
|
+
const cachedTokens = cached.read + cached.creation;
|
|
14761
14837
|
const latencyMs2 = Date.now() - requestStart;
|
|
14762
14838
|
await updateLogTokens(logId, {
|
|
14763
14839
|
inputTokens: inputTokens2,
|
|
14764
14840
|
outputTokens: outputTokens2,
|
|
14765
|
-
cachedTokens:
|
|
14841
|
+
cachedTokens: usageCached,
|
|
14842
|
+
cacheReadTokens: cached.read,
|
|
14843
|
+
cacheCreationTokens: cached.creation,
|
|
14766
14844
|
ttftMs: usagePayload?.first_token_latency_ms ?? latencyMs2,
|
|
14767
14845
|
tpotMs: usagePayload?.tokens_per_second ? computeTpot2(latencyMs2, outputTokens2, { streaming: false }) : null
|
|
14768
14846
|
});
|
|
@@ -14804,6 +14882,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
14804
14882
|
let usagePrompt2 = null;
|
|
14805
14883
|
let usageCompletion2 = null;
|
|
14806
14884
|
let usageCached2 = null;
|
|
14885
|
+
let usageCacheRead2 = 0;
|
|
14886
|
+
let usageCacheCreation2 = 0;
|
|
14807
14887
|
let lastUsagePayload = null;
|
|
14808
14888
|
let firstTokenAt2 = null;
|
|
14809
14889
|
let claudeStopReason = null;
|
|
@@ -14846,9 +14926,9 @@ async function registerOpenAiRoutes(app) {
|
|
|
14846
14926
|
);
|
|
14847
14927
|
if (usageCached2 == null) {
|
|
14848
14928
|
const candidate = resolveCachedTokens2(usagePayload);
|
|
14849
|
-
|
|
14850
|
-
|
|
14851
|
-
|
|
14929
|
+
usageCacheRead2 = candidate.read;
|
|
14930
|
+
usageCacheCreation2 = candidate.creation;
|
|
14931
|
+
usageCached2 = candidate.read + candidate.creation;
|
|
14852
14932
|
}
|
|
14853
14933
|
lastUsagePayload = usagePayload;
|
|
14854
14934
|
};
|
|
@@ -15159,7 +15239,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
15159
15239
|
}
|
|
15160
15240
|
const finalPromptTokens = typeof usagePrompt2 === "number" && usagePrompt2 > 0 ? usagePrompt2 : target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
|
|
15161
15241
|
const finalCompletionTokens = typeof usageCompletion2 === "number" && usageCompletion2 > 0 ? usageCompletion2 : aggregatedText ? estimateTextTokens(aggregatedText, target.modelId) : 0;
|
|
15162
|
-
const
|
|
15242
|
+
const finalCachedResult = usageCached2 != null ? { read: usageCacheRead2, creation: usageCacheCreation2 } : resolveCachedTokens2(lastUsagePayload);
|
|
15243
|
+
const finalCachedTokens = finalCachedResult.read + finalCachedResult.creation;
|
|
15163
15244
|
const totalLatencyMs = Date.now() - requestStart;
|
|
15164
15245
|
const ttftMs = firstTokenAt2 ? firstTokenAt2 - requestStart : null;
|
|
15165
15246
|
const finishReason = mapClaudeStopReasonToChatFinish(claudeStopReason) ?? "stop";
|
|
@@ -15192,6 +15273,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
15192
15273
|
inputTokens: finalPromptTokens,
|
|
15193
15274
|
outputTokens: finalCompletionTokens,
|
|
15194
15275
|
cachedTokens: finalCachedTokens ?? null,
|
|
15276
|
+
cacheReadTokens: 0,
|
|
15277
|
+
cacheCreationTokens: 0,
|
|
15195
15278
|
ttftMs,
|
|
15196
15279
|
tpotMs: computeTpot2(totalLatencyMs, finalCompletionTokens, {
|
|
15197
15280
|
streaming: true,
|
|
@@ -15203,7 +15286,9 @@ async function registerOpenAiRoutes(app) {
|
|
|
15203
15286
|
requests: 1,
|
|
15204
15287
|
inputTokens: finalPromptTokens,
|
|
15205
15288
|
outputTokens: finalCompletionTokens,
|
|
15206
|
-
cachedTokens:
|
|
15289
|
+
cachedTokens: finalCachedTokens,
|
|
15290
|
+
cacheReadTokens: finalCachedResult.read,
|
|
15291
|
+
cacheCreationTokens: finalCachedResult.creation,
|
|
15207
15292
|
latencyMs: totalLatencyMs
|
|
15208
15293
|
});
|
|
15209
15294
|
if (storeResponsePayloads && capturedResponseChunks2) {
|
|
@@ -15222,6 +15307,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
15222
15307
|
let usagePrompt = null;
|
|
15223
15308
|
let usageCompletion = null;
|
|
15224
15309
|
let usageCached = null;
|
|
15310
|
+
let usageCacheRead = 0;
|
|
15311
|
+
let usageCacheCreation = 0;
|
|
15225
15312
|
let firstTokenAt = null;
|
|
15226
15313
|
const capturedResponseChunks = storeResponsePayloads ? [] : null;
|
|
15227
15314
|
const replyClosed = () => {
|
|
@@ -15254,7 +15341,10 @@ async function registerOpenAiRoutes(app) {
|
|
|
15254
15341
|
usageCompletion
|
|
15255
15342
|
);
|
|
15256
15343
|
if (usageCached == null) {
|
|
15257
|
-
|
|
15344
|
+
const cachedResult = resolveCachedTokens2(usagePayload);
|
|
15345
|
+
usageCacheRead = cachedResult.read;
|
|
15346
|
+
usageCacheCreation = cachedResult.creation;
|
|
15347
|
+
usageCached = cachedResult.read + cachedResult.creation;
|
|
15258
15348
|
}
|
|
15259
15349
|
};
|
|
15260
15350
|
while (true) {
|
|
@@ -15329,6 +15419,8 @@ async function registerOpenAiRoutes(app) {
|
|
|
15329
15419
|
inputTokens,
|
|
15330
15420
|
outputTokens,
|
|
15331
15421
|
cachedTokens: usageCached,
|
|
15422
|
+
cacheReadTokens: 0,
|
|
15423
|
+
cacheCreationTokens: 0,
|
|
15332
15424
|
ttftMs: firstTokenAt ? firstTokenAt - requestStart : null,
|
|
15333
15425
|
tpotMs: computeTpot2(latencyMs, outputTokens, {
|
|
15334
15426
|
streaming: true,
|
|
@@ -15614,6 +15706,8 @@ async function getDailyMetrics(days = 7, endpoint) {
|
|
|
15614
15706
|
total_input_tokens AS inputTokens,
|
|
15615
15707
|
total_output_tokens AS outputTokens,
|
|
15616
15708
|
total_cached_tokens AS cachedTokens,
|
|
15709
|
+
total_cache_read_tokens AS cacheReadTokens,
|
|
15710
|
+
total_cache_creation_tokens AS cacheCreationTokens,
|
|
15617
15711
|
total_latency_ms AS totalLatency
|
|
15618
15712
|
FROM daily_metrics
|
|
15619
15713
|
${whereClause}
|
|
@@ -15627,6 +15721,8 @@ async function getDailyMetrics(days = 7, endpoint) {
|
|
|
15627
15721
|
inputTokens: row.inputTokens ?? 0,
|
|
15628
15722
|
outputTokens: row.outputTokens ?? 0,
|
|
15629
15723
|
cachedTokens: row.cachedTokens ?? 0,
|
|
15724
|
+
cacheReadTokens: row.cacheReadTokens ?? 0,
|
|
15725
|
+
cacheCreationTokens: row.cacheCreationTokens ?? 0,
|
|
15630
15726
|
avgLatencyMs: row.requestCount ? Math.round((row.totalLatency ?? 0) / row.requestCount) : 0
|
|
15631
15727
|
})).reverse();
|
|
15632
15728
|
}
|
|
@@ -15638,6 +15734,8 @@ async function getMetricsOverview(endpoint) {
|
|
|
15638
15734
|
COALESCE(SUM(total_input_tokens), 0) AS inputTokens,
|
|
15639
15735
|
COALESCE(SUM(total_output_tokens), 0) AS outputTokens,
|
|
15640
15736
|
COALESCE(SUM(total_cached_tokens), 0) AS cachedTokens,
|
|
15737
|
+
COALESCE(SUM(total_cache_read_tokens), 0) AS cacheReadTokens,
|
|
15738
|
+
COALESCE(SUM(total_cache_creation_tokens), 0) AS cacheCreationTokens,
|
|
15641
15739
|
COALESCE(SUM(total_latency_ms), 0) AS totalLatency
|
|
15642
15740
|
FROM daily_metrics
|
|
15643
15741
|
${totalsWhere}`,
|
|
@@ -15649,6 +15747,8 @@ async function getMetricsOverview(endpoint) {
|
|
|
15649
15747
|
total_input_tokens AS inputTokens,
|
|
15650
15748
|
total_output_tokens AS outputTokens,
|
|
15651
15749
|
total_cached_tokens AS cachedTokens,
|
|
15750
|
+
total_cache_read_tokens AS cacheReadTokens,
|
|
15751
|
+
total_cache_creation_tokens AS cacheCreationTokens,
|
|
15652
15752
|
total_latency_ms AS totalLatency
|
|
15653
15753
|
FROM daily_metrics
|
|
15654
15754
|
WHERE date = ?
|
|
@@ -15666,6 +15766,8 @@ async function getMetricsOverview(endpoint) {
|
|
|
15666
15766
|
inputTokens: totalsRow?.inputTokens ?? 0,
|
|
15667
15767
|
outputTokens: totalsRow?.outputTokens ?? 0,
|
|
15668
15768
|
cachedTokens: totalsRow?.cachedTokens ?? 0,
|
|
15769
|
+
cacheReadTokens: totalsRow?.cacheReadTokens ?? 0,
|
|
15770
|
+
cacheCreationTokens: totalsRow?.cacheCreationTokens ?? 0,
|
|
15669
15771
|
avgLatencyMs: resolveAvg(totalsLatency, totalsRequests)
|
|
15670
15772
|
},
|
|
15671
15773
|
today: {
|
|
@@ -15673,6 +15775,8 @@ async function getMetricsOverview(endpoint) {
|
|
|
15673
15775
|
inputTokens: todayRow?.inputTokens ?? 0,
|
|
15674
15776
|
outputTokens: todayRow?.outputTokens ?? 0,
|
|
15675
15777
|
cachedTokens: todayRow?.cachedTokens ?? 0,
|
|
15778
|
+
cacheReadTokens: todayRow?.cacheReadTokens ?? 0,
|
|
15779
|
+
cacheCreationTokens: todayRow?.cacheCreationTokens ?? 0,
|
|
15676
15780
|
avgLatencyMs: resolveAvg(todayLatency, todayRequests)
|
|
15677
15781
|
}
|
|
15678
15782
|
};
|
|
@@ -17097,23 +17201,24 @@ function getPathsToRegister(basePath, protocol) {
|
|
|
17097
17201
|
}
|
|
17098
17202
|
}
|
|
17099
17203
|
function resolveCachedTokens3(usage) {
  // Normalize a provider "usage" payload into cache-token counts:
  // { read, creation }. Unknown or non-object payloads yield zeros.
  if (!usage || typeof usage !== "object") {
    return { read: 0, creation: 0 };
  }
  let read = 0;
  let creation = 0;
  // Anthropic-style fields.
  if (typeof usage.cache_read_input_tokens === "number") {
    read = usage.cache_read_input_tokens;
  }
  if (typeof usage.cache_creation_input_tokens === "number") {
    creation = usage.cache_creation_input_tokens;
  }
  // OpenAI-style fields checked afterwards; NOTE(review): later sources
  // overwrite the Anthropic read count — presumably intentional precedence,
  // confirm if a provider ever emits both shapes at once.
  if (typeof usage.cached_tokens === "number") {
    read = usage.cached_tokens;
  }
  const details = usage.prompt_tokens_details;
  if (details && typeof details.cached_tokens === "number") {
    read = details.cached_tokens;
  }
  return { read, creation };
}
|
|
17118
17223
|
var roundTwoDecimals3 = (value) => Math.round(value * 100) / 100;
|
|
17119
17224
|
function cloneOriginalPayload2(value) {
|
|
@@ -17470,12 +17575,15 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
|
|
|
17470
17575
|
const json = await new Response(upstream.body).json();
|
|
17471
17576
|
const inputTokens = json.usage?.input_tokens ?? estimateTokens(normalized, target.modelId);
|
|
17472
17577
|
const outputTokens = json.usage?.output_tokens ?? 0;
|
|
17473
|
-
const
|
|
17578
|
+
const cached = resolveCachedTokens3(json.usage);
|
|
17579
|
+
const cachedTokens = cached.read + cached.creation;
|
|
17474
17580
|
const latencyMs = Date.now() - requestStart;
|
|
17475
17581
|
await updateLogTokens(logId, {
|
|
17476
17582
|
inputTokens,
|
|
17477
17583
|
outputTokens,
|
|
17478
|
-
cachedTokens:
|
|
17584
|
+
cachedTokens: usageCached,
|
|
17585
|
+
cacheReadTokens: cached.read,
|
|
17586
|
+
cacheCreationTokens: cached.creation,
|
|
17479
17587
|
ttftMs: latencyMs,
|
|
17480
17588
|
tpotMs: computeTpot3(latencyMs, outputTokens, { streaming: false })
|
|
17481
17589
|
});
|
|
@@ -17484,7 +17592,9 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
|
|
|
17484
17592
|
requests: 1,
|
|
17485
17593
|
inputTokens,
|
|
17486
17594
|
outputTokens,
|
|
17487
|
-
cachedTokens:
|
|
17595
|
+
cachedTokens: usageCached,
|
|
17596
|
+
cacheReadTokens: usageCacheRead,
|
|
17597
|
+
cacheCreationTokens: usageCacheCreation,
|
|
17488
17598
|
latencyMs
|
|
17489
17599
|
});
|
|
17490
17600
|
if (storeResponsePayloads) {
|
|
@@ -17512,6 +17622,8 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
|
|
|
17512
17622
|
let usagePrompt = 0;
|
|
17513
17623
|
let usageCompletion = 0;
|
|
17514
17624
|
let usageCached = null;
|
|
17625
|
+
let usageCacheRead = 0;
|
|
17626
|
+
let usageCacheCreation = 0;
|
|
17515
17627
|
let firstTokenAt = null;
|
|
17516
17628
|
const capturedChunks = storeResponsePayloads ? [] : null;
|
|
17517
17629
|
try {
|
|
@@ -17541,9 +17653,9 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
|
|
|
17541
17653
|
usagePrompt = parsed.usage.input_tokens ?? usagePrompt;
|
|
17542
17654
|
usageCompletion = parsed.usage.output_tokens ?? usageCompletion;
|
|
17543
17655
|
const cached = resolveCachedTokens3(parsed.usage);
|
|
17544
|
-
|
|
17545
|
-
|
|
17546
|
-
|
|
17656
|
+
usageCacheRead = cached.read;
|
|
17657
|
+
usageCacheCreation = cached.creation;
|
|
17658
|
+
usageCached = cached.read + cached.creation;
|
|
17547
17659
|
}
|
|
17548
17660
|
if (!firstTokenAt && (parsed?.type === "content_block_delta" || parsed?.delta?.text)) {
|
|
17549
17661
|
firstTokenAt = Date.now();
|
|
@@ -17570,6 +17682,8 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
|
|
|
17570
17682
|
inputTokens: usagePrompt,
|
|
17571
17683
|
outputTokens: usageCompletion,
|
|
17572
17684
|
cachedTokens: usageCached,
|
|
17685
|
+
cacheReadTokens: usageCacheRead,
|
|
17686
|
+
cacheCreationTokens: usageCacheCreation,
|
|
17573
17687
|
ttftMs,
|
|
17574
17688
|
tpotMs: computeTpot3(totalLatencyMs, usageCompletion, {
|
|
17575
17689
|
streaming: true,
|
|
@@ -17761,12 +17875,15 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
|
|
|
17761
17875
|
const usagePayload = json?.usage ?? null;
|
|
17762
17876
|
const inputTokens2 = usagePayload?.prompt_tokens ?? usagePayload?.input_tokens ?? target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
|
|
17763
17877
|
const outputTokens2 = usagePayload?.completion_tokens ?? usagePayload?.output_tokens ?? estimateTextTokens(json?.choices?.[0]?.message?.content ?? "", target.modelId);
|
|
17764
|
-
const
|
|
17878
|
+
const cached = resolveCachedTokens3(usagePayload);
|
|
17879
|
+
const cachedTokens = cached.read + cached.creation;
|
|
17765
17880
|
const latencyMs2 = Date.now() - requestStart;
|
|
17766
17881
|
await updateLogTokens(logId, {
|
|
17767
17882
|
inputTokens: inputTokens2,
|
|
17768
17883
|
outputTokens: outputTokens2,
|
|
17769
|
-
cachedTokens:
|
|
17884
|
+
cachedTokens: usageCached,
|
|
17885
|
+
cacheReadTokens: cached.read,
|
|
17886
|
+
cacheCreationTokens: cached.creation,
|
|
17770
17887
|
ttftMs: latencyMs2,
|
|
17771
17888
|
tpotMs: computeTpot3(latencyMs2, outputTokens2, { streaming: false })
|
|
17772
17889
|
});
|
|
@@ -17775,7 +17892,9 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
|
|
|
17775
17892
|
requests: 1,
|
|
17776
17893
|
inputTokens: inputTokens2,
|
|
17777
17894
|
outputTokens: outputTokens2,
|
|
17778
|
-
cachedTokens:
|
|
17895
|
+
cachedTokens: usageCached,
|
|
17896
|
+
cacheReadTokens: usageCacheRead,
|
|
17897
|
+
cacheCreationTokens: usageCacheCreation,
|
|
17779
17898
|
latencyMs: latencyMs2
|
|
17780
17899
|
});
|
|
17781
17900
|
if (storeResponsePayloads) {
|
|
@@ -17800,6 +17919,8 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
|
|
|
17800
17919
|
let usagePrompt = null;
|
|
17801
17920
|
let usageCompletion = null;
|
|
17802
17921
|
let usageCached = null;
|
|
17922
|
+
let usageCacheRead = 0;
|
|
17923
|
+
let usageCacheCreation = 0;
|
|
17803
17924
|
let firstTokenAt = null;
|
|
17804
17925
|
const capturedChunks = storeResponsePayloads ? [] : null;
|
|
17805
17926
|
try {
|
|
@@ -17829,7 +17950,10 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
|
|
|
17829
17950
|
if (usage) {
|
|
17830
17951
|
usagePrompt = usage.prompt_tokens ?? usage.input_tokens ?? usagePrompt;
|
|
17831
17952
|
usageCompletion = usage.completion_tokens ?? usage.output_tokens ?? usageCompletion;
|
|
17832
|
-
|
|
17953
|
+
const cachedResult = resolveCachedTokens3(usage);
|
|
17954
|
+
usageCacheRead = cachedResult.read;
|
|
17955
|
+
usageCacheCreation = cachedResult.creation;
|
|
17956
|
+
usageCached = cachedResult.read + cachedResult.creation;
|
|
17833
17957
|
}
|
|
17834
17958
|
} catch {
|
|
17835
17959
|
}
|
|
@@ -17851,6 +17975,8 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
|
|
|
17851
17975
|
inputTokens,
|
|
17852
17976
|
outputTokens,
|
|
17853
17977
|
cachedTokens: usageCached,
|
|
17978
|
+
cacheReadTokens: usageCacheRead,
|
|
17979
|
+
cacheCreationTokens: usageCacheCreation,
|
|
17854
17980
|
ttftMs: firstTokenAt ? firstTokenAt - requestStart : null,
|
|
17855
17981
|
tpotMs: computeTpot3(latencyMs, outputTokens, {
|
|
17856
17982
|
streaming: true,
|
|
@@ -18041,12 +18167,15 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
|
|
|
18041
18167
|
const inputTokens2 = usagePayload?.prompt_tokens ?? usagePayload?.input_tokens ?? target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
|
|
18042
18168
|
const content = json?.response?.body?.content ?? json?.choices?.[0]?.message?.content ?? "";
|
|
18043
18169
|
const outputTokens2 = usagePayload?.completion_tokens ?? usagePayload?.output_tokens ?? estimateTextTokens(content, target.modelId);
|
|
18044
|
-
const
|
|
18170
|
+
const cached = resolveCachedTokens3(usagePayload);
|
|
18171
|
+
const cachedTokens = cached.read + cached.creation;
|
|
18045
18172
|
const latencyMs2 = Date.now() - requestStart;
|
|
18046
18173
|
await updateLogTokens(logId, {
|
|
18047
18174
|
inputTokens: inputTokens2,
|
|
18048
18175
|
outputTokens: outputTokens2,
|
|
18049
|
-
cachedTokens:
|
|
18176
|
+
cachedTokens: usageCached,
|
|
18177
|
+
cacheReadTokens: cached.read,
|
|
18178
|
+
cacheCreationTokens: cached.creation,
|
|
18050
18179
|
ttftMs: latencyMs2,
|
|
18051
18180
|
tpotMs: computeTpot3(latencyMs2, outputTokens2, { streaming: false })
|
|
18052
18181
|
});
|
|
@@ -18055,7 +18184,9 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
|
|
|
18055
18184
|
requests: 1,
|
|
18056
18185
|
inputTokens: inputTokens2,
|
|
18057
18186
|
outputTokens: outputTokens2,
|
|
18058
|
-
cachedTokens:
|
|
18187
|
+
cachedTokens: usageCached,
|
|
18188
|
+
cacheReadTokens: usageCacheRead,
|
|
18189
|
+
cacheCreationTokens: usageCacheCreation,
|
|
18059
18190
|
latencyMs: latencyMs2
|
|
18060
18191
|
});
|
|
18061
18192
|
if (storeResponsePayloads) {
|
|
@@ -18080,6 +18211,8 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
|
|
|
18080
18211
|
let usagePrompt = null;
|
|
18081
18212
|
let usageCompletion = null;
|
|
18082
18213
|
let usageCached = null;
|
|
18214
|
+
let usageCacheRead = 0;
|
|
18215
|
+
let usageCacheCreation = 0;
|
|
18083
18216
|
let firstTokenAt = null;
|
|
18084
18217
|
const capturedChunks = storeResponsePayloads ? [] : null;
|
|
18085
18218
|
try {
|
|
@@ -18109,7 +18242,10 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
|
|
|
18109
18242
|
if (usage) {
|
|
18110
18243
|
usagePrompt = usage.prompt_tokens ?? usage.input_tokens ?? usagePrompt;
|
|
18111
18244
|
usageCompletion = usage.completion_tokens ?? usage.output_tokens ?? usageCompletion;
|
|
18112
|
-
|
|
18245
|
+
const cachedResult = resolveCachedTokens3(usage);
|
|
18246
|
+
usageCacheRead = cachedResult.read;
|
|
18247
|
+
usageCacheCreation = cachedResult.creation;
|
|
18248
|
+
usageCached = cachedResult.read + cachedResult.creation;
|
|
18113
18249
|
}
|
|
18114
18250
|
} catch {
|
|
18115
18251
|
}
|
|
@@ -18131,6 +18267,8 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
|
|
|
18131
18267
|
inputTokens,
|
|
18132
18268
|
outputTokens,
|
|
18133
18269
|
cachedTokens: usageCached,
|
|
18270
|
+
cacheReadTokens: usageCacheRead,
|
|
18271
|
+
cacheCreationTokens: usageCacheCreation,
|
|
18134
18272
|
ttftMs: firstTokenAt ? firstTokenAt - requestStart : null,
|
|
18135
18273
|
tpotMs: computeTpot3(latencyMs, outputTokens, {
|
|
18136
18274
|
streaming: true,
|