@chenpu17/cc-gw 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. package/package.json +1 -1
  2. package/src/server/dist/index.js +242 -104
  3. package/src/web/dist/assets/{About-CfWemXks.js → About-DK242vw9.js} +2 -2
  4. package/src/web/dist/assets/{ApiKeys-jGq-kKf8.js → ApiKeys-BGROxK6-.js} +1 -1
  5. package/src/web/dist/assets/{Button-DdVyALkb.js → Button-Bnnxe9ep.js} +1 -1
  6. package/src/web/dist/assets/Dashboard-B7zokimB.js +16 -0
  7. package/src/web/dist/assets/{FormField-Dyi1Kr0k.js → FormField-BzT4FGj8.js} +1 -1
  8. package/src/web/dist/assets/{Help-iZXiqNPj.js → Help-CJooSMdJ.js} +1 -1
  9. package/src/web/dist/assets/{Input-DACzKnnk.js → Input-B-P-J4xQ.js} +1 -1
  10. package/src/web/dist/assets/{Login-mm-NJUOP.js → Login-B9RgxiYX.js} +1 -1
  11. package/src/web/dist/assets/Logs-CsJCTftU.js +1 -0
  12. package/src/web/dist/assets/{ModelManagement-B4f8RUk8.js → ModelManagement-dDhNa_5z.js} +1 -1
  13. package/src/web/dist/assets/{PageSection-BfMkaweN.js → PageSection-Dzvd3cKD.js} +1 -1
  14. package/src/web/dist/assets/{Settings-DRLUoVLq.js → Settings-BcMQ79b0.js} +1 -1
  15. package/src/web/dist/assets/{StatusBadge-OH4R_aKr.js → StatusBadge-CAkVtC--.js} +1 -1
  16. package/src/web/dist/assets/{copy-BVgnqUsP.js → copy-D6cuJHzh.js} +1 -1
  17. package/src/web/dist/assets/{index-C2xoexDc.js → index-Cm-hZvRJ.js} +1 -1
  18. package/src/web/dist/assets/{index-CAbsGgAq.js → index-agm-2asf.js} +5 -5
  19. package/src/web/dist/assets/{info-CpmSkfUl.js → info-CfAuBePJ.js} +1 -1
  20. package/src/web/dist/assets/{useApiQuery-CNbY7eTZ.js → useApiQuery-ns68sM2H.js} +1 -1
  21. package/src/web/dist/index.html +1 -1
  22. package/src/web/dist/assets/Dashboard-uoI9oSjK.js +0 -16
  23. package/src/web/dist/assets/Logs-DNxTsrk3.js +0 -1
@@ -11103,7 +11103,11 @@ async function migrateDailyMetricsTable(db) {
11103
11103
  if (!hasEndpointColumn || !hasCompositePrimaryKey) {
11104
11104
  const endpointSelector = hasEndpointColumn ? "COALESCE(endpoint, 'anthropic')" : "'anthropic'";
11105
11105
  const hasCachedTokensColumn = columns.some((column) => column.name === "total_cached_tokens");
11106
+ const hasCacheReadColumn = columns.some((column) => column.name === "total_cache_read_tokens");
11107
+ const hasCacheCreationColumn = columns.some((column) => column.name === "total_cache_creation_tokens");
11106
11108
  const cachedTokensSelector = hasCachedTokensColumn ? "COALESCE(total_cached_tokens, 0)" : "0";
11109
+ const cacheReadSelector = hasCacheReadColumn ? "COALESCE(total_cache_read_tokens, 0)" : "0";
11110
+ const cacheCreationSelector = hasCacheCreationColumn ? "COALESCE(total_cache_creation_tokens, 0)" : "0";
11107
11111
  await exec(
11108
11112
  db,
11109
11113
  `ALTER TABLE daily_metrics RENAME TO daily_metrics_old;
@@ -11114,16 +11118,20 @@ async function migrateDailyMetricsTable(db) {
11114
11118
  total_input_tokens INTEGER DEFAULT 0,
11115
11119
  total_output_tokens INTEGER DEFAULT 0,
11116
11120
  total_cached_tokens INTEGER DEFAULT 0,
11121
+ total_cache_read_tokens INTEGER DEFAULT 0,
11122
+ total_cache_creation_tokens INTEGER DEFAULT 0,
11117
11123
  total_latency_ms INTEGER DEFAULT 0,
11118
11124
  PRIMARY KEY (date, endpoint)
11119
11125
  );
11120
- INSERT INTO daily_metrics (date, endpoint, request_count, total_input_tokens, total_output_tokens, total_cached_tokens, total_latency_ms)
11126
+ INSERT INTO daily_metrics (date, endpoint, request_count, total_input_tokens, total_output_tokens, total_cached_tokens, total_cache_read_tokens, total_cache_creation_tokens, total_latency_ms)
11121
11127
  SELECT date,
11122
11128
  ${endpointSelector},
11123
11129
  request_count,
11124
11130
  total_input_tokens,
11125
11131
  total_output_tokens,
11126
11132
  ${cachedTokensSelector},
11133
+ ${cacheReadSelector},
11134
+ ${cacheCreationSelector},
11127
11135
  total_latency_ms
11128
11136
  FROM daily_metrics_old;
11129
11137
  DROP TABLE daily_metrics_old;`
@@ -11181,6 +11189,8 @@ async function ensureSchema(db) {
11181
11189
  total_input_tokens INTEGER DEFAULT 0,
11182
11190
  total_output_tokens INTEGER DEFAULT 0,
11183
11191
  total_cached_tokens INTEGER DEFAULT 0,
11192
+ total_cache_read_tokens INTEGER DEFAULT 0,
11193
+ total_cache_creation_tokens INTEGER DEFAULT 0,
11184
11194
  total_latency_ms INTEGER DEFAULT 0,
11185
11195
  PRIMARY KEY (date, endpoint)
11186
11196
  );
@@ -11217,6 +11227,8 @@ async function ensureSchema(db) {
11217
11227
  );
11218
11228
  await maybeAddColumn(db, "request_logs", "client_model", "TEXT");
11219
11229
  await maybeAddColumn(db, "request_logs", "cached_tokens", "INTEGER");
11230
+ await maybeAddColumn(db, "request_logs", "cache_read_tokens", "INTEGER DEFAULT 0");
11231
+ await maybeAddColumn(db, "request_logs", "cache_creation_tokens", "INTEGER DEFAULT 0");
11220
11232
  await maybeAddColumn(db, "request_logs", "ttft_ms", "INTEGER");
11221
11233
  await maybeAddColumn(db, "request_logs", "tpot_ms", "REAL");
11222
11234
  await maybeAddColumn(db, "request_logs", "stream", "INTEGER");
@@ -11239,6 +11251,8 @@ async function ensureSchema(db) {
11239
11251
  await maybeAddColumn(db, "api_keys", "total_output_tokens", "INTEGER DEFAULT 0");
11240
11252
  await migrateDailyMetricsTable(db);
11241
11253
  await maybeAddColumn(db, "daily_metrics", "total_cached_tokens", "INTEGER DEFAULT 0");
11254
+ await maybeAddColumn(db, "daily_metrics", "total_cache_read_tokens", "INTEGER DEFAULT 0");
11255
+ await maybeAddColumn(db, "daily_metrics", "total_cache_creation_tokens", "INTEGER DEFAULT 0");
11242
11256
  await run(db, "CREATE UNIQUE INDEX IF NOT EXISTS idx_api_keys_hash ON api_keys(key_hash) WHERE key_hash IS NOT NULL");
11243
11257
  await run(db, "UPDATE api_keys SET key_hash = '*' WHERE is_wildcard = 1 AND (key_hash IS NULL OR key_hash = '')");
11244
11258
  await run(db, "UPDATE api_keys SET updated_at = created_at WHERE updated_at IS NULL");
@@ -11404,6 +11418,14 @@ async function updateLogTokens(requestId, values) {
11404
11418
  values.outputTokens,
11405
11419
  values.cachedTokens ?? null
11406
11420
  ];
11421
+ if (values.cacheReadTokens !== void 0) {
11422
+ setters.push("cache_read_tokens = ?");
11423
+ params.push(values.cacheReadTokens ?? null);
11424
+ }
11425
+ if (values.cacheCreationTokens !== void 0) {
11426
+ setters.push("cache_creation_tokens = ?");
11427
+ params.push(values.cacheCreationTokens ?? null);
11428
+ }
11407
11429
  if (values.ttftMs !== void 0) {
11408
11430
  setters.push("ttft_ms = ?");
11409
11431
  params.push(values.ttftMs ?? null);
@@ -11455,25 +11477,33 @@ async function upsertLogPayload(requestId, payload) {
11455
11477
  );
11456
11478
  }
11457
11479
  async function updateMetrics(date, endpoint, delta) {
11458
- await runQuery(
11459
- `INSERT INTO daily_metrics (date, endpoint, request_count, total_input_tokens, total_output_tokens, total_cached_tokens, total_latency_ms)
11460
- VALUES (?, ?, ?, ?, ?, ?, ?)
11461
- ON CONFLICT(date, endpoint) DO UPDATE SET
11462
- request_count = daily_metrics.request_count + excluded.request_count,
11463
- total_input_tokens = daily_metrics.total_input_tokens + excluded.total_input_tokens,
11464
- total_output_tokens = daily_metrics.total_output_tokens + excluded.total_output_tokens,
11465
- total_cached_tokens = daily_metrics.total_cached_tokens + excluded.total_cached_tokens,
11466
- total_latency_ms = daily_metrics.total_latency_ms + excluded.total_latency_ms`,
11467
- [
11468
- date,
11469
- endpoint,
11470
- delta.requests,
11471
- delta.inputTokens,
11472
- delta.outputTokens,
11473
- delta.cachedTokens ?? 0,
11474
- delta.latencyMs
11475
- ]
11476
- );
11480
+ try {
11481
+ await runQuery(
11482
+ `INSERT INTO daily_metrics (date, endpoint, request_count, total_input_tokens, total_output_tokens, total_cached_tokens, total_cache_read_tokens, total_cache_creation_tokens, total_latency_ms)
11483
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
11484
+ ON CONFLICT(date, endpoint) DO UPDATE SET
11485
+ request_count = daily_metrics.request_count + excluded.request_count,
11486
+ total_input_tokens = daily_metrics.total_input_tokens + excluded.total_input_tokens,
11487
+ total_output_tokens = daily_metrics.total_output_tokens + excluded.total_output_tokens,
11488
+ total_cached_tokens = daily_metrics.total_cached_tokens + excluded.total_cached_tokens,
11489
+ total_cache_read_tokens = daily_metrics.total_cache_read_tokens + excluded.total_cache_read_tokens,
11490
+ total_cache_creation_tokens = daily_metrics.total_cache_creation_tokens + excluded.total_cache_creation_tokens,
11491
+ total_latency_ms = daily_metrics.total_latency_ms + excluded.total_latency_ms`,
11492
+ [
11493
+ date,
11494
+ endpoint,
11495
+ delta.requests,
11496
+ delta.inputTokens,
11497
+ delta.outputTokens,
11498
+ delta.cachedTokens ?? 0,
11499
+ delta.cacheReadTokens ?? 0,
11500
+ delta.cacheCreationTokens ?? 0,
11501
+ delta.latencyMs
11502
+ ]
11503
+ );
11504
+ } catch (err) {
11505
+ console.error("[updateMetrics] Failed to update metrics:", err);
11506
+ }
11477
11507
  }
11478
11508
 
11479
11509
  // metrics/activity.ts
@@ -11943,23 +11973,24 @@ function computeTpot(totalLatencyMs, outputTokens, options) {
11943
11973
  return Number.isFinite(raw) ? roundTwoDecimals(raw) : null;
11944
11974
  }
11945
11975
  function resolveCachedTokens(usage) {
11976
+ const result = { read: 0, creation: 0 };
11946
11977
  if (!usage || typeof usage !== "object") {
11947
- return null;
11978
+ return result;
11979
+ }
11980
+ if (typeof usage.cache_read_input_tokens === "number") {
11981
+ result.read = usage.cache_read_input_tokens;
11982
+ }
11983
+ if (typeof usage.cache_creation_input_tokens === "number") {
11984
+ result.creation = usage.cache_creation_input_tokens;
11948
11985
  }
11949
11986
  if (typeof usage.cached_tokens === "number") {
11950
- return usage.cached_tokens;
11987
+ result.read = usage.cached_tokens;
11951
11988
  }
11952
11989
  const promptDetails = usage.prompt_tokens_details;
11953
11990
  if (promptDetails && typeof promptDetails.cached_tokens === "number") {
11954
- return promptDetails.cached_tokens;
11955
- }
11956
- if (typeof usage.cache_read_input_tokens === "number") {
11957
- return usage.cache_read_input_tokens;
11958
- }
11959
- if (typeof usage.cache_creation_input_tokens === "number") {
11960
- return usage.cache_creation_input_tokens;
11991
+ result.read = promptDetails.cached_tokens;
11961
11992
  }
11962
- return null;
11993
+ return result;
11963
11994
  }
11964
11995
  function cloneOriginalPayload(value) {
11965
11996
  const structuredCloneFn = globalThis.structuredClone;
@@ -12222,7 +12253,8 @@ async function registerMessagesRoute(app) {
12222
12253
  if (providerType === "anthropic") {
12223
12254
  let inputTokens2 = json.usage?.input_tokens ?? 0;
12224
12255
  let outputTokens2 = json.usage?.output_tokens ?? 0;
12225
- const cachedTokens3 = resolveCachedTokens(json.usage);
12256
+ const cached2 = resolveCachedTokens(json.usage);
12257
+ const cachedTokens2 = cached2.read + cached2.creation;
12226
12258
  if (!inputTokens2) {
12227
12259
  inputTokens2 = target.tokenEstimate || estimateTokens(normalized, target.modelId);
12228
12260
  }
@@ -12233,13 +12265,13 @@ async function registerMessagesRoute(app) {
12233
12265
  logUsage("non_stream.anthropic", {
12234
12266
  input: inputTokens2,
12235
12267
  output: outputTokens2,
12236
- cached: cachedTokens3
12268
+ cached: cachedTokens2
12237
12269
  });
12238
12270
  const latencyMs2 = Date.now() - requestStart;
12239
12271
  await updateLogTokens(logId, {
12240
12272
  inputTokens: inputTokens2,
12241
12273
  outputTokens: outputTokens2,
12242
- cachedTokens: cachedTokens3,
12274
+ cachedTokens: cachedTokens2,
12243
12275
  ttftMs: latencyMs2,
12244
12276
  tpotMs: computeTpot(latencyMs2, outputTokens2, { streaming: false })
12245
12277
  });
@@ -12248,7 +12280,9 @@ async function registerMessagesRoute(app) {
12248
12280
  requests: 1,
12249
12281
  inputTokens: inputTokens2,
12250
12282
  outputTokens: outputTokens2,
12251
- cachedTokens: cachedTokens3,
12283
+ cachedTokens: cachedTokens2,
12284
+ cacheReadTokens: cached2.read,
12285
+ cacheCreationTokens: cached2.creation,
12252
12286
  latencyMs: latencyMs2
12253
12287
  });
12254
12288
  if (storeResponsePayloads) {
@@ -12269,7 +12303,8 @@ async function registerMessagesRoute(app) {
12269
12303
  const claudeResponse = buildClaudeResponse(json, target.modelId);
12270
12304
  let inputTokens = json.usage?.prompt_tokens ?? 0;
12271
12305
  let outputTokens = json.usage?.completion_tokens ?? 0;
12272
- const cachedTokens2 = resolveCachedTokens(json.usage);
12306
+ const cached = resolveCachedTokens(json.usage);
12307
+ const cachedTokens = cached.read + cached.creation;
12273
12308
  if (!inputTokens) {
12274
12309
  inputTokens = target.tokenEstimate || estimateTokens(normalized, target.modelId);
12275
12310
  }
@@ -12280,13 +12315,13 @@ async function registerMessagesRoute(app) {
12280
12315
  logUsage("non_stream.openai", {
12281
12316
  input: inputTokens,
12282
12317
  output: outputTokens,
12283
- cached: cachedTokens2
12318
+ cached: cachedTokens
12284
12319
  });
12285
12320
  const latencyMs = Date.now() - requestStart;
12286
12321
  await updateLogTokens(logId, {
12287
12322
  inputTokens,
12288
12323
  outputTokens,
12289
- cachedTokens: cachedTokens2,
12324
+ cachedTokens,
12290
12325
  ttftMs: latencyMs,
12291
12326
  tpotMs: computeTpot(latencyMs, outputTokens, { streaming: false })
12292
12327
  });
@@ -12295,6 +12330,9 @@ async function registerMessagesRoute(app) {
12295
12330
  requests: 1,
12296
12331
  inputTokens,
12297
12332
  outputTokens,
12333
+ cachedTokens,
12334
+ cacheReadTokens: cached.read,
12335
+ cacheCreationTokens: cached.creation,
12298
12336
  latencyMs
12299
12337
  });
12300
12338
  if (storeResponsePayloads) {
@@ -12466,9 +12504,7 @@ async function registerMessagesRoute(app) {
12466
12504
  usagePrompt2 = payload2.usage.input_tokens ?? usagePrompt2;
12467
12505
  usageCompletion2 = payload2.usage.output_tokens ?? usageCompletion2;
12468
12506
  const maybeCached = resolveCachedTokens(payload2.usage);
12469
- if (maybeCached !== null) {
12470
- usageCached2 = maybeCached;
12471
- }
12507
+ usageCached2 = maybeCached.read + maybeCached.creation;
12472
12508
  lastUsagePayload = payload2.usage;
12473
12509
  }
12474
12510
  if (payload2?.delta) {
@@ -12495,9 +12531,7 @@ async function registerMessagesRoute(app) {
12495
12531
  usagePrompt2 = payload2.usage.input_tokens ?? usagePrompt2;
12496
12532
  usageCompletion2 = payload2.usage.output_tokens ?? usageCompletion2;
12497
12533
  const maybeCached = resolveCachedTokens(payload2.usage);
12498
- if (maybeCached !== null) {
12499
- usageCached2 = maybeCached;
12500
- }
12534
+ usageCached2 = maybeCached.read + maybeCached.creation;
12501
12535
  lastUsagePayload = payload2.usage;
12502
12536
  }
12503
12537
  if (payload2?.stop_reason) {
@@ -12549,8 +12583,9 @@ async function registerMessagesRoute(app) {
12549
12583
  }
12550
12584
  const totalLatencyMs = Date.now() - requestStart;
12551
12585
  const ttftMs = firstTokenAt2 ? firstTokenAt2 - requestStart : null;
12586
+ const cached = resolveCachedTokens(lastUsagePayload);
12552
12587
  if (usageCached2 === null) {
12553
- usageCached2 = resolveCachedTokens(lastUsagePayload);
12588
+ usageCached2 = cached.read + cached.creation;
12554
12589
  }
12555
12590
  logUsage("stream.anthropic.final", {
12556
12591
  input: usagePrompt2,
@@ -12561,6 +12596,8 @@ async function registerMessagesRoute(app) {
12561
12596
  inputTokens: usagePrompt2,
12562
12597
  outputTokens: usageCompletion2,
12563
12598
  cachedTokens: usageCached2,
12599
+ cacheReadTokens: cached.read,
12600
+ cacheCreationTokens: cached.creation,
12564
12601
  ttftMs,
12565
12602
  tpotMs: computeTpot(totalLatencyMs, usageCompletion2, {
12566
12603
  streaming: true,
@@ -12573,6 +12610,8 @@ async function registerMessagesRoute(app) {
12573
12610
  inputTokens: usagePrompt2,
12574
12611
  outputTokens: usageCompletion2,
12575
12612
  cachedTokens: usageCached2,
12613
+ cacheReadTokens: cached.read,
12614
+ cacheCreationTokens: cached.creation,
12576
12615
  latencyMs: totalLatencyMs
12577
12616
  });
12578
12617
  if (storeResponsePayloads) {
@@ -12732,6 +12771,8 @@ data: ${JSON.stringify(data)}
12732
12771
  inputTokens: finalPromptTokens,
12733
12772
  outputTokens: finalCompletionTokens,
12734
12773
  cachedTokens: usageCached,
12774
+ cacheReadTokens: 0,
12775
+ cacheCreationTokens: 0,
12735
12776
  ttftMs,
12736
12777
  tpotMs: computeTpot(totalLatencyMs, finalCompletionTokens, {
12737
12778
  streaming: true,
@@ -12743,7 +12784,9 @@ data: ${JSON.stringify(data)}
12743
12784
  requests: 1,
12744
12785
  inputTokens: finalPromptTokens,
12745
12786
  outputTokens: finalCompletionTokens,
12746
- cachedTokens: usageCached,
12787
+ cachedTokens: usageCached ?? 0,
12788
+ cacheReadTokens: 0,
12789
+ cacheCreationTokens: 0,
12747
12790
  latencyMs: totalLatencyMs
12748
12791
  });
12749
12792
  if (storeResponsePayloads) {
@@ -12894,6 +12937,8 @@ data: ${JSON.stringify(data)}
12894
12937
  inputTokens: fallbackPrompt,
12895
12938
  outputTokens: fallbackCompletion,
12896
12939
  cachedTokens: usageCached,
12940
+ cacheReadTokens: 0,
12941
+ cacheCreationTokens: 0,
12897
12942
  ttftMs,
12898
12943
  tpotMs: computeTpot(totalLatencyMs, fallbackCompletion, {
12899
12944
  streaming: true,
@@ -12905,7 +12950,9 @@ data: ${JSON.stringify(data)}
12905
12950
  requests: 1,
12906
12951
  inputTokens: fallbackPrompt,
12907
12952
  outputTokens: fallbackCompletion,
12908
- cachedTokens: usageCached,
12953
+ cachedTokens: usageCached ?? 0,
12954
+ cacheReadTokens: 0,
12955
+ cacheCreationTokens: 0,
12909
12956
  latencyMs: totalLatencyMs
12910
12957
  });
12911
12958
  if (storeResponsePayloads) {
@@ -13306,27 +13353,28 @@ function computeTpot2(totalLatencyMs, outputTokens, options) {
13306
13353
  return Number.isFinite(raw) ? roundTwoDecimals2(raw) : null;
13307
13354
  }
13308
13355
  function resolveCachedTokens2(usage) {
13356
+ const result = { read: 0, creation: 0 };
13309
13357
  if (!usage || typeof usage !== "object") {
13310
- return null;
13358
+ return result;
13359
+ }
13360
+ if (typeof usage.cache_read_input_tokens === "number") {
13361
+ result.read = usage.cache_read_input_tokens;
13362
+ }
13363
+ if (typeof usage.cache_creation_input_tokens === "number") {
13364
+ result.creation = usage.cache_creation_input_tokens;
13311
13365
  }
13312
13366
  if (typeof usage.cached_tokens === "number") {
13313
- return usage.cached_tokens;
13367
+ result.read = usage.cached_tokens;
13314
13368
  }
13315
13369
  const promptDetails = usage.prompt_tokens_details;
13316
13370
  if (promptDetails && typeof promptDetails.cached_tokens === "number") {
13317
- return promptDetails.cached_tokens;
13371
+ result.read = promptDetails.cached_tokens;
13318
13372
  }
13319
13373
  const inputDetails = usage.input_tokens_details;
13320
13374
  if (inputDetails && typeof inputDetails.cached_tokens === "number") {
13321
- return inputDetails.cached_tokens;
13322
- }
13323
- if (typeof usage.cache_read_input_tokens === "number") {
13324
- return usage.cache_read_input_tokens;
13325
- }
13326
- if (typeof usage.cache_creation_input_tokens === "number") {
13327
- return usage.cache_creation_input_tokens;
13375
+ result.read = inputDetails.cached_tokens;
13328
13376
  }
13329
- return null;
13377
+ return result;
13330
13378
  }
13331
13379
  var generateId = (prefix) => `${prefix}_${Math.random().toString(36).slice(2, 10)}`;
13332
13380
  var isText = (input) => typeof input === "string" && input.length > 0;
@@ -13848,17 +13896,20 @@ async function registerOpenAiRoutes(app) {
13848
13896
  if (!Number.isFinite(inputTokens3) || inputTokens3 <= 0) {
13849
13897
  inputTokens3 = target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
13850
13898
  }
13851
- const cachedTokens3 = resolveCachedTokens2(usagePayload2);
13899
+ const cached2 = resolveCachedTokens2(usagePayload2);
13900
+ const cachedTokens2 = cached2.read + cached2.creation;
13852
13901
  const latencyMs3 = Date.now() - requestStart;
13853
13902
  const openAIResponse = buildOpenAIResponseFromClaude(parsed, target.modelId, converted, {
13854
13903
  inputTokens: inputTokens3,
13855
13904
  outputTokens: outputTokens3,
13856
- cachedTokens: cachedTokens3
13905
+ cachedTokens: cachedTokens2
13857
13906
  });
13858
13907
  await updateLogTokens(logId, {
13859
13908
  inputTokens: inputTokens3,
13860
13909
  outputTokens: outputTokens3,
13861
- cachedTokens: cachedTokens3,
13910
+ cachedTokens: usageCached,
13911
+ cacheReadTokens: cached2.read,
13912
+ cacheCreationTokens: cached2.creation,
13862
13913
  ttftMs: latencyMs3,
13863
13914
  tpotMs: computeTpot2(latencyMs3, outputTokens3, { streaming: false })
13864
13915
  });
@@ -13867,7 +13918,9 @@ async function registerOpenAiRoutes(app) {
13867
13918
  requests: 1,
13868
13919
  inputTokens: inputTokens3,
13869
13920
  outputTokens: outputTokens3,
13870
- cachedTokens: cachedTokens3,
13921
+ cachedTokens: usageCached,
13922
+ cacheReadTokens: usageCacheRead,
13923
+ cacheCreationTokens: usageCacheCreation,
13871
13924
  latencyMs: latencyMs3
13872
13925
  });
13873
13926
  if (storeResponsePayloads) {
@@ -13902,12 +13955,15 @@ async function registerOpenAiRoutes(app) {
13902
13955
  return 0;
13903
13956
  })();
13904
13957
  const outputTokens2 = baseOutputTokens + reasoningTokens2;
13905
- const cachedTokens2 = resolveCachedTokens2(usagePayload);
13958
+ const cached = resolveCachedTokens2(usagePayload);
13959
+ const cachedTokens = cached.read + cached.creation;
13906
13960
  const latencyMs2 = Date.now() - requestStart;
13907
13961
  await updateLogTokens(logId, {
13908
13962
  inputTokens: inputTokens2,
13909
13963
  outputTokens: outputTokens2,
13910
- cachedTokens: cachedTokens2,
13964
+ cachedTokens: usageCached,
13965
+ cacheReadTokens: cached.read,
13966
+ cacheCreationTokens: cached.creation,
13911
13967
  ttftMs: usagePayload?.first_token_latency_ms ?? latencyMs2,
13912
13968
  tpotMs: usagePayload?.tokens_per_second ? computeTpot2(latencyMs2, outputTokens2, { streaming: false, reasoningTokens: reasoningTokens2 }) : null
13913
13969
  });
@@ -13949,6 +14005,8 @@ async function registerOpenAiRoutes(app) {
13949
14005
  let usagePrompt2 = null;
13950
14006
  let usageCompletion2 = null;
13951
14007
  let usageCached2 = null;
14008
+ let usageCacheRead2 = 0;
14009
+ let usageCacheCreation2 = 0;
13952
14010
  let lastUsagePayload = null;
13953
14011
  let firstTokenAt2 = null;
13954
14012
  let claudeMessageId = null;
@@ -13990,9 +14048,9 @@ async function registerOpenAiRoutes(app) {
13990
14048
  );
13991
14049
  if (usageCached2 == null) {
13992
14050
  const candidate = resolveCachedTokens2(usagePayload);
13993
- if (candidate != null) {
13994
- usageCached2 = candidate;
13995
- }
14051
+ usageCacheRead2 = candidate.read;
14052
+ usageCacheCreation2 = candidate.creation;
14053
+ usageCached2 = candidate.read + candidate.creation;
13996
14054
  }
13997
14055
  lastUsagePayload = usagePayload;
13998
14056
  };
@@ -14263,7 +14321,8 @@ async function registerOpenAiRoutes(app) {
14263
14321
  ensureCreatedSent();
14264
14322
  let finalPromptTokens = typeof usagePrompt2 === "number" && usagePrompt2 > 0 ? usagePrompt2 : target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
14265
14323
  let finalCompletionTokens = typeof usageCompletion2 === "number" && usageCompletion2 > 0 ? usageCompletion2 : aggregatedText ? estimateTextTokens(aggregatedText, target.modelId) : 0;
14266
- const finalCachedTokens = usageCached2 != null ? usageCached2 : resolveCachedTokens2(lastUsagePayload);
14324
+ const finalCachedResult = usageCached2 != null ? { read: usageCacheRead2, creation: usageCacheCreation2 } : resolveCachedTokens2(lastUsagePayload);
14325
+ const finalCachedTokens = finalCachedResult.read + finalCachedResult.creation;
14267
14326
  const totalLatencyMs = Date.now() - requestStart;
14268
14327
  const ttftMs = firstTokenAt2 ? firstTokenAt2 - requestStart : null;
14269
14328
  const openAIResponse = buildOpenAIResponseFromClaude(claudeMessage, target.modelId, converted, {
@@ -14294,6 +14353,8 @@ async function registerOpenAiRoutes(app) {
14294
14353
  inputTokens: finalPromptTokens,
14295
14354
  outputTokens: finalCompletionTokens,
14296
14355
  cachedTokens: finalCachedTokens ?? null,
14356
+ cacheReadTokens: 0,
14357
+ cacheCreationTokens: 0,
14297
14358
  ttftMs,
14298
14359
  tpotMs: computeTpot2(totalLatencyMs, finalCompletionTokens, {
14299
14360
  streaming: true,
@@ -14305,7 +14366,9 @@ async function registerOpenAiRoutes(app) {
14305
14366
  requests: 1,
14306
14367
  inputTokens: finalPromptTokens,
14307
14368
  outputTokens: finalCompletionTokens,
14308
- cachedTokens: usageCached2,
14369
+ cachedTokens: finalCachedTokens,
14370
+ cacheReadTokens: finalCachedResult.read,
14371
+ cacheCreationTokens: finalCachedResult.creation,
14309
14372
  latencyMs: totalLatencyMs
14310
14373
  });
14311
14374
  if (storeResponsePayloads && capturedResponseChunks2) {
@@ -14325,6 +14388,8 @@ async function registerOpenAiRoutes(app) {
14325
14388
  let usageCompletion = null;
14326
14389
  let usageReasoning = null;
14327
14390
  let usageCached = null;
14391
+ let usageCacheRead = 0;
14392
+ let usageCacheCreation = 0;
14328
14393
  let firstTokenAt = null;
14329
14394
  let chunkCount = 0;
14330
14395
  const capturedResponseChunks = storeResponsePayloads ? [] : null;
@@ -14364,7 +14429,10 @@ async function registerOpenAiRoutes(app) {
14364
14429
  usageReasoning
14365
14430
  );
14366
14431
  if (usageCached == null) {
14367
- usageCached = resolveCachedTokens2(usagePayload);
14432
+ const cachedResult = resolveCachedTokens2(usagePayload);
14433
+ usageCacheRead = cachedResult.read;
14434
+ usageCacheCreation = cachedResult.creation;
14435
+ usageCached = cachedResult.read + cachedResult.creation;
14368
14436
  }
14369
14437
  if (OPENAI_DEBUG) {
14370
14438
  debugLog("usage payload received", usagePayload);
@@ -14456,6 +14524,8 @@ async function registerOpenAiRoutes(app) {
14456
14524
  inputTokens,
14457
14525
  outputTokens,
14458
14526
  cachedTokens: usageCached,
14527
+ cacheReadTokens: 0,
14528
+ cacheCreationTokens: 0,
14459
14529
  ttftMs: firstTokenAt ? firstTokenAt - requestStart : null,
14460
14530
  tpotMs: computeTpot2(latencyMs, outputTokens, {
14461
14531
  streaming: true,
@@ -14713,12 +14783,15 @@ async function registerOpenAiRoutes(app) {
14713
14783
  inputTokens: inputTokens3,
14714
14784
  outputTokens: outputTokens3
14715
14785
  });
14716
- const cachedTokens3 = resolveCachedTokens2(usagePayload2);
14786
+ const cached2 = resolveCachedTokens2(usagePayload2);
14787
+ const cachedTokens2 = cached2.read + cached2.creation;
14717
14788
  const latencyMs3 = Date.now() - requestStart;
14718
14789
  await updateLogTokens(logId, {
14719
14790
  inputTokens: inputTokens3,
14720
14791
  outputTokens: outputTokens3,
14721
- cachedTokens: cachedTokens3,
14792
+ cachedTokens: usageCached,
14793
+ cacheReadTokens: cached2.read,
14794
+ cacheCreationTokens: cached2.creation,
14722
14795
  ttftMs: latencyMs3,
14723
14796
  tpotMs: computeTpot2(latencyMs3, outputTokens3, { streaming: false })
14724
14797
  });
@@ -14727,7 +14800,9 @@ async function registerOpenAiRoutes(app) {
14727
14800
  requests: 1,
14728
14801
  inputTokens: inputTokens3,
14729
14802
  outputTokens: outputTokens3,
14730
- cachedTokens: cachedTokens3,
14803
+ cachedTokens: usageCached,
14804
+ cacheReadTokens: usageCacheRead,
14805
+ cacheCreationTokens: usageCacheCreation,
14731
14806
  latencyMs: latencyMs3
14732
14807
  });
14733
14808
  if (storeResponsePayloads) {
@@ -14757,12 +14832,15 @@ async function registerOpenAiRoutes(app) {
14757
14832
  })(),
14758
14833
  target.modelId
14759
14834
  );
14760
- const cachedTokens2 = resolveCachedTokens2(usagePayload);
14835
+ const cached = resolveCachedTokens2(usagePayload);
14836
+ const cachedTokens = cached.read + cached.creation;
14761
14837
  const latencyMs2 = Date.now() - requestStart;
14762
14838
  await updateLogTokens(logId, {
14763
14839
  inputTokens: inputTokens2,
14764
14840
  outputTokens: outputTokens2,
14765
- cachedTokens: cachedTokens2,
14841
+ cachedTokens: usageCached,
14842
+ cacheReadTokens: cached.read,
14843
+ cacheCreationTokens: cached.creation,
14766
14844
  ttftMs: usagePayload?.first_token_latency_ms ?? latencyMs2,
14767
14845
  tpotMs: usagePayload?.tokens_per_second ? computeTpot2(latencyMs2, outputTokens2, { streaming: false }) : null
14768
14846
  });
@@ -14804,6 +14882,8 @@ async function registerOpenAiRoutes(app) {
14804
14882
  let usagePrompt2 = null;
14805
14883
  let usageCompletion2 = null;
14806
14884
  let usageCached2 = null;
14885
+ let usageCacheRead2 = 0;
14886
+ let usageCacheCreation2 = 0;
14807
14887
  let lastUsagePayload = null;
14808
14888
  let firstTokenAt2 = null;
14809
14889
  let claudeStopReason = null;
@@ -14846,9 +14926,9 @@ async function registerOpenAiRoutes(app) {
14846
14926
  );
14847
14927
  if (usageCached2 == null) {
14848
14928
  const candidate = resolveCachedTokens2(usagePayload);
14849
- if (candidate != null) {
14850
- usageCached2 = candidate;
14851
- }
14929
+ usageCacheRead2 = candidate.read;
14930
+ usageCacheCreation2 = candidate.creation;
14931
+ usageCached2 = candidate.read + candidate.creation;
14852
14932
  }
14853
14933
  lastUsagePayload = usagePayload;
14854
14934
  };
@@ -15159,7 +15239,8 @@ async function registerOpenAiRoutes(app) {
15159
15239
  }
15160
15240
  const finalPromptTokens = typeof usagePrompt2 === "number" && usagePrompt2 > 0 ? usagePrompt2 : target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
15161
15241
  const finalCompletionTokens = typeof usageCompletion2 === "number" && usageCompletion2 > 0 ? usageCompletion2 : aggregatedText ? estimateTextTokens(aggregatedText, target.modelId) : 0;
15162
- const finalCachedTokens = usageCached2 != null ? usageCached2 : resolveCachedTokens2(lastUsagePayload);
15242
+ const finalCachedResult = usageCached2 != null ? { read: usageCacheRead2, creation: usageCacheCreation2 } : resolveCachedTokens2(lastUsagePayload);
15243
+ const finalCachedTokens = finalCachedResult.read + finalCachedResult.creation;
15163
15244
  const totalLatencyMs = Date.now() - requestStart;
15164
15245
  const ttftMs = firstTokenAt2 ? firstTokenAt2 - requestStart : null;
15165
15246
  const finishReason = mapClaudeStopReasonToChatFinish(claudeStopReason) ?? "stop";
@@ -15192,6 +15273,8 @@ async function registerOpenAiRoutes(app) {
15192
15273
  inputTokens: finalPromptTokens,
15193
15274
  outputTokens: finalCompletionTokens,
15194
15275
  cachedTokens: finalCachedTokens ?? null,
15276
+ cacheReadTokens: 0,
15277
+ cacheCreationTokens: 0,
15195
15278
  ttftMs,
15196
15279
  tpotMs: computeTpot2(totalLatencyMs, finalCompletionTokens, {
15197
15280
  streaming: true,
@@ -15203,7 +15286,9 @@ async function registerOpenAiRoutes(app) {
15203
15286
  requests: 1,
15204
15287
  inputTokens: finalPromptTokens,
15205
15288
  outputTokens: finalCompletionTokens,
15206
- cachedTokens: usageCached2,
15289
+ cachedTokens: finalCachedTokens,
15290
+ cacheReadTokens: finalCachedResult.read,
15291
+ cacheCreationTokens: finalCachedResult.creation,
15207
15292
  latencyMs: totalLatencyMs
15208
15293
  });
15209
15294
  if (storeResponsePayloads && capturedResponseChunks2) {
@@ -15222,6 +15307,8 @@ async function registerOpenAiRoutes(app) {
15222
15307
  let usagePrompt = null;
15223
15308
  let usageCompletion = null;
15224
15309
  let usageCached = null;
15310
+ let usageCacheRead = 0;
15311
+ let usageCacheCreation = 0;
15225
15312
  let firstTokenAt = null;
15226
15313
  const capturedResponseChunks = storeResponsePayloads ? [] : null;
15227
15314
  const replyClosed = () => {
@@ -15254,7 +15341,10 @@ async function registerOpenAiRoutes(app) {
15254
15341
  usageCompletion
15255
15342
  );
15256
15343
  if (usageCached == null) {
15257
- usageCached = resolveCachedTokens2(usagePayload);
15344
+ const cachedResult = resolveCachedTokens2(usagePayload);
15345
+ usageCacheRead = cachedResult.read;
15346
+ usageCacheCreation = cachedResult.creation;
15347
+ usageCached = cachedResult.read + cachedResult.creation;
15258
15348
  }
15259
15349
  };
15260
15350
  while (true) {
@@ -15329,6 +15419,8 @@ async function registerOpenAiRoutes(app) {
15329
15419
  inputTokens,
15330
15420
  outputTokens,
15331
15421
  cachedTokens: usageCached,
15422
+ cacheReadTokens: 0,
15423
+ cacheCreationTokens: 0,
15332
15424
  ttftMs: firstTokenAt ? firstTokenAt - requestStart : null,
15333
15425
  tpotMs: computeTpot2(latencyMs, outputTokens, {
15334
15426
  streaming: true,
@@ -15614,6 +15706,8 @@ async function getDailyMetrics(days = 7, endpoint) {
15614
15706
  total_input_tokens AS inputTokens,
15615
15707
  total_output_tokens AS outputTokens,
15616
15708
  total_cached_tokens AS cachedTokens,
15709
+ total_cache_read_tokens AS cacheReadTokens,
15710
+ total_cache_creation_tokens AS cacheCreationTokens,
15617
15711
  total_latency_ms AS totalLatency
15618
15712
  FROM daily_metrics
15619
15713
  ${whereClause}
@@ -15627,6 +15721,8 @@ async function getDailyMetrics(days = 7, endpoint) {
15627
15721
  inputTokens: row.inputTokens ?? 0,
15628
15722
  outputTokens: row.outputTokens ?? 0,
15629
15723
  cachedTokens: row.cachedTokens ?? 0,
15724
+ cacheReadTokens: row.cacheReadTokens ?? 0,
15725
+ cacheCreationTokens: row.cacheCreationTokens ?? 0,
15630
15726
  avgLatencyMs: row.requestCount ? Math.round((row.totalLatency ?? 0) / row.requestCount) : 0
15631
15727
  })).reverse();
15632
15728
  }
@@ -15638,6 +15734,8 @@ async function getMetricsOverview(endpoint) {
15638
15734
  COALESCE(SUM(total_input_tokens), 0) AS inputTokens,
15639
15735
  COALESCE(SUM(total_output_tokens), 0) AS outputTokens,
15640
15736
  COALESCE(SUM(total_cached_tokens), 0) AS cachedTokens,
15737
+ COALESCE(SUM(total_cache_read_tokens), 0) AS cacheReadTokens,
15738
+ COALESCE(SUM(total_cache_creation_tokens), 0) AS cacheCreationTokens,
15641
15739
  COALESCE(SUM(total_latency_ms), 0) AS totalLatency
15642
15740
  FROM daily_metrics
15643
15741
  ${totalsWhere}`,
@@ -15649,6 +15747,8 @@ async function getMetricsOverview(endpoint) {
15649
15747
  total_input_tokens AS inputTokens,
15650
15748
  total_output_tokens AS outputTokens,
15651
15749
  total_cached_tokens AS cachedTokens,
15750
+ total_cache_read_tokens AS cacheReadTokens,
15751
+ total_cache_creation_tokens AS cacheCreationTokens,
15652
15752
  total_latency_ms AS totalLatency
15653
15753
  FROM daily_metrics
15654
15754
  WHERE date = ?
@@ -15666,6 +15766,8 @@ async function getMetricsOverview(endpoint) {
15666
15766
  inputTokens: totalsRow?.inputTokens ?? 0,
15667
15767
  outputTokens: totalsRow?.outputTokens ?? 0,
15668
15768
  cachedTokens: totalsRow?.cachedTokens ?? 0,
15769
+ cacheReadTokens: totalsRow?.cacheReadTokens ?? 0,
15770
+ cacheCreationTokens: totalsRow?.cacheCreationTokens ?? 0,
15669
15771
  avgLatencyMs: resolveAvg(totalsLatency, totalsRequests)
15670
15772
  },
15671
15773
  today: {
@@ -15673,6 +15775,8 @@ async function getMetricsOverview(endpoint) {
15673
15775
  inputTokens: todayRow?.inputTokens ?? 0,
15674
15776
  outputTokens: todayRow?.outputTokens ?? 0,
15675
15777
  cachedTokens: todayRow?.cachedTokens ?? 0,
15778
+ cacheReadTokens: todayRow?.cacheReadTokens ?? 0,
15779
+ cacheCreationTokens: todayRow?.cacheCreationTokens ?? 0,
15676
15780
  avgLatencyMs: resolveAvg(todayLatency, todayRequests)
15677
15781
  }
15678
15782
  };
@@ -17097,23 +17201,24 @@ function getPathsToRegister(basePath, protocol) {
17097
17201
  }
17098
17202
  }
17099
17203
  function resolveCachedTokens3(usage) {
17204
+ const result = { read: 0, creation: 0 };
17100
17205
  if (!usage || typeof usage !== "object") {
17101
- return null;
17206
+ return result;
17207
+ }
17208
+ if (typeof usage.cache_read_input_tokens === "number") {
17209
+ result.read = usage.cache_read_input_tokens;
17210
+ }
17211
+ if (typeof usage.cache_creation_input_tokens === "number") {
17212
+ result.creation = usage.cache_creation_input_tokens;
17102
17213
  }
17103
17214
  if (typeof usage.cached_tokens === "number") {
17104
- return usage.cached_tokens;
17215
+ result.read = usage.cached_tokens;
17105
17216
  }
17106
17217
  const promptDetails = usage.prompt_tokens_details;
17107
17218
  if (promptDetails && typeof promptDetails.cached_tokens === "number") {
17108
- return promptDetails.cached_tokens;
17109
- }
17110
- if (typeof usage.cache_read_input_tokens === "number") {
17111
- return usage.cache_read_input_tokens;
17112
- }
17113
- if (typeof usage.cache_creation_input_tokens === "number") {
17114
- return usage.cache_creation_input_tokens;
17219
+ result.read = promptDetails.cached_tokens;
17115
17220
  }
17116
- return null;
17221
+ return result;
17117
17222
  }
17118
17223
  var roundTwoDecimals3 = (value) => Math.round(value * 100) / 100;
17119
17224
  function cloneOriginalPayload2(value) {
@@ -17470,12 +17575,15 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
17470
17575
  const json = await new Response(upstream.body).json();
17471
17576
  const inputTokens = json.usage?.input_tokens ?? estimateTokens(normalized, target.modelId);
17472
17577
  const outputTokens = json.usage?.output_tokens ?? 0;
17473
- const cachedTokens2 = resolveCachedTokens3(json.usage);
17578
+ const cached = resolveCachedTokens3(json.usage);
17579
+ const cachedTokens = cached.read + cached.creation;
17474
17580
  const latencyMs = Date.now() - requestStart;
17475
17581
  await updateLogTokens(logId, {
17476
17582
  inputTokens,
17477
17583
  outputTokens,
17478
- cachedTokens: cachedTokens2,
17584
+ cachedTokens: usageCached,
17585
+ cacheReadTokens: cached.read,
17586
+ cacheCreationTokens: cached.creation,
17479
17587
  ttftMs: latencyMs,
17480
17588
  tpotMs: computeTpot3(latencyMs, outputTokens, { streaming: false })
17481
17589
  });
@@ -17484,7 +17592,9 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
17484
17592
  requests: 1,
17485
17593
  inputTokens,
17486
17594
  outputTokens,
17487
- cachedTokens: cachedTokens2,
17595
+ cachedTokens: usageCached,
17596
+ cacheReadTokens: usageCacheRead,
17597
+ cacheCreationTokens: usageCacheCreation,
17488
17598
  latencyMs
17489
17599
  });
17490
17600
  if (storeResponsePayloads) {
@@ -17512,6 +17622,8 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
17512
17622
  let usagePrompt = 0;
17513
17623
  let usageCompletion = 0;
17514
17624
  let usageCached = null;
17625
+ let usageCacheRead = 0;
17626
+ let usageCacheCreation = 0;
17515
17627
  let firstTokenAt = null;
17516
17628
  const capturedChunks = storeResponsePayloads ? [] : null;
17517
17629
  try {
@@ -17541,9 +17653,9 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
17541
17653
  usagePrompt = parsed.usage.input_tokens ?? usagePrompt;
17542
17654
  usageCompletion = parsed.usage.output_tokens ?? usageCompletion;
17543
17655
  const cached = resolveCachedTokens3(parsed.usage);
17544
- if (cached !== null) {
17545
- usageCached = cached;
17546
- }
17656
+ usageCacheRead = cached.read;
17657
+ usageCacheCreation = cached.creation;
17658
+ usageCached = cached.read + cached.creation;
17547
17659
  }
17548
17660
  if (!firstTokenAt && (parsed?.type === "content_block_delta" || parsed?.delta?.text)) {
17549
17661
  firstTokenAt = Date.now();
@@ -17570,6 +17682,8 @@ async function handleAnthropicProtocol(request, reply, endpoint, endpointId, app
17570
17682
  inputTokens: usagePrompt,
17571
17683
  outputTokens: usageCompletion,
17572
17684
  cachedTokens: usageCached,
17685
+ cacheReadTokens: usageCacheRead,
17686
+ cacheCreationTokens: usageCacheCreation,
17573
17687
  ttftMs,
17574
17688
  tpotMs: computeTpot3(totalLatencyMs, usageCompletion, {
17575
17689
  streaming: true,
@@ -17761,12 +17875,15 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
17761
17875
  const usagePayload = json?.usage ?? null;
17762
17876
  const inputTokens2 = usagePayload?.prompt_tokens ?? usagePayload?.input_tokens ?? target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
17763
17877
  const outputTokens2 = usagePayload?.completion_tokens ?? usagePayload?.output_tokens ?? estimateTextTokens(json?.choices?.[0]?.message?.content ?? "", target.modelId);
17764
- const cachedTokens2 = resolveCachedTokens3(usagePayload);
17878
+ const cached = resolveCachedTokens3(usagePayload);
17879
+ const cachedTokens = cached.read + cached.creation;
17765
17880
  const latencyMs2 = Date.now() - requestStart;
17766
17881
  await updateLogTokens(logId, {
17767
17882
  inputTokens: inputTokens2,
17768
17883
  outputTokens: outputTokens2,
17769
- cachedTokens: cachedTokens2,
17884
+ cachedTokens: usageCached,
17885
+ cacheReadTokens: cached.read,
17886
+ cacheCreationTokens: cached.creation,
17770
17887
  ttftMs: latencyMs2,
17771
17888
  tpotMs: computeTpot3(latencyMs2, outputTokens2, { streaming: false })
17772
17889
  });
@@ -17775,7 +17892,9 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
17775
17892
  requests: 1,
17776
17893
  inputTokens: inputTokens2,
17777
17894
  outputTokens: outputTokens2,
17778
- cachedTokens: cachedTokens2,
17895
+ cachedTokens: usageCached,
17896
+ cacheReadTokens: usageCacheRead,
17897
+ cacheCreationTokens: usageCacheCreation,
17779
17898
  latencyMs: latencyMs2
17780
17899
  });
17781
17900
  if (storeResponsePayloads) {
@@ -17800,6 +17919,8 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
17800
17919
  let usagePrompt = null;
17801
17920
  let usageCompletion = null;
17802
17921
  let usageCached = null;
17922
+ let usageCacheRead = 0;
17923
+ let usageCacheCreation = 0;
17803
17924
  let firstTokenAt = null;
17804
17925
  const capturedChunks = storeResponsePayloads ? [] : null;
17805
17926
  try {
@@ -17829,7 +17950,10 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
17829
17950
  if (usage) {
17830
17951
  usagePrompt = usage.prompt_tokens ?? usage.input_tokens ?? usagePrompt;
17831
17952
  usageCompletion = usage.completion_tokens ?? usage.output_tokens ?? usageCompletion;
17832
- usageCached = usage.cached_tokens ?? usageCached;
17953
+ const cachedResult = resolveCachedTokens3(usage);
17954
+ usageCacheRead = cachedResult.read;
17955
+ usageCacheCreation = cachedResult.creation;
17956
+ usageCached = cachedResult.read + cachedResult.creation;
17833
17957
  }
17834
17958
  } catch {
17835
17959
  }
@@ -17851,6 +17975,8 @@ async function handleOpenAIChatProtocol(request, reply, endpoint, endpointId, ap
17851
17975
  inputTokens,
17852
17976
  outputTokens,
17853
17977
  cachedTokens: usageCached,
17978
+ cacheReadTokens: usageCacheRead,
17979
+ cacheCreationTokens: usageCacheCreation,
17854
17980
  ttftMs: firstTokenAt ? firstTokenAt - requestStart : null,
17855
17981
  tpotMs: computeTpot3(latencyMs, outputTokens, {
17856
17982
  streaming: true,
@@ -18041,12 +18167,15 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
18041
18167
  const inputTokens2 = usagePayload?.prompt_tokens ?? usagePayload?.input_tokens ?? target.tokenEstimate ?? estimateTokens(normalized, target.modelId);
18042
18168
  const content = json?.response?.body?.content ?? json?.choices?.[0]?.message?.content ?? "";
18043
18169
  const outputTokens2 = usagePayload?.completion_tokens ?? usagePayload?.output_tokens ?? estimateTextTokens(content, target.modelId);
18044
- const cachedTokens2 = resolveCachedTokens3(usagePayload);
18170
+ const cached = resolveCachedTokens3(usagePayload);
18171
+ const cachedTokens = cached.read + cached.creation;
18045
18172
  const latencyMs2 = Date.now() - requestStart;
18046
18173
  await updateLogTokens(logId, {
18047
18174
  inputTokens: inputTokens2,
18048
18175
  outputTokens: outputTokens2,
18049
- cachedTokens: cachedTokens2,
18176
+ cachedTokens: usageCached,
18177
+ cacheReadTokens: cached.read,
18178
+ cacheCreationTokens: cached.creation,
18050
18179
  ttftMs: latencyMs2,
18051
18180
  tpotMs: computeTpot3(latencyMs2, outputTokens2, { streaming: false })
18052
18181
  });
@@ -18055,7 +18184,9 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
18055
18184
  requests: 1,
18056
18185
  inputTokens: inputTokens2,
18057
18186
  outputTokens: outputTokens2,
18058
- cachedTokens: cachedTokens2,
18187
+ cachedTokens: usageCached,
18188
+ cacheReadTokens: usageCacheRead,
18189
+ cacheCreationTokens: usageCacheCreation,
18059
18190
  latencyMs: latencyMs2
18060
18191
  });
18061
18192
  if (storeResponsePayloads) {
@@ -18080,6 +18211,8 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
18080
18211
  let usagePrompt = null;
18081
18212
  let usageCompletion = null;
18082
18213
  let usageCached = null;
18214
+ let usageCacheRead = 0;
18215
+ let usageCacheCreation = 0;
18083
18216
  let firstTokenAt = null;
18084
18217
  const capturedChunks = storeResponsePayloads ? [] : null;
18085
18218
  try {
@@ -18109,7 +18242,10 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
18109
18242
  if (usage) {
18110
18243
  usagePrompt = usage.prompt_tokens ?? usage.input_tokens ?? usagePrompt;
18111
18244
  usageCompletion = usage.completion_tokens ?? usage.output_tokens ?? usageCompletion;
18112
- usageCached = usage.cached_tokens ?? usageCached;
18245
+ const cachedResult = resolveCachedTokens3(usage);
18246
+ usageCacheRead = cachedResult.read;
18247
+ usageCacheCreation = cachedResult.creation;
18248
+ usageCached = cachedResult.read + cachedResult.creation;
18113
18249
  }
18114
18250
  } catch {
18115
18251
  }
@@ -18131,6 +18267,8 @@ async function handleOpenAIResponsesProtocol(request, reply, endpoint, endpointI
18131
18267
  inputTokens,
18132
18268
  outputTokens,
18133
18269
  cachedTokens: usageCached,
18270
+ cacheReadTokens: usageCacheRead,
18271
+ cacheCreationTokens: usageCacheCreation,
18134
18272
  ttftMs: firstTokenAt ? firstTokenAt - requestStart : null,
18135
18273
  tpotMs: computeTpot3(latencyMs, outputTokens, {
18136
18274
  streaming: true,