claude-sdk-proxy 3.2.1 → 3.3.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -16,6 +16,8 @@ Any Anthropic/OpenAI client → claude-sdk-proxy (:3456) → Claude Agent SDK
16
16
  - **Full tool use** — proper `tool_use` content blocks, `stop_reason: "tool_use"`, `input_json_delta` streaming
17
17
  - **Built-in agent tools** — Claude has access to Read, Write, Edit, Bash, Glob, Grep, WebFetch, WebSearch
18
18
  - **API key protection** — optional `CLAUDE_PROXY_API_KEY` to secure network-exposed instances
19
+ - **Session persistence** — SDK sessions survive proxy restarts via `persistSession: true`; automatic reset detection when message count drops
20
+ - **Exact usage tracking** — real `input_tokens`, `output_tokens`, `cache_read_input_tokens`, and `cache_creation_input_tokens` from the SDK (no rough estimates)
19
21
  - **Streaming SSE** — `message_start` emitted immediately; 15s heartbeat keeps connections alive
20
22
  - **Request timeout** — configurable per-request timeout (default 5 minutes)
21
23
  - **Graceful shutdown** — SIGINT/SIGTERM handlers wait for in-flight requests
@@ -159,6 +161,7 @@ Client tool mode is auto-detected when the request has a `tools` array and the s
159
161
  | `GET` | `/v1/models/:id` | Get model details |
160
162
  | `POST` | `/v1/messages` | Create a message (streaming or non-streaming) |
161
163
  | `POST` | `/v1/messages/count_tokens` | Estimate token count |
164
+ | `DELETE` | `/sessions/:id` | Invalidate a cached session |
162
165
  | `POST` | `/v1/chat/completions` | OpenAI-compatible chat completions |
163
166
  | `GET` | `/v1/chat/models` | List models (OpenAI format) |
164
167
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-sdk-proxy",
3
- "version": "3.2.1",
3
+ "version": "3.3.0",
4
4
  "description": "Anthropic Messages API proxy backed by Claude Agent SDK — use Claude Max with any API client",
5
5
  "type": "module",
6
6
  "main": "./src/proxy/server.ts",
@@ -213,6 +213,13 @@ function roughTokens(text: string): number {
213
213
  return Math.ceil((text ?? "").length / 4)
214
214
  }
215
215
 
216
+ function buildUsage(input: number, output: number, cacheRead: number, cacheCreation: number) {
217
+ const usage: Record<string, number> = { input_tokens: input, output_tokens: output }
218
+ if (cacheRead > 0) usage.cache_read_input_tokens = cacheRead
219
+ if (cacheCreation > 0) usage.cache_creation_input_tokens = cacheCreation
220
+ return usage
221
+ }
222
+
216
223
  // ── Conversation label extraction ────────────────────────────────────────────
217
224
  // Openclaw embeds "Conversation info (untrusted metadata)" in the last user
218
225
  // message containing a JSON block with conversation_label. Extract it to use
@@ -376,6 +383,17 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
376
383
  return c.json(result)
377
384
  })
378
385
 
386
+ app.delete("/sessions/:id", (c) => {
387
+ const id = decodeURIComponent(c.req.param("id"))
388
+ const stored = sessionStore.get(id)
389
+ if (stored) {
390
+ sessionStore.invalidate(id)
391
+ logInfo("session.api_reset", { conversationId: id, sdkSessionId: stored.sdkSessionId })
392
+ return c.json({ ok: true, invalidated: id })
393
+ }
394
+ return c.json({ ok: false, error: "session not found" }, 404)
395
+ })
396
+
379
397
  app.get("/debug/traces", (c) => {
380
398
  const limit = parseInt(c.req.query("limit") ?? "20", 10)
381
399
  return c.json(traceStore.getRecentTraces(limit))
@@ -655,19 +673,30 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
655
673
  let resumeSessionId: string | undefined
656
674
  let isResuming = false
657
675
 
658
- if (conversationId && messages.length > 1) {
676
+ if (conversationId) {
659
677
  const stored = sessionStore.get(conversationId)
660
678
  if (stored && stored.model === model) {
661
- resumeSessionId = stored.sdkSessionId
662
- isResuming = true
663
- logInfo("session.resuming", {
664
- reqId,
665
- conversationId,
666
- sdkSessionId: resumeSessionId,
667
- storedMsgCount: stored.messageCount,
668
- currentMsgCount: messages.length,
669
- resumeCount: stored.resumeCount,
670
- })
679
+ if (messages.length < stored.messageCount) {
680
+ // Client reset detected: message count dropped
681
+ logInfo("session.reset_detected", {
682
+ reqId, conversationId,
683
+ sdkSessionId: stored.sdkSessionId,
684
+ storedMsgCount: stored.messageCount,
685
+ currentMsgCount: messages.length,
686
+ })
687
+ sessionStore.invalidate(conversationId)
688
+ } else {
689
+ resumeSessionId = stored.sdkSessionId
690
+ isResuming = true
691
+ logInfo("session.resuming", {
692
+ reqId,
693
+ conversationId,
694
+ sdkSessionId: resumeSessionId,
695
+ storedMsgCount: stored.messageCount,
696
+ currentMsgCount: messages.length,
697
+ resumeCount: stored.resumeCount,
698
+ })
699
+ }
671
700
  }
672
701
  }
673
702
 
@@ -679,6 +708,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
679
708
  let promptInput: string | AsyncIterable<any>
680
709
  // promptText: always the text-only version for token counting and logging
681
710
  promptText = serializeContent(lastMsg.content)
711
+ // Track real usage from SDK stream events
712
+ let sdkInputTokens = 0
713
+ let sdkOutputTokens = 0
714
+ let sdkCacheReadTokens = 0
715
+ let sdkCacheCreationTokens = 0
682
716
 
683
717
  if (isResuming && resumeSessionId) {
684
718
  systemPrompt = (systemContext || "").trim() || undefined
@@ -778,6 +812,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
778
812
  if (message.type === "system" && (message as any).subtype === "init") {
779
813
  capturedSessionId = (message as any).session_id
780
814
  }
815
+ if (message.type === "result") {
816
+ const r = message as any
817
+ if (r.session_id) capturedSessionId = r.session_id
818
+ if (r.usage) {
819
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
820
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
821
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
822
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
823
+ }
824
+ }
781
825
  if (hasTools && message.type === "stream_event") {
782
826
  const ev = (message as any).event as any
783
827
  if (ev.type === "content_block_start") {
@@ -813,6 +857,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
813
857
  }
814
858
  } else if (ev.type === "message_delta") {
815
859
  if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
860
+ if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
861
+ } else if (ev.type === "message_start" && ev.message?.usage) {
862
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
863
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
864
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
816
865
  }
817
866
  } else if (!hasTools && message.type === "assistant") {
818
867
  let turnText = ""
@@ -820,6 +869,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
820
869
  if (block.type === "text") turnText += block.text
821
870
  }
822
871
  fullText = turnText
872
+ // Capture usage from assistant message
873
+ const msgUsage = (message as any).message?.usage
874
+ if (msgUsage?.input_tokens) sdkInputTokens = msgUsage.input_tokens
875
+ if (msgUsage?.output_tokens) sdkOutputTokens = msgUsage.output_tokens
823
876
  }
824
877
  }
825
878
  traceStore.phase(reqId, "sdk_done")
@@ -827,7 +880,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
827
880
  // Store session mapping for future resumption
828
881
  if (conversationId && capturedSessionId) {
829
882
  if (isResuming) {
830
- sessionStore.recordResume(conversationId)
883
+ sessionStore.recordResume(conversationId, messages.length)
831
884
  logInfo("session.resumed_ok", { reqId, conversationId, sdkSessionId: capturedSessionId })
832
885
  } else {
833
886
  sessionStore.set(conversationId, capturedSessionId, model, messages.length)
@@ -860,6 +913,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
860
913
  if (message.type === "system" && (message as any).subtype === "init") {
861
914
  capturedSessionId = (message as any).session_id
862
915
  }
916
+ if (message.type === "result") {
917
+ const r = message as any
918
+ if (r.session_id) capturedSessionId = r.session_id
919
+ if (r.usage) {
920
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
921
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
922
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
923
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
924
+ }
925
+ }
863
926
  if (hasTools && message.type === "stream_event") {
864
927
  const ev = (message as any).event as any
865
928
  if (ev.type === "content_block_start") {
@@ -890,6 +953,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
890
953
  }
891
954
  } else if (ev.type === "message_delta") {
892
955
  if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
956
+ if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
957
+ } else if (ev.type === "message_start" && ev.message?.usage) {
958
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
959
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
960
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
893
961
  }
894
962
  } else if (!hasTools && message.type === "assistant") {
895
963
  let turnText = ""
@@ -897,6 +965,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
897
965
  if (block.type === "text") turnText += block.text
898
966
  }
899
967
  fullText = turnText
968
+ const msgUsage = (message as any).message?.usage
969
+ if (msgUsage?.input_tokens) sdkInputTokens = msgUsage.input_tokens
970
+ if (msgUsage?.output_tokens) sdkOutputTokens = msgUsage.output_tokens
900
971
  }
901
972
  }
902
973
  traceStore.phase(reqId, "sdk_done")
@@ -937,10 +1008,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
937
1008
  id: generateId("msg_"),
938
1009
  type: "message", role: "assistant", content,
939
1010
  model: body.model, stop_reason: stopReason, stop_sequence: null,
940
- usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
1011
+ usage: buildUsage(sdkInputTokens, sdkOutputTokens, sdkCacheReadTokens, sdkCacheCreationTokens)
941
1012
  })
942
1013
  }
943
1014
 
1015
+ logDebug("usage.tokens", { reqId, sdkInput: sdkInputTokens, sdkOutput: sdkOutputTokens, cacheRead: sdkCacheReadTokens, cacheCreation: sdkCacheCreationTokens })
1016
+
944
1017
  if (!fullText || !fullText.trim()) fullText = "..."
945
1018
  traceStore.complete(reqId, { outputLen: fullText.length })
946
1019
 
@@ -949,7 +1022,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
949
1022
  type: "message", role: "assistant",
950
1023
  content: [{ type: "text", text: fullText }],
951
1024
  model: body.model, stop_reason: "end_turn", stop_sequence: null,
952
- usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
1025
+ usage: buildUsage(sdkInputTokens, sdkOutputTokens, sdkCacheReadTokens, sdkCacheCreationTokens)
953
1026
  })
954
1027
  }
955
1028
 
@@ -1015,14 +1088,21 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1015
1088
  }
1016
1089
  }, 15_000)
1017
1090
 
1018
- sse("message_start", {
1019
- type: "message_start",
1020
- message: {
1021
- id: messageId, type: "message", role: "assistant", content: [],
1022
- model: body.model, stop_reason: null, stop_sequence: null,
1023
- usage: { input_tokens: roughTokens(promptText), output_tokens: 1 }
1091
+ // Defer message_start until SDK provides real input_tokens
1092
+ let messageStartSent = false
1093
+ const emitMessageStart = () => {
1094
+ if (!messageStartSent) {
1095
+ messageStartSent = true
1096
+ sse("message_start", {
1097
+ type: "message_start",
1098
+ message: {
1099
+ id: messageId, type: "message", role: "assistant", content: [],
1100
+ model: body.model, stop_reason: null, stop_sequence: null,
1101
+ usage: buildUsage(sdkInputTokens, 1, sdkCacheReadTokens, sdkCacheCreationTokens)
1102
+ }
1103
+ })
1024
1104
  }
1025
- })
1105
+ }
1026
1106
 
1027
1107
  if (hasTools) {
1028
1108
  // ── With tools: forward native SDK stream events directly as SSE ──
@@ -1041,6 +1121,15 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1041
1121
 
1042
1122
  // Helper to forward a stream event directly as SSE
1043
1123
  const forwardStreamEvent = (ev: any) => {
1124
+ if (ev.type === "message_start" && ev.message?.usage) {
1125
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
1126
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
1127
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
1128
+ emitMessageStart()
1129
+ return
1130
+ }
1131
+ // Ensure message_start is sent before any content blocks
1132
+ emitMessageStart()
1044
1133
  if (ev.type === "content_block_start") {
1045
1134
  const cb = ev.content_block
1046
1135
  if (cb?.type === "tool_use") {
@@ -1064,6 +1153,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1064
1153
  blockIdx++
1065
1154
  } else if (ev.type === "message_delta") {
1066
1155
  if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
1156
+ if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
1067
1157
  }
1068
1158
  }
1069
1159
 
@@ -1077,6 +1167,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1077
1167
  if (message.type === "system" && (message as any).subtype === "init") {
1078
1168
  capturedSessionId = (message as any).session_id
1079
1169
  }
1170
+ if (message.type === "result") {
1171
+ const r = message as any
1172
+ if (r.session_id) capturedSessionId = r.session_id
1173
+ if (r.usage) {
1174
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
1175
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
1176
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
1177
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
1178
+ }
1179
+ }
1080
1180
  if (message.type === "stream_event") {
1081
1181
  const ev = (message as any).event as any
1082
1182
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
@@ -1091,7 +1191,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1091
1191
  // Store session mapping
1092
1192
  if (conversationId && capturedSessionId) {
1093
1193
  if (isResuming) {
1094
- sessionStore.recordResume(conversationId)
1194
+ sessionStore.recordResume(conversationId, messages.length)
1095
1195
  } else {
1096
1196
  sessionStore.set(conversationId, capturedSessionId, model, messages.length)
1097
1197
  }
@@ -1123,6 +1223,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1123
1223
  if (message.type === "system" && (message as any).subtype === "init") {
1124
1224
  capturedSessionId = (message as any).session_id
1125
1225
  }
1226
+ if (message.type === "result") {
1227
+ const r = message as any
1228
+ if (r.session_id) capturedSessionId = r.session_id
1229
+ if (r.usage) {
1230
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
1231
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
1232
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
1233
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
1234
+ }
1235
+ }
1126
1236
  if (message.type === "stream_event") {
1127
1237
  const ev = (message as any).event as any
1128
1238
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
@@ -1149,6 +1259,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1149
1259
 
1150
1260
  traceStore.phase(reqId, "responding")
1151
1261
 
1262
+ // Ensure message_start is sent even if no SDK events came
1263
+ emitMessageStart()
1264
+
1152
1265
  // If no blocks were emitted at all, emit a placeholder text block
1153
1266
  if (!hasEmittedAnyBlock) {
1154
1267
  sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
@@ -1157,7 +1270,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1157
1270
  }
1158
1271
 
1159
1272
  const stopReason = toolCallCount > 0 ? "tool_use" : (capturedStopReason ?? "end_turn")
1160
- sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
1273
+ sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: sdkOutputTokens } })
1161
1274
  sse("message_stop", { type: "message_stop" })
1162
1275
  controller.close()
1163
1276
 
@@ -1166,7 +1279,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1166
1279
  }
1167
1280
 
1168
1281
  // ── No tools: stream text deltas directly ─────────────────────
1169
- sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1282
+ let contentBlockStartSent = false
1170
1283
 
1171
1284
  let fullText = ""
1172
1285
  let hasStreamed = false
@@ -1188,8 +1301,25 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1188
1301
  if (message.type === "system" && (message as any).subtype === "init") {
1189
1302
  capturedSessionId2 = (message as any).session_id
1190
1303
  }
1304
+ if (message.type === "result") {
1305
+ const r = message as any
1306
+ if (r.session_id) capturedSessionId2 = r.session_id
1307
+ if (r.usage) {
1308
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
1309
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
1310
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
1311
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
1312
+ }
1313
+ }
1191
1314
  if (message.type === "stream_event") {
1192
1315
  const ev = message.event as any
1316
+ // Capture usage from SDK message_start (real input tokens)
1317
+ if (ev.type === "message_start" && ev.message?.usage) {
1318
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
1319
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
1320
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
1321
+ emitMessageStart()
1322
+ }
1193
1323
  // Detect first content event BEFORE sdkEvent records it
1194
1324
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1195
1325
  traceStore.phase(reqId, "sdk_streaming")
@@ -1197,12 +1327,20 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1197
1327
  if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1198
1328
  const text = ev.delta.text ?? ""
1199
1329
  if (text) {
1330
+ // Ensure message_start and content_block_start are sent before first delta
1331
+ emitMessageStart()
1332
+ if (!contentBlockStartSent) {
1333
+ contentBlockStartSent = true
1334
+ sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1335
+ }
1200
1336
  fullText += text
1201
1337
  hasStreamed = true
1202
1338
  traceStore.updateOutput(reqId, fullText.length)
1203
1339
  checkOutputSize(fullText.length)
1204
1340
  sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
1205
1341
  }
1342
+ } else if (ev.type === "message_delta" && ev.usage?.output_tokens) {
1343
+ sdkOutputTokens = ev.usage.output_tokens
1206
1344
  }
1207
1345
  }
1208
1346
  traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
@@ -1212,7 +1350,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1212
1350
  // Store session mapping
1213
1351
  if (conversationId && capturedSessionId2) {
1214
1352
  if (isResuming) {
1215
- sessionStore.recordResume(conversationId)
1353
+ sessionStore.recordResume(conversationId, messages.length)
1216
1354
  } else {
1217
1355
  sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
1218
1356
  }
@@ -1239,20 +1377,43 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1239
1377
  if (message.type === "system" && (message as any).subtype === "init") {
1240
1378
  capturedSessionId2 = (message as any).session_id
1241
1379
  }
1380
+ if (message.type === "result") {
1381
+ const r = message as any
1382
+ if (r.session_id) capturedSessionId2 = r.session_id
1383
+ if (r.usage) {
1384
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
1385
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
1386
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
1387
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
1388
+ }
1389
+ }
1242
1390
  if (message.type === "stream_event") {
1243
1391
  const ev = message.event as any
1392
+ if (ev.type === "message_start" && ev.message?.usage) {
1393
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
1394
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
1395
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
1396
+ emitMessageStart()
1397
+ }
1244
1398
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1245
1399
  traceStore.phase(reqId, "sdk_streaming")
1246
1400
  }
1247
1401
  if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1248
1402
  const text = ev.delta.text ?? ""
1249
1403
  if (text) {
1404
+ emitMessageStart()
1405
+ if (!contentBlockStartSent) {
1406
+ contentBlockStartSent = true
1407
+ sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1408
+ }
1250
1409
  fullText += text
1251
1410
  hasStreamed = true
1252
1411
  traceStore.updateOutput(reqId, fullText.length)
1253
1412
  checkOutputSize(fullText.length)
1254
1413
  sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
1255
1414
  }
1415
+ } else if (ev.type === "message_delta" && ev.usage?.output_tokens) {
1416
+ sdkOutputTokens = ev.usage.output_tokens
1256
1417
  }
1257
1418
  }
1258
1419
  traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
@@ -1273,12 +1434,19 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1273
1434
  releaseQueue()
1274
1435
  }
1275
1436
 
1437
+ // Ensure message_start and content_block_start are emitted even if no stream events came
1438
+ emitMessageStart()
1439
+ if (!contentBlockStartSent) {
1440
+ contentBlockStartSent = true
1441
+ sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1442
+ }
1443
+
1276
1444
  if (!hasStreamed) {
1277
1445
  sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
1278
1446
  }
1279
1447
 
1280
1448
  sse("content_block_stop", { type: "content_block_stop", index: 0 })
1281
- sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
1449
+ sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: sdkOutputTokens } })
1282
1450
  sse("message_stop", { type: "message_stop" })
1283
1451
  controller.close()
1284
1452
 
@@ -43,11 +43,9 @@ class SessionStore {
43
43
  constructor(ttlMs = DEFAULT_TTL_MS) {
44
44
  this.ttlMs = ttlMs
45
45
  this.load()
46
- // Clear all sessions on startup — SDK sessions don't survive proxy restarts
46
+ // SDK sessions persist on disk via persistSession: true — keep our mappings
47
47
  if (this.sessions.size > 0) {
48
- logInfo("session-store.startup_clear", { cleared: this.sessions.size })
49
- this.sessions.clear()
50
- this.save()
48
+ logInfo("session-store.loaded_sessions", { count: this.sessions.size })
51
49
  }
52
50
  }
53
51
 
@@ -87,10 +85,11 @@ class SessionStore {
87
85
  }
88
86
 
89
87
  /** Record a successful resume */
90
- recordResume(conversationId: string): void {
88
+ recordResume(conversationId: string, messageCount: number): void {
91
89
  const entry = this.sessions.get(conversationId)
92
90
  if (entry) {
93
91
  entry.resumeCount++
92
+ entry.messageCount = messageCount
94
93
  entry.lastUsed = Date.now()
95
94
  this.save()
96
95
  }