claude-sdk-proxy 3.2.2 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -16,6 +16,8 @@ Any Anthropic/OpenAI client → claude-sdk-proxy (:3456) → Claude Agent SDK
16
16
  - **Full tool use** — proper `tool_use` content blocks, `stop_reason: "tool_use"`, `input_json_delta` streaming
17
17
  - **Built-in agent tools** — Claude has access to Read, Write, Edit, Bash, Glob, Grep, WebFetch, WebSearch
18
18
  - **API key protection** — optional `CLAUDE_PROXY_API_KEY` to secure network-exposed instances
19
+ - **Session persistence** — SDK sessions survive proxy restarts via `persistSession: true`; automatic reset detection when message count drops
20
+ - **Exact usage tracking** — real `input_tokens`, `output_tokens`, `cache_read_input_tokens`, and `cache_creation_input_tokens` from the SDK (no rough estimates)
19
21
  - **Streaming SSE** — `message_start` emitted immediately; 15s heartbeat keeps connections alive
20
22
  - **Request timeout** — configurable per-request timeout (default 5 minutes)
21
23
  - **Graceful shutdown** — SIGINT/SIGTERM handlers wait for in-flight requests
@@ -159,6 +161,7 @@ Client tool mode is auto-detected when the request has a `tools` array and the s
159
161
  | `GET` | `/v1/models/:id` | Get model details |
160
162
  | `POST` | `/v1/messages` | Create a message (streaming or non-streaming) |
161
163
  | `POST` | `/v1/messages/count_tokens` | Estimate token count |
164
+ | `DELETE` | `/sessions/:id` | Invalidate a cached session |
162
165
  | `POST` | `/v1/chat/completions` | OpenAI-compatible chat completions |
163
166
  | `GET` | `/v1/chat/models` | List models (OpenAI format) |
164
167
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-sdk-proxy",
3
- "version": "3.2.2",
3
+ "version": "3.3.0",
4
4
  "description": "Anthropic Messages API proxy backed by Claude Agent SDK — use Claude Max with any API client",
5
5
  "type": "module",
6
6
  "main": "./src/proxy/server.ts",
@@ -213,6 +213,13 @@ function roughTokens(text: string): number {
213
213
  return Math.ceil((text ?? "").length / 4)
214
214
  }
215
215
 
216
/**
 * Build the `usage` object attached to a proxy response.
 *
 * `input_tokens` and `output_tokens` are always present. The two cache
 * counters are included only when they are greater than zero, so responses
 * without cache activity keep the minimal two-field shape.
 *
 * Counts that are not finite positive numbers (NaN, Infinity, negatives) are
 * reported as 0: `JSON.stringify` would otherwise turn NaN/Infinity into
 * `null`, which is not a valid token count for clients parsing the response.
 *
 * @param input         Input (prompt) token count.
 * @param output        Output (completion) token count.
 * @param cacheRead     Tokens read from cache; omitted from the result when 0.
 * @param cacheCreation Tokens written to cache; omitted from the result when 0.
 * @returns A plain object suitable for JSON serialisation.
 */
function buildUsage(input: number, output: number, cacheRead: number, cacheCreation: number): Record<string, number> {
  // Normalise any invalid count to 0 so the serialised payload only ever contains real numbers.
  const sanitize = (count: number): number => (Number.isFinite(count) && count > 0 ? count : 0)

  const usage: Record<string, number> = {
    input_tokens: sanitize(input),
    output_tokens: sanitize(output),
  }

  const cacheReadTokens = sanitize(cacheRead)
  const cacheCreationTokens = sanitize(cacheCreation)
  if (cacheReadTokens > 0) usage.cache_read_input_tokens = cacheReadTokens
  if (cacheCreationTokens > 0) usage.cache_creation_input_tokens = cacheCreationTokens
  return usage
}
222
+
216
223
  // ── Conversation label extraction ────────────────────────────────────────────
217
224
  // Openclaw embeds "Conversation info (untrusted metadata)" in the last user
218
225
  // message containing a JSON block with conversation_label. Extract it to use
@@ -376,6 +383,17 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
376
383
  return c.json(result)
377
384
  })
378
385
 
386
// DELETE /sessions/:id — invalidate the session stored under this conversation id.
// Responds with { ok: true, invalidated } when an entry existed, and with
// 404 { ok: false, error } when nothing is stored under that id.
app.delete("/sessions/:id", (c) => {
  const rawId = c.req.param("id")
  let id = rawId
  try {
    id = decodeURIComponent(rawId)
  } catch (err) {
    // decodeURIComponent throws URIError on malformed percent-encoding (e.g. an
    // id containing a literal "%"). Fall back to the raw parameter so the
    // lookup still runs instead of the request failing with an unhandled error.
    if (!(err instanceof URIError)) throw err
  }

  const stored = sessionStore.get(id)
  if (!stored) {
    return c.json({ ok: false, error: "session not found" }, 404)
  }

  sessionStore.invalidate(id)
  logInfo("session.api_reset", { conversationId: id, sdkSessionId: stored.sdkSessionId })
  return c.json({ ok: true, invalidated: id })
})
396
+
379
397
  app.get("/debug/traces", (c) => {
380
398
  const limit = parseInt(c.req.query("limit") ?? "20", 10)
381
399
  return c.json(traceStore.getRecentTraces(limit))
@@ -655,19 +673,30 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
655
673
  let resumeSessionId: string | undefined
656
674
  let isResuming = false
657
675
 
658
- if (conversationId && messages.length > 1) {
676
+ if (conversationId) {
659
677
  const stored = sessionStore.get(conversationId)
660
678
  if (stored && stored.model === model) {
661
- resumeSessionId = stored.sdkSessionId
662
- isResuming = true
663
- logInfo("session.resuming", {
664
- reqId,
665
- conversationId,
666
- sdkSessionId: resumeSessionId,
667
- storedMsgCount: stored.messageCount,
668
- currentMsgCount: messages.length,
669
- resumeCount: stored.resumeCount,
670
- })
679
+ if (messages.length < stored.messageCount) {
680
+ // Client reset detected: message count dropped
681
+ logInfo("session.reset_detected", {
682
+ reqId, conversationId,
683
+ sdkSessionId: stored.sdkSessionId,
684
+ storedMsgCount: stored.messageCount,
685
+ currentMsgCount: messages.length,
686
+ })
687
+ sessionStore.invalidate(conversationId)
688
+ } else {
689
+ resumeSessionId = stored.sdkSessionId
690
+ isResuming = true
691
+ logInfo("session.resuming", {
692
+ reqId,
693
+ conversationId,
694
+ sdkSessionId: resumeSessionId,
695
+ storedMsgCount: stored.messageCount,
696
+ currentMsgCount: messages.length,
697
+ resumeCount: stored.resumeCount,
698
+ })
699
+ }
671
700
  }
672
701
  }
673
702
 
@@ -679,10 +708,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
679
708
  let promptInput: string | AsyncIterable<any>
680
709
  // promptText: always the text-only version for token counting and logging
681
710
  promptText = serializeContent(lastMsg.content)
682
- // Track real usage from SDK stream events (fallback to rough estimate)
683
- const roughContextTokens = roughTokens((systemContext || "") + messages.map(m => serializeContent(m.content)).join("\n"))
711
+ // Track real usage from SDK stream events
684
712
  let sdkInputTokens = 0
685
713
  let sdkOutputTokens = 0
714
+ let sdkCacheReadTokens = 0
715
+ let sdkCacheCreationTokens = 0
686
716
 
687
717
  if (isResuming && resumeSessionId) {
688
718
  systemPrompt = (systemContext || "").trim() || undefined
@@ -782,6 +812,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
782
812
  if (message.type === "system" && (message as any).subtype === "init") {
783
813
  capturedSessionId = (message as any).session_id
784
814
  }
815
+ if (message.type === "result") {
816
+ const r = message as any
817
+ if (r.session_id) capturedSessionId = r.session_id
818
+ if (r.usage) {
819
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
820
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
821
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
822
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
823
+ }
824
+ }
785
825
  if (hasTools && message.type === "stream_event") {
786
826
  const ev = (message as any).event as any
787
827
  if (ev.type === "content_block_start") {
@@ -820,6 +860,8 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
820
860
  if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
821
861
  } else if (ev.type === "message_start" && ev.message?.usage) {
822
862
  if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
863
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
864
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
823
865
  }
824
866
  } else if (!hasTools && message.type === "assistant") {
825
867
  let turnText = ""
@@ -838,7 +880,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
838
880
  // Store session mapping for future resumption
839
881
  if (conversationId && capturedSessionId) {
840
882
  if (isResuming) {
841
- sessionStore.recordResume(conversationId)
883
+ sessionStore.recordResume(conversationId, messages.length)
842
884
  logInfo("session.resumed_ok", { reqId, conversationId, sdkSessionId: capturedSessionId })
843
885
  } else {
844
886
  sessionStore.set(conversationId, capturedSessionId, model, messages.length)
@@ -871,6 +913,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
871
913
  if (message.type === "system" && (message as any).subtype === "init") {
872
914
  capturedSessionId = (message as any).session_id
873
915
  }
916
+ if (message.type === "result") {
917
+ const r = message as any
918
+ if (r.session_id) capturedSessionId = r.session_id
919
+ if (r.usage) {
920
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
921
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
922
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
923
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
924
+ }
925
+ }
874
926
  if (hasTools && message.type === "stream_event") {
875
927
  const ev = (message as any).event as any
876
928
  if (ev.type === "content_block_start") {
@@ -904,6 +956,8 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
904
956
  if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
905
957
  } else if (ev.type === "message_start" && ev.message?.usage) {
906
958
  if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
959
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
960
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
907
961
  }
908
962
  } else if (!hasTools && message.type === "assistant") {
909
963
  let turnText = ""
@@ -954,11 +1008,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
954
1008
  id: generateId("msg_"),
955
1009
  type: "message", role: "assistant", content,
956
1010
  model: body.model, stop_reason: stopReason, stop_sequence: null,
957
- usage: { input_tokens: sdkInputTokens || roughContextTokens, output_tokens: sdkOutputTokens || roughTokens(fullText) }
1011
+ usage: buildUsage(sdkInputTokens, sdkOutputTokens, sdkCacheReadTokens, sdkCacheCreationTokens)
958
1012
  })
959
1013
  }
960
1014
 
961
- logDebug("usage.tokens", { reqId, sdkInput: sdkInputTokens, sdkOutput: sdkOutputTokens, roughInput: roughContextTokens, roughOutput: roughTokens(fullText) })
1015
+ logDebug("usage.tokens", { reqId, sdkInput: sdkInputTokens, sdkOutput: sdkOutputTokens, cacheRead: sdkCacheReadTokens, cacheCreation: sdkCacheCreationTokens })
962
1016
 
963
1017
  if (!fullText || !fullText.trim()) fullText = "..."
964
1018
  traceStore.complete(reqId, { outputLen: fullText.length })
@@ -968,7 +1022,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
968
1022
  type: "message", role: "assistant",
969
1023
  content: [{ type: "text", text: fullText }],
970
1024
  model: body.model, stop_reason: "end_turn", stop_sequence: null,
971
- usage: { input_tokens: sdkInputTokens || roughContextTokens, output_tokens: sdkOutputTokens || roughTokens(fullText) }
1025
+ usage: buildUsage(sdkInputTokens, sdkOutputTokens, sdkCacheReadTokens, sdkCacheCreationTokens)
972
1026
  })
973
1027
  }
974
1028
 
@@ -1044,7 +1098,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1044
1098
  message: {
1045
1099
  id: messageId, type: "message", role: "assistant", content: [],
1046
1100
  model: body.model, stop_reason: null, stop_sequence: null,
1047
- usage: { input_tokens: sdkInputTokens || roughContextTokens, output_tokens: 1 }
1101
+ usage: buildUsage(sdkInputTokens, 1, sdkCacheReadTokens, sdkCacheCreationTokens)
1048
1102
  }
1049
1103
  })
1050
1104
  }
@@ -1069,6 +1123,8 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1069
1123
  const forwardStreamEvent = (ev: any) => {
1070
1124
  if (ev.type === "message_start" && ev.message?.usage) {
1071
1125
  if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
1126
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
1127
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
1072
1128
  emitMessageStart()
1073
1129
  return
1074
1130
  }
@@ -1111,6 +1167,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1111
1167
  if (message.type === "system" && (message as any).subtype === "init") {
1112
1168
  capturedSessionId = (message as any).session_id
1113
1169
  }
1170
+ if (message.type === "result") {
1171
+ const r = message as any
1172
+ if (r.session_id) capturedSessionId = r.session_id
1173
+ if (r.usage) {
1174
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
1175
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
1176
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
1177
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
1178
+ }
1179
+ }
1114
1180
  if (message.type === "stream_event") {
1115
1181
  const ev = (message as any).event as any
1116
1182
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
@@ -1125,7 +1191,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1125
1191
  // Store session mapping
1126
1192
  if (conversationId && capturedSessionId) {
1127
1193
  if (isResuming) {
1128
- sessionStore.recordResume(conversationId)
1194
+ sessionStore.recordResume(conversationId, messages.length)
1129
1195
  } else {
1130
1196
  sessionStore.set(conversationId, capturedSessionId, model, messages.length)
1131
1197
  }
@@ -1157,6 +1223,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1157
1223
  if (message.type === "system" && (message as any).subtype === "init") {
1158
1224
  capturedSessionId = (message as any).session_id
1159
1225
  }
1226
+ if (message.type === "result") {
1227
+ const r = message as any
1228
+ if (r.session_id) capturedSessionId = r.session_id
1229
+ if (r.usage) {
1230
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
1231
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
1232
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
1233
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
1234
+ }
1235
+ }
1160
1236
  if (message.type === "stream_event") {
1161
1237
  const ev = (message as any).event as any
1162
1238
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
@@ -1194,7 +1270,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1194
1270
  }
1195
1271
 
1196
1272
  const stopReason = toolCallCount > 0 ? "tool_use" : (capturedStopReason ?? "end_turn")
1197
- sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: sdkOutputTokens || roughTokens(fullText) } })
1273
+ sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: sdkOutputTokens } })
1198
1274
  sse("message_stop", { type: "message_stop" })
1199
1275
  controller.close()
1200
1276
 
@@ -1225,11 +1301,23 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1225
1301
  if (message.type === "system" && (message as any).subtype === "init") {
1226
1302
  capturedSessionId2 = (message as any).session_id
1227
1303
  }
1304
+ if (message.type === "result") {
1305
+ const r = message as any
1306
+ if (r.session_id) capturedSessionId2 = r.session_id
1307
+ if (r.usage) {
1308
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
1309
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
1310
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
1311
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
1312
+ }
1313
+ }
1228
1314
  if (message.type === "stream_event") {
1229
1315
  const ev = message.event as any
1230
1316
  // Capture usage from SDK message_start (real input tokens)
1231
- if (ev.type === "message_start" && ev.message?.usage?.input_tokens) {
1232
- sdkInputTokens = ev.message.usage.input_tokens
1317
+ if (ev.type === "message_start" && ev.message?.usage) {
1318
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
1319
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
1320
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
1233
1321
  emitMessageStart()
1234
1322
  }
1235
1323
  // Detect first content event BEFORE sdkEvent records it
@@ -1262,7 +1350,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1262
1350
  // Store session mapping
1263
1351
  if (conversationId && capturedSessionId2) {
1264
1352
  if (isResuming) {
1265
- sessionStore.recordResume(conversationId)
1353
+ sessionStore.recordResume(conversationId, messages.length)
1266
1354
  } else {
1267
1355
  sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
1268
1356
  }
@@ -1289,10 +1377,22 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1289
1377
  if (message.type === "system" && (message as any).subtype === "init") {
1290
1378
  capturedSessionId2 = (message as any).session_id
1291
1379
  }
1380
+ if (message.type === "result") {
1381
+ const r = message as any
1382
+ if (r.session_id) capturedSessionId2 = r.session_id
1383
+ if (r.usage) {
1384
+ sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
1385
+ sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
1386
+ sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
1387
+ sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
1388
+ }
1389
+ }
1292
1390
  if (message.type === "stream_event") {
1293
1391
  const ev = message.event as any
1294
- if (ev.type === "message_start" && ev.message?.usage?.input_tokens) {
1295
- sdkInputTokens = ev.message.usage.input_tokens
1392
+ if (ev.type === "message_start" && ev.message?.usage) {
1393
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
1394
+ if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
1395
+ if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
1296
1396
  emitMessageStart()
1297
1397
  }
1298
1398
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
@@ -1346,7 +1446,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1346
1446
  }
1347
1447
 
1348
1448
  sse("content_block_stop", { type: "content_block_stop", index: 0 })
1349
- sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: sdkOutputTokens || roughTokens(fullText) } })
1449
+ sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: sdkOutputTokens } })
1350
1450
  sse("message_stop", { type: "message_stop" })
1351
1451
  controller.close()
1352
1452
 
@@ -43,11 +43,9 @@ class SessionStore {
43
43
  constructor(ttlMs = DEFAULT_TTL_MS) {
44
44
  this.ttlMs = ttlMs
45
45
  this.load()
46
- // Clear all sessions on startup — SDK sessions don't survive proxy restarts
46
+ // SDK sessions persist on disk via persistSession: true — keep our mappings
47
47
  if (this.sessions.size > 0) {
48
- logInfo("session-store.startup_clear", { cleared: this.sessions.size })
49
- this.sessions.clear()
50
- this.save()
48
+ logInfo("session-store.loaded_sessions", { count: this.sessions.size })
51
49
  }
52
50
  }
53
51
 
@@ -87,10 +85,11 @@ class SessionStore {
87
85
  }
88
86
 
89
87
  /** Record a successful resume */
90
- recordResume(conversationId: string): void {
88
+ recordResume(conversationId: string, messageCount: number): void {
91
89
  const entry = this.sessions.get(conversationId)
92
90
  if (entry) {
93
91
  entry.resumeCount++
92
+ entry.messageCount = messageCount
94
93
  entry.lastUsed = Date.now()
95
94
  this.save()
96
95
  }