claude-sdk-proxy 3.2.1 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/proxy/server.ts +80 -12
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-sdk-proxy",
3
- "version": "3.2.1",
3
+ "version": "3.2.2",
4
4
  "description": "Anthropic Messages API proxy backed by Claude Agent SDK — use Claude Max with any API client",
5
5
  "type": "module",
6
6
  "main": "./src/proxy/server.ts",
@@ -679,6 +679,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
679
679
  let promptInput: string | AsyncIterable<any>
680
680
  // promptText: always the text-only version for token counting and logging
681
681
  promptText = serializeContent(lastMsg.content)
682
+ // Track real usage from SDK stream events (fallback to rough estimate)
683
+ const roughContextTokens = roughTokens((systemContext || "") + messages.map(m => serializeContent(m.content)).join("\n"))
684
+ let sdkInputTokens = 0
685
+ let sdkOutputTokens = 0
682
686
 
683
687
  if (isResuming && resumeSessionId) {
684
688
  systemPrompt = (systemContext || "").trim() || undefined
@@ -813,6 +817,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
813
817
  }
814
818
  } else if (ev.type === "message_delta") {
815
819
  if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
820
+ if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
821
+ } else if (ev.type === "message_start" && ev.message?.usage) {
822
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
816
823
  }
817
824
  } else if (!hasTools && message.type === "assistant") {
818
825
  let turnText = ""
@@ -820,6 +827,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
820
827
  if (block.type === "text") turnText += block.text
821
828
  }
822
829
  fullText = turnText
830
+ // Capture usage from assistant message
831
+ const msgUsage = (message as any).message?.usage
832
+ if (msgUsage?.input_tokens) sdkInputTokens = msgUsage.input_tokens
833
+ if (msgUsage?.output_tokens) sdkOutputTokens = msgUsage.output_tokens
823
834
  }
824
835
  }
825
836
  traceStore.phase(reqId, "sdk_done")
@@ -890,6 +901,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
890
901
  }
891
902
  } else if (ev.type === "message_delta") {
892
903
  if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
904
+ if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
905
+ } else if (ev.type === "message_start" && ev.message?.usage) {
906
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
893
907
  }
894
908
  } else if (!hasTools && message.type === "assistant") {
895
909
  let turnText = ""
@@ -897,6 +911,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
897
911
  if (block.type === "text") turnText += block.text
898
912
  }
899
913
  fullText = turnText
914
+ const msgUsage = (message as any).message?.usage
915
+ if (msgUsage?.input_tokens) sdkInputTokens = msgUsage.input_tokens
916
+ if (msgUsage?.output_tokens) sdkOutputTokens = msgUsage.output_tokens
900
917
  }
901
918
  }
902
919
  traceStore.phase(reqId, "sdk_done")
@@ -937,10 +954,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
937
954
  id: generateId("msg_"),
938
955
  type: "message", role: "assistant", content,
939
956
  model: body.model, stop_reason: stopReason, stop_sequence: null,
940
- usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
957
+ usage: { input_tokens: sdkInputTokens || roughContextTokens, output_tokens: sdkOutputTokens || roughTokens(fullText) }
941
958
  })
942
959
  }
943
960
 
961
+ logDebug("usage.tokens", { reqId, sdkInput: sdkInputTokens, sdkOutput: sdkOutputTokens, roughInput: roughContextTokens, roughOutput: roughTokens(fullText) })
962
+
944
963
  if (!fullText || !fullText.trim()) fullText = "..."
945
964
  traceStore.complete(reqId, { outputLen: fullText.length })
946
965
 
@@ -949,7 +968,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
949
968
  type: "message", role: "assistant",
950
969
  content: [{ type: "text", text: fullText }],
951
970
  model: body.model, stop_reason: "end_turn", stop_sequence: null,
952
- usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
971
+ usage: { input_tokens: sdkInputTokens || roughContextTokens, output_tokens: sdkOutputTokens || roughTokens(fullText) }
953
972
  })
954
973
  }
955
974
 
@@ -1015,14 +1034,21 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1015
1034
  }
1016
1035
  }, 15_000)
1017
1036
 
1018
- sse("message_start", {
1019
- type: "message_start",
1020
- message: {
1021
- id: messageId, type: "message", role: "assistant", content: [],
1022
- model: body.model, stop_reason: null, stop_sequence: null,
1023
- usage: { input_tokens: roughTokens(promptText), output_tokens: 1 }
1037
+ // Defer message_start until SDK provides real input_tokens
1038
+ let messageStartSent = false
1039
+ const emitMessageStart = () => {
1040
+ if (!messageStartSent) {
1041
+ messageStartSent = true
1042
+ sse("message_start", {
1043
+ type: "message_start",
1044
+ message: {
1045
+ id: messageId, type: "message", role: "assistant", content: [],
1046
+ model: body.model, stop_reason: null, stop_sequence: null,
1047
+ usage: { input_tokens: sdkInputTokens || roughContextTokens, output_tokens: 1 }
1048
+ }
1049
+ })
1024
1050
  }
1025
- })
1051
+ }
1026
1052
 
1027
1053
  if (hasTools) {
1028
1054
  // ── With tools: forward native SDK stream events directly as SSE ──
@@ -1041,6 +1067,13 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1041
1067
 
1042
1068
  // Helper to forward a stream event directly as SSE
1043
1069
  const forwardStreamEvent = (ev: any) => {
1070
+ if (ev.type === "message_start" && ev.message?.usage) {
1071
+ if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
1072
+ emitMessageStart()
1073
+ return
1074
+ }
1075
+ // Ensure message_start is sent before any content blocks
1076
+ emitMessageStart()
1044
1077
  if (ev.type === "content_block_start") {
1045
1078
  const cb = ev.content_block
1046
1079
  if (cb?.type === "tool_use") {
@@ -1064,6 +1097,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1064
1097
  blockIdx++
1065
1098
  } else if (ev.type === "message_delta") {
1066
1099
  if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
1100
+ if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
1067
1101
  }
1068
1102
  }
1069
1103
 
@@ -1149,6 +1183,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1149
1183
 
1150
1184
  traceStore.phase(reqId, "responding")
1151
1185
 
1186
+ // Ensure message_start is sent even if no SDK events came
1187
+ emitMessageStart()
1188
+
1152
1189
  // If no blocks were emitted at all, emit a placeholder text block
1153
1190
  if (!hasEmittedAnyBlock) {
1154
1191
  sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
@@ -1157,7 +1194,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1157
1194
  }
1158
1195
 
1159
1196
  const stopReason = toolCallCount > 0 ? "tool_use" : (capturedStopReason ?? "end_turn")
1160
- sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
1197
+ sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: sdkOutputTokens || roughTokens(fullText) } })
1161
1198
  sse("message_stop", { type: "message_stop" })
1162
1199
  controller.close()
1163
1200
 
@@ -1166,7 +1203,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1166
1203
  }
1167
1204
 
1168
1205
  // ── No tools: stream text deltas directly ─────────────────────
1169
- sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1206
+ let contentBlockStartSent = false
1170
1207
 
1171
1208
  let fullText = ""
1172
1209
  let hasStreamed = false
@@ -1190,6 +1227,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1190
1227
  }
1191
1228
  if (message.type === "stream_event") {
1192
1229
  const ev = message.event as any
1230
+ // Capture usage from SDK message_start (real input tokens)
1231
+ if (ev.type === "message_start" && ev.message?.usage?.input_tokens) {
1232
+ sdkInputTokens = ev.message.usage.input_tokens
1233
+ emitMessageStart()
1234
+ }
1193
1235
  // Detect first content event BEFORE sdkEvent records it
1194
1236
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1195
1237
  traceStore.phase(reqId, "sdk_streaming")
@@ -1197,12 +1239,20 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1197
1239
  if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1198
1240
  const text = ev.delta.text ?? ""
1199
1241
  if (text) {
1242
+ // Ensure message_start and content_block_start are sent before first delta
1243
+ emitMessageStart()
1244
+ if (!contentBlockStartSent) {
1245
+ contentBlockStartSent = true
1246
+ sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1247
+ }
1200
1248
  fullText += text
1201
1249
  hasStreamed = true
1202
1250
  traceStore.updateOutput(reqId, fullText.length)
1203
1251
  checkOutputSize(fullText.length)
1204
1252
  sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
1205
1253
  }
1254
+ } else if (ev.type === "message_delta" && ev.usage?.output_tokens) {
1255
+ sdkOutputTokens = ev.usage.output_tokens
1206
1256
  }
1207
1257
  }
1208
1258
  traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
@@ -1241,18 +1291,29 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1241
1291
  }
1242
1292
  if (message.type === "stream_event") {
1243
1293
  const ev = message.event as any
1294
+ if (ev.type === "message_start" && ev.message?.usage?.input_tokens) {
1295
+ sdkInputTokens = ev.message.usage.input_tokens
1296
+ emitMessageStart()
1297
+ }
1244
1298
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1245
1299
  traceStore.phase(reqId, "sdk_streaming")
1246
1300
  }
1247
1301
  if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1248
1302
  const text = ev.delta.text ?? ""
1249
1303
  if (text) {
1304
+ emitMessageStart()
1305
+ if (!contentBlockStartSent) {
1306
+ contentBlockStartSent = true
1307
+ sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1308
+ }
1250
1309
  fullText += text
1251
1310
  hasStreamed = true
1252
1311
  traceStore.updateOutput(reqId, fullText.length)
1253
1312
  checkOutputSize(fullText.length)
1254
1313
  sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
1255
1314
  }
1315
+ } else if (ev.type === "message_delta" && ev.usage?.output_tokens) {
1316
+ sdkOutputTokens = ev.usage.output_tokens
1256
1317
  }
1257
1318
  }
1258
1319
  traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
@@ -1273,12 +1334,19 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1273
1334
  releaseQueue()
1274
1335
  }
1275
1336
 
1337
+ // Ensure message_start and content_block_start are emitted even if no stream events came
1338
+ emitMessageStart()
1339
+ if (!contentBlockStartSent) {
1340
+ contentBlockStartSent = true
1341
+ sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1342
+ }
1343
+
1276
1344
  if (!hasStreamed) {
1277
1345
  sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
1278
1346
  }
1279
1347
 
1280
1348
  sse("content_block_stop", { type: "content_block_stop", index: 0 })
1281
- sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
1349
+ sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: sdkOutputTokens || roughTokens(fullText) } })
1282
1350
  sse("message_stop", { type: "message_stop" })
1283
1351
  controller.close()
1284
1352