claude-sdk-proxy 3.2.1 → 3.2.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those changes as they appear in the respective public registries.
- package/package.json +1 -1
- package/src/proxy/server.ts +80 -12
package/package.json
CHANGED
package/src/proxy/server.ts
CHANGED
|
@@ -679,6 +679,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
679
679
|
let promptInput: string | AsyncIterable<any>
|
|
680
680
|
// promptText: always the text-only version for token counting and logging
|
|
681
681
|
promptText = serializeContent(lastMsg.content)
|
|
682
|
+
// Track real usage from SDK stream events (fallback to rough estimate)
|
|
683
|
+
const roughContextTokens = roughTokens((systemContext || "") + messages.map(m => serializeContent(m.content)).join("\n"))
|
|
684
|
+
let sdkInputTokens = 0
|
|
685
|
+
let sdkOutputTokens = 0
|
|
682
686
|
|
|
683
687
|
if (isResuming && resumeSessionId) {
|
|
684
688
|
systemPrompt = (systemContext || "").trim() || undefined
|
|
@@ -813,6 +817,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
813
817
|
}
|
|
814
818
|
} else if (ev.type === "message_delta") {
|
|
815
819
|
if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
|
|
820
|
+
if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
|
|
821
|
+
} else if (ev.type === "message_start" && ev.message?.usage) {
|
|
822
|
+
if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
|
|
816
823
|
}
|
|
817
824
|
} else if (!hasTools && message.type === "assistant") {
|
|
818
825
|
let turnText = ""
|
|
@@ -820,6 +827,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
820
827
|
if (block.type === "text") turnText += block.text
|
|
821
828
|
}
|
|
822
829
|
fullText = turnText
|
|
830
|
+
// Capture usage from assistant message
|
|
831
|
+
const msgUsage = (message as any).message?.usage
|
|
832
|
+
if (msgUsage?.input_tokens) sdkInputTokens = msgUsage.input_tokens
|
|
833
|
+
if (msgUsage?.output_tokens) sdkOutputTokens = msgUsage.output_tokens
|
|
823
834
|
}
|
|
824
835
|
}
|
|
825
836
|
traceStore.phase(reqId, "sdk_done")
|
|
@@ -890,6 +901,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
890
901
|
}
|
|
891
902
|
} else if (ev.type === "message_delta") {
|
|
892
903
|
if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
|
|
904
|
+
if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
|
|
905
|
+
} else if (ev.type === "message_start" && ev.message?.usage) {
|
|
906
|
+
if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
|
|
893
907
|
}
|
|
894
908
|
} else if (!hasTools && message.type === "assistant") {
|
|
895
909
|
let turnText = ""
|
|
@@ -897,6 +911,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
897
911
|
if (block.type === "text") turnText += block.text
|
|
898
912
|
}
|
|
899
913
|
fullText = turnText
|
|
914
|
+
const msgUsage = (message as any).message?.usage
|
|
915
|
+
if (msgUsage?.input_tokens) sdkInputTokens = msgUsage.input_tokens
|
|
916
|
+
if (msgUsage?.output_tokens) sdkOutputTokens = msgUsage.output_tokens
|
|
900
917
|
}
|
|
901
918
|
}
|
|
902
919
|
traceStore.phase(reqId, "sdk_done")
|
|
@@ -937,10 +954,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
937
954
|
id: generateId("msg_"),
|
|
938
955
|
type: "message", role: "assistant", content,
|
|
939
956
|
model: body.model, stop_reason: stopReason, stop_sequence: null,
|
|
940
|
-
usage: { input_tokens:
|
|
957
|
+
usage: { input_tokens: sdkInputTokens || roughContextTokens, output_tokens: sdkOutputTokens || roughTokens(fullText) }
|
|
941
958
|
})
|
|
942
959
|
}
|
|
943
960
|
|
|
961
|
+
logDebug("usage.tokens", { reqId, sdkInput: sdkInputTokens, sdkOutput: sdkOutputTokens, roughInput: roughContextTokens, roughOutput: roughTokens(fullText) })
|
|
962
|
+
|
|
944
963
|
if (!fullText || !fullText.trim()) fullText = "..."
|
|
945
964
|
traceStore.complete(reqId, { outputLen: fullText.length })
|
|
946
965
|
|
|
@@ -949,7 +968,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
949
968
|
type: "message", role: "assistant",
|
|
950
969
|
content: [{ type: "text", text: fullText }],
|
|
951
970
|
model: body.model, stop_reason: "end_turn", stop_sequence: null,
|
|
952
|
-
usage: { input_tokens:
|
|
971
|
+
usage: { input_tokens: sdkInputTokens || roughContextTokens, output_tokens: sdkOutputTokens || roughTokens(fullText) }
|
|
953
972
|
})
|
|
954
973
|
}
|
|
955
974
|
|
|
@@ -1015,14 +1034,21 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1015
1034
|
}
|
|
1016
1035
|
}, 15_000)
|
|
1017
1036
|
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1037
|
+
// Defer message_start until SDK provides real input_tokens
|
|
1038
|
+
let messageStartSent = false
|
|
1039
|
+
const emitMessageStart = () => {
|
|
1040
|
+
if (!messageStartSent) {
|
|
1041
|
+
messageStartSent = true
|
|
1042
|
+
sse("message_start", {
|
|
1043
|
+
type: "message_start",
|
|
1044
|
+
message: {
|
|
1045
|
+
id: messageId, type: "message", role: "assistant", content: [],
|
|
1046
|
+
model: body.model, stop_reason: null, stop_sequence: null,
|
|
1047
|
+
usage: { input_tokens: sdkInputTokens || roughContextTokens, output_tokens: 1 }
|
|
1048
|
+
}
|
|
1049
|
+
})
|
|
1024
1050
|
}
|
|
1025
|
-
}
|
|
1051
|
+
}
|
|
1026
1052
|
|
|
1027
1053
|
if (hasTools) {
|
|
1028
1054
|
// ── With tools: forward native SDK stream events directly as SSE ──
|
|
@@ -1041,6 +1067,13 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1041
1067
|
|
|
1042
1068
|
// Helper to forward a stream event directly as SSE
|
|
1043
1069
|
const forwardStreamEvent = (ev: any) => {
|
|
1070
|
+
if (ev.type === "message_start" && ev.message?.usage) {
|
|
1071
|
+
if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
|
|
1072
|
+
emitMessageStart()
|
|
1073
|
+
return
|
|
1074
|
+
}
|
|
1075
|
+
// Ensure message_start is sent before any content blocks
|
|
1076
|
+
emitMessageStart()
|
|
1044
1077
|
if (ev.type === "content_block_start") {
|
|
1045
1078
|
const cb = ev.content_block
|
|
1046
1079
|
if (cb?.type === "tool_use") {
|
|
@@ -1064,6 +1097,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1064
1097
|
blockIdx++
|
|
1065
1098
|
} else if (ev.type === "message_delta") {
|
|
1066
1099
|
if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
|
|
1100
|
+
if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
|
|
1067
1101
|
}
|
|
1068
1102
|
}
|
|
1069
1103
|
|
|
@@ -1149,6 +1183,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1149
1183
|
|
|
1150
1184
|
traceStore.phase(reqId, "responding")
|
|
1151
1185
|
|
|
1186
|
+
// Ensure message_start is sent even if no SDK events came
|
|
1187
|
+
emitMessageStart()
|
|
1188
|
+
|
|
1152
1189
|
// If no blocks were emitted at all, emit a placeholder text block
|
|
1153
1190
|
if (!hasEmittedAnyBlock) {
|
|
1154
1191
|
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
@@ -1157,7 +1194,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1157
1194
|
}
|
|
1158
1195
|
|
|
1159
1196
|
const stopReason = toolCallCount > 0 ? "tool_use" : (capturedStopReason ?? "end_turn")
|
|
1160
|
-
sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
|
|
1197
|
+
sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: sdkOutputTokens || roughTokens(fullText) } })
|
|
1161
1198
|
sse("message_stop", { type: "message_stop" })
|
|
1162
1199
|
controller.close()
|
|
1163
1200
|
|
|
@@ -1166,7 +1203,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1166
1203
|
}
|
|
1167
1204
|
|
|
1168
1205
|
// ── No tools: stream text deltas directly ─────────────────────
|
|
1169
|
-
|
|
1206
|
+
let contentBlockStartSent = false
|
|
1170
1207
|
|
|
1171
1208
|
let fullText = ""
|
|
1172
1209
|
let hasStreamed = false
|
|
@@ -1190,6 +1227,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1190
1227
|
}
|
|
1191
1228
|
if (message.type === "stream_event") {
|
|
1192
1229
|
const ev = message.event as any
|
|
1230
|
+
// Capture usage from SDK message_start (real input tokens)
|
|
1231
|
+
if (ev.type === "message_start" && ev.message?.usage?.input_tokens) {
|
|
1232
|
+
sdkInputTokens = ev.message.usage.input_tokens
|
|
1233
|
+
emitMessageStart()
|
|
1234
|
+
}
|
|
1193
1235
|
// Detect first content event BEFORE sdkEvent records it
|
|
1194
1236
|
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1195
1237
|
traceStore.phase(reqId, "sdk_streaming")
|
|
@@ -1197,12 +1239,20 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1197
1239
|
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1198
1240
|
const text = ev.delta.text ?? ""
|
|
1199
1241
|
if (text) {
|
|
1242
|
+
// Ensure message_start and content_block_start are sent before first delta
|
|
1243
|
+
emitMessageStart()
|
|
1244
|
+
if (!contentBlockStartSent) {
|
|
1245
|
+
contentBlockStartSent = true
|
|
1246
|
+
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
1247
|
+
}
|
|
1200
1248
|
fullText += text
|
|
1201
1249
|
hasStreamed = true
|
|
1202
1250
|
traceStore.updateOutput(reqId, fullText.length)
|
|
1203
1251
|
checkOutputSize(fullText.length)
|
|
1204
1252
|
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
|
|
1205
1253
|
}
|
|
1254
|
+
} else if (ev.type === "message_delta" && ev.usage?.output_tokens) {
|
|
1255
|
+
sdkOutputTokens = ev.usage.output_tokens
|
|
1206
1256
|
}
|
|
1207
1257
|
}
|
|
1208
1258
|
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
@@ -1241,18 +1291,29 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1241
1291
|
}
|
|
1242
1292
|
if (message.type === "stream_event") {
|
|
1243
1293
|
const ev = message.event as any
|
|
1294
|
+
if (ev.type === "message_start" && ev.message?.usage?.input_tokens) {
|
|
1295
|
+
sdkInputTokens = ev.message.usage.input_tokens
|
|
1296
|
+
emitMessageStart()
|
|
1297
|
+
}
|
|
1244
1298
|
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1245
1299
|
traceStore.phase(reqId, "sdk_streaming")
|
|
1246
1300
|
}
|
|
1247
1301
|
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1248
1302
|
const text = ev.delta.text ?? ""
|
|
1249
1303
|
if (text) {
|
|
1304
|
+
emitMessageStart()
|
|
1305
|
+
if (!contentBlockStartSent) {
|
|
1306
|
+
contentBlockStartSent = true
|
|
1307
|
+
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
1308
|
+
}
|
|
1250
1309
|
fullText += text
|
|
1251
1310
|
hasStreamed = true
|
|
1252
1311
|
traceStore.updateOutput(reqId, fullText.length)
|
|
1253
1312
|
checkOutputSize(fullText.length)
|
|
1254
1313
|
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
|
|
1255
1314
|
}
|
|
1315
|
+
} else if (ev.type === "message_delta" && ev.usage?.output_tokens) {
|
|
1316
|
+
sdkOutputTokens = ev.usage.output_tokens
|
|
1256
1317
|
}
|
|
1257
1318
|
}
|
|
1258
1319
|
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
@@ -1273,12 +1334,19 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1273
1334
|
releaseQueue()
|
|
1274
1335
|
}
|
|
1275
1336
|
|
|
1337
|
+
// Ensure message_start and content_block_start are emitted even if no stream events came
|
|
1338
|
+
emitMessageStart()
|
|
1339
|
+
if (!contentBlockStartSent) {
|
|
1340
|
+
contentBlockStartSent = true
|
|
1341
|
+
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
1342
|
+
}
|
|
1343
|
+
|
|
1276
1344
|
if (!hasStreamed) {
|
|
1277
1345
|
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
|
|
1278
1346
|
}
|
|
1279
1347
|
|
|
1280
1348
|
sse("content_block_stop", { type: "content_block_stop", index: 0 })
|
|
1281
|
-
sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
|
|
1349
|
+
sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: sdkOutputTokens || roughTokens(fullText) } })
|
|
1282
1350
|
sse("message_stop", { type: "message_stop" })
|
|
1283
1351
|
controller.close()
|
|
1284
1352
|
|