claude-sdk-proxy 3.2.1 → 3.3.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +3 -0
- package/package.json +1 -1
- package/src/proxy/server.ts +194 -26
- package/src/session-store.ts +4 -5
package/README.md
CHANGED
|
@@ -16,6 +16,8 @@ Any Anthropic/OpenAI client → claude-sdk-proxy (:3456) → Claude Agent SDK
|
|
|
16
16
|
- **Full tool use** — proper `tool_use` content blocks, `stop_reason: "tool_use"`, `input_json_delta` streaming
|
|
17
17
|
- **Built-in agent tools** — Claude has access to Read, Write, Edit, Bash, Glob, Grep, WebFetch, WebSearch
|
|
18
18
|
- **API key protection** — optional `CLAUDE_PROXY_API_KEY` to secure network-exposed instances
|
|
19
|
+
- **Session persistence** — SDK sessions survive proxy restarts via `persistSession: true`; automatic reset detection when message count drops
|
|
20
|
+
- **Exact usage tracking** — real `input_tokens`, `output_tokens`, `cache_read_input_tokens`, and `cache_creation_input_tokens` from the SDK (no rough estimates)
|
|
19
21
|
- **Streaming SSE** — `message_start` is sent as soon as the SDK reports real input token usage (or, failing that, before the first content block); 15s heartbeat keeps connections alive
|
|
20
22
|
- **Request timeout** — configurable per-request timeout (default 5 minutes)
|
|
21
23
|
- **Graceful shutdown** — SIGINT/SIGTERM handlers wait for in-flight requests
|
|
@@ -159,6 +161,7 @@ Client tool mode is auto-detected when the request has a `tools` array and the s
|
|
|
159
161
|
| `GET` | `/v1/models/:id` | Get model details |
|
|
160
162
|
| `POST` | `/v1/messages` | Create a message (streaming or non-streaming) |
|
|
161
163
|
| `POST` | `/v1/messages/count_tokens` | Estimate token count |
|
|
164
|
+
| `DELETE` | `/sessions/:id` | Invalidate a cached session |
|
|
162
165
|
| `POST` | `/v1/chat/completions` | OpenAI-compatible chat completions |
|
|
163
166
|
| `GET` | `/v1/chat/models` | List models (OpenAI format) |
|
|
164
167
|
|
package/package.json
CHANGED
package/src/proxy/server.ts
CHANGED
|
@@ -213,6 +213,13 @@ function roughTokens(text: string): number {
|
|
|
213
213
|
return Math.ceil((text ?? "").length / 4)
|
|
214
214
|
}
|
|
215
215
|
|
|
216
|
+
function buildUsage(input: number, output: number, cacheRead: number, cacheCreation: number) {
|
|
217
|
+
const usage: Record<string, number> = { input_tokens: input, output_tokens: output }
|
|
218
|
+
if (cacheRead > 0) usage.cache_read_input_tokens = cacheRead
|
|
219
|
+
if (cacheCreation > 0) usage.cache_creation_input_tokens = cacheCreation
|
|
220
|
+
return usage
|
|
221
|
+
}
|
|
222
|
+
|
|
216
223
|
// ── Conversation label extraction ────────────────────────────────────────────
|
|
217
224
|
// Openclaw embeds "Conversation info (untrusted metadata)" in the last user
|
|
218
225
|
// message containing a JSON block with conversation_label. Extract it to use
|
|
@@ -376,6 +383,17 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
376
383
|
return c.json(result)
|
|
377
384
|
})
|
|
378
385
|
|
|
386
|
+
app.delete("/sessions/:id", (c) => {
|
|
387
|
+
const id = decodeURIComponent(c.req.param("id"))
|
|
388
|
+
const stored = sessionStore.get(id)
|
|
389
|
+
if (stored) {
|
|
390
|
+
sessionStore.invalidate(id)
|
|
391
|
+
logInfo("session.api_reset", { conversationId: id, sdkSessionId: stored.sdkSessionId })
|
|
392
|
+
return c.json({ ok: true, invalidated: id })
|
|
393
|
+
}
|
|
394
|
+
return c.json({ ok: false, error: "session not found" }, 404)
|
|
395
|
+
})
|
|
396
|
+
|
|
379
397
|
app.get("/debug/traces", (c) => {
|
|
380
398
|
const limit = parseInt(c.req.query("limit") ?? "20", 10)
|
|
381
399
|
return c.json(traceStore.getRecentTraces(limit))
|
|
@@ -655,19 +673,30 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
655
673
|
let resumeSessionId: string | undefined
|
|
656
674
|
let isResuming = false
|
|
657
675
|
|
|
658
|
-
if (conversationId
|
|
676
|
+
if (conversationId) {
|
|
659
677
|
const stored = sessionStore.get(conversationId)
|
|
660
678
|
if (stored && stored.model === model) {
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
}
|
|
679
|
+
if (messages.length < stored.messageCount) {
|
|
680
|
+
// Client reset detected: message count dropped
|
|
681
|
+
logInfo("session.reset_detected", {
|
|
682
|
+
reqId, conversationId,
|
|
683
|
+
sdkSessionId: stored.sdkSessionId,
|
|
684
|
+
storedMsgCount: stored.messageCount,
|
|
685
|
+
currentMsgCount: messages.length,
|
|
686
|
+
})
|
|
687
|
+
sessionStore.invalidate(conversationId)
|
|
688
|
+
} else {
|
|
689
|
+
resumeSessionId = stored.sdkSessionId
|
|
690
|
+
isResuming = true
|
|
691
|
+
logInfo("session.resuming", {
|
|
692
|
+
reqId,
|
|
693
|
+
conversationId,
|
|
694
|
+
sdkSessionId: resumeSessionId,
|
|
695
|
+
storedMsgCount: stored.messageCount,
|
|
696
|
+
currentMsgCount: messages.length,
|
|
697
|
+
resumeCount: stored.resumeCount,
|
|
698
|
+
})
|
|
699
|
+
}
|
|
671
700
|
}
|
|
672
701
|
}
|
|
673
702
|
|
|
@@ -679,6 +708,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
679
708
|
let promptInput: string | AsyncIterable<any>
|
|
680
709
|
// promptText: always the text-only version for token counting and logging
|
|
681
710
|
promptText = serializeContent(lastMsg.content)
|
|
711
|
+
// Track real usage from SDK stream events
|
|
712
|
+
let sdkInputTokens = 0
|
|
713
|
+
let sdkOutputTokens = 0
|
|
714
|
+
let sdkCacheReadTokens = 0
|
|
715
|
+
let sdkCacheCreationTokens = 0
|
|
682
716
|
|
|
683
717
|
if (isResuming && resumeSessionId) {
|
|
684
718
|
systemPrompt = (systemContext || "").trim() || undefined
|
|
@@ -778,6 +812,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
778
812
|
if (message.type === "system" && (message as any).subtype === "init") {
|
|
779
813
|
capturedSessionId = (message as any).session_id
|
|
780
814
|
}
|
|
815
|
+
if (message.type === "result") {
|
|
816
|
+
const r = message as any
|
|
817
|
+
if (r.session_id) capturedSessionId = r.session_id
|
|
818
|
+
if (r.usage) {
|
|
819
|
+
sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
|
|
820
|
+
sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
|
|
821
|
+
sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
|
|
822
|
+
sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
|
|
823
|
+
}
|
|
824
|
+
}
|
|
781
825
|
if (hasTools && message.type === "stream_event") {
|
|
782
826
|
const ev = (message as any).event as any
|
|
783
827
|
if (ev.type === "content_block_start") {
|
|
@@ -813,6 +857,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
813
857
|
}
|
|
814
858
|
} else if (ev.type === "message_delta") {
|
|
815
859
|
if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
|
|
860
|
+
if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
|
|
861
|
+
} else if (ev.type === "message_start" && ev.message?.usage) {
|
|
862
|
+
if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
|
|
863
|
+
if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
|
|
864
|
+
if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
|
|
816
865
|
}
|
|
817
866
|
} else if (!hasTools && message.type === "assistant") {
|
|
818
867
|
let turnText = ""
|
|
@@ -820,6 +869,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
820
869
|
if (block.type === "text") turnText += block.text
|
|
821
870
|
}
|
|
822
871
|
fullText = turnText
|
|
872
|
+
// Capture usage from assistant message
|
|
873
|
+
const msgUsage = (message as any).message?.usage
|
|
874
|
+
if (msgUsage?.input_tokens) sdkInputTokens = msgUsage.input_tokens
|
|
875
|
+
if (msgUsage?.output_tokens) sdkOutputTokens = msgUsage.output_tokens
|
|
823
876
|
}
|
|
824
877
|
}
|
|
825
878
|
traceStore.phase(reqId, "sdk_done")
|
|
@@ -827,7 +880,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
827
880
|
// Store session mapping for future resumption
|
|
828
881
|
if (conversationId && capturedSessionId) {
|
|
829
882
|
if (isResuming) {
|
|
830
|
-
sessionStore.recordResume(conversationId)
|
|
883
|
+
sessionStore.recordResume(conversationId, messages.length)
|
|
831
884
|
logInfo("session.resumed_ok", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
832
885
|
} else {
|
|
833
886
|
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
@@ -860,6 +913,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
860
913
|
if (message.type === "system" && (message as any).subtype === "init") {
|
|
861
914
|
capturedSessionId = (message as any).session_id
|
|
862
915
|
}
|
|
916
|
+
if (message.type === "result") {
|
|
917
|
+
const r = message as any
|
|
918
|
+
if (r.session_id) capturedSessionId = r.session_id
|
|
919
|
+
if (r.usage) {
|
|
920
|
+
sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
|
|
921
|
+
sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
|
|
922
|
+
sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
|
|
923
|
+
sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
|
|
924
|
+
}
|
|
925
|
+
}
|
|
863
926
|
if (hasTools && message.type === "stream_event") {
|
|
864
927
|
const ev = (message as any).event as any
|
|
865
928
|
if (ev.type === "content_block_start") {
|
|
@@ -890,6 +953,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
890
953
|
}
|
|
891
954
|
} else if (ev.type === "message_delta") {
|
|
892
955
|
if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
|
|
956
|
+
if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
|
|
957
|
+
} else if (ev.type === "message_start" && ev.message?.usage) {
|
|
958
|
+
if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
|
|
959
|
+
if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
|
|
960
|
+
if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
|
|
893
961
|
}
|
|
894
962
|
} else if (!hasTools && message.type === "assistant") {
|
|
895
963
|
let turnText = ""
|
|
@@ -897,6 +965,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
897
965
|
if (block.type === "text") turnText += block.text
|
|
898
966
|
}
|
|
899
967
|
fullText = turnText
|
|
968
|
+
const msgUsage = (message as any).message?.usage
|
|
969
|
+
if (msgUsage?.input_tokens) sdkInputTokens = msgUsage.input_tokens
|
|
970
|
+
if (msgUsage?.output_tokens) sdkOutputTokens = msgUsage.output_tokens
|
|
900
971
|
}
|
|
901
972
|
}
|
|
902
973
|
traceStore.phase(reqId, "sdk_done")
|
|
@@ -937,10 +1008,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
937
1008
|
id: generateId("msg_"),
|
|
938
1009
|
type: "message", role: "assistant", content,
|
|
939
1010
|
model: body.model, stop_reason: stopReason, stop_sequence: null,
|
|
940
|
-
usage:
|
|
1011
|
+
usage: buildUsage(sdkInputTokens, sdkOutputTokens, sdkCacheReadTokens, sdkCacheCreationTokens)
|
|
941
1012
|
})
|
|
942
1013
|
}
|
|
943
1014
|
|
|
1015
|
+
logDebug("usage.tokens", { reqId, sdkInput: sdkInputTokens, sdkOutput: sdkOutputTokens, cacheRead: sdkCacheReadTokens, cacheCreation: sdkCacheCreationTokens })
|
|
1016
|
+
|
|
944
1017
|
if (!fullText || !fullText.trim()) fullText = "..."
|
|
945
1018
|
traceStore.complete(reqId, { outputLen: fullText.length })
|
|
946
1019
|
|
|
@@ -949,7 +1022,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
949
1022
|
type: "message", role: "assistant",
|
|
950
1023
|
content: [{ type: "text", text: fullText }],
|
|
951
1024
|
model: body.model, stop_reason: "end_turn", stop_sequence: null,
|
|
952
|
-
usage:
|
|
1025
|
+
usage: buildUsage(sdkInputTokens, sdkOutputTokens, sdkCacheReadTokens, sdkCacheCreationTokens)
|
|
953
1026
|
})
|
|
954
1027
|
}
|
|
955
1028
|
|
|
@@ -1015,14 +1088,21 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1015
1088
|
}
|
|
1016
1089
|
}, 15_000)
|
|
1017
1090
|
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1091
|
+
// Defer message_start until SDK provides real input_tokens
|
|
1092
|
+
let messageStartSent = false
|
|
1093
|
+
const emitMessageStart = () => {
|
|
1094
|
+
if (!messageStartSent) {
|
|
1095
|
+
messageStartSent = true
|
|
1096
|
+
sse("message_start", {
|
|
1097
|
+
type: "message_start",
|
|
1098
|
+
message: {
|
|
1099
|
+
id: messageId, type: "message", role: "assistant", content: [],
|
|
1100
|
+
model: body.model, stop_reason: null, stop_sequence: null,
|
|
1101
|
+
usage: buildUsage(sdkInputTokens, 1, sdkCacheReadTokens, sdkCacheCreationTokens)
|
|
1102
|
+
}
|
|
1103
|
+
})
|
|
1024
1104
|
}
|
|
1025
|
-
}
|
|
1105
|
+
}
|
|
1026
1106
|
|
|
1027
1107
|
if (hasTools) {
|
|
1028
1108
|
// ── With tools: forward native SDK stream events directly as SSE ──
|
|
@@ -1041,6 +1121,15 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1041
1121
|
|
|
1042
1122
|
// Helper to forward a stream event directly as SSE
|
|
1043
1123
|
const forwardStreamEvent = (ev: any) => {
|
|
1124
|
+
if (ev.type === "message_start" && ev.message?.usage) {
|
|
1125
|
+
if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
|
|
1126
|
+
if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
|
|
1127
|
+
if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
|
|
1128
|
+
emitMessageStart()
|
|
1129
|
+
return
|
|
1130
|
+
}
|
|
1131
|
+
// Ensure message_start is sent before any content blocks
|
|
1132
|
+
emitMessageStart()
|
|
1044
1133
|
if (ev.type === "content_block_start") {
|
|
1045
1134
|
const cb = ev.content_block
|
|
1046
1135
|
if (cb?.type === "tool_use") {
|
|
@@ -1064,6 +1153,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1064
1153
|
blockIdx++
|
|
1065
1154
|
} else if (ev.type === "message_delta") {
|
|
1066
1155
|
if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
|
|
1156
|
+
if (ev.usage?.output_tokens) sdkOutputTokens = ev.usage.output_tokens
|
|
1067
1157
|
}
|
|
1068
1158
|
}
|
|
1069
1159
|
|
|
@@ -1077,6 +1167,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1077
1167
|
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1078
1168
|
capturedSessionId = (message as any).session_id
|
|
1079
1169
|
}
|
|
1170
|
+
if (message.type === "result") {
|
|
1171
|
+
const r = message as any
|
|
1172
|
+
if (r.session_id) capturedSessionId = r.session_id
|
|
1173
|
+
if (r.usage) {
|
|
1174
|
+
sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
|
|
1175
|
+
sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
|
|
1176
|
+
sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
|
|
1177
|
+
sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
|
|
1178
|
+
}
|
|
1179
|
+
}
|
|
1080
1180
|
if (message.type === "stream_event") {
|
|
1081
1181
|
const ev = (message as any).event as any
|
|
1082
1182
|
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
@@ -1091,7 +1191,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1091
1191
|
// Store session mapping
|
|
1092
1192
|
if (conversationId && capturedSessionId) {
|
|
1093
1193
|
if (isResuming) {
|
|
1094
|
-
sessionStore.recordResume(conversationId)
|
|
1194
|
+
sessionStore.recordResume(conversationId, messages.length)
|
|
1095
1195
|
} else {
|
|
1096
1196
|
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
1097
1197
|
}
|
|
@@ -1123,6 +1223,16 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1123
1223
|
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1124
1224
|
capturedSessionId = (message as any).session_id
|
|
1125
1225
|
}
|
|
1226
|
+
if (message.type === "result") {
|
|
1227
|
+
const r = message as any
|
|
1228
|
+
if (r.session_id) capturedSessionId = r.session_id
|
|
1229
|
+
if (r.usage) {
|
|
1230
|
+
sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
|
|
1231
|
+
sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
|
|
1232
|
+
sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
|
|
1233
|
+
sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
|
|
1234
|
+
}
|
|
1235
|
+
}
|
|
1126
1236
|
if (message.type === "stream_event") {
|
|
1127
1237
|
const ev = (message as any).event as any
|
|
1128
1238
|
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
@@ -1149,6 +1259,9 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1149
1259
|
|
|
1150
1260
|
traceStore.phase(reqId, "responding")
|
|
1151
1261
|
|
|
1262
|
+
// Ensure message_start is sent even if no SDK events came
|
|
1263
|
+
emitMessageStart()
|
|
1264
|
+
|
|
1152
1265
|
// If no blocks were emitted at all, emit a placeholder text block
|
|
1153
1266
|
if (!hasEmittedAnyBlock) {
|
|
1154
1267
|
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
@@ -1157,7 +1270,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1157
1270
|
}
|
|
1158
1271
|
|
|
1159
1272
|
const stopReason = toolCallCount > 0 ? "tool_use" : (capturedStopReason ?? "end_turn")
|
|
1160
|
-
sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens:
|
|
1273
|
+
sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: sdkOutputTokens } })
|
|
1161
1274
|
sse("message_stop", { type: "message_stop" })
|
|
1162
1275
|
controller.close()
|
|
1163
1276
|
|
|
@@ -1166,7 +1279,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1166
1279
|
}
|
|
1167
1280
|
|
|
1168
1281
|
// ── No tools: stream text deltas directly ─────────────────────
|
|
1169
|
-
|
|
1282
|
+
let contentBlockStartSent = false
|
|
1170
1283
|
|
|
1171
1284
|
let fullText = ""
|
|
1172
1285
|
let hasStreamed = false
|
|
@@ -1188,8 +1301,25 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1188
1301
|
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1189
1302
|
capturedSessionId2 = (message as any).session_id
|
|
1190
1303
|
}
|
|
1304
|
+
if (message.type === "result") {
|
|
1305
|
+
const r = message as any
|
|
1306
|
+
if (r.session_id) capturedSessionId2 = r.session_id
|
|
1307
|
+
if (r.usage) {
|
|
1308
|
+
sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
|
|
1309
|
+
sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
|
|
1310
|
+
sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
|
|
1311
|
+
sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
|
|
1312
|
+
}
|
|
1313
|
+
}
|
|
1191
1314
|
if (message.type === "stream_event") {
|
|
1192
1315
|
const ev = message.event as any
|
|
1316
|
+
// Capture usage from SDK message_start (real input tokens)
|
|
1317
|
+
if (ev.type === "message_start" && ev.message?.usage) {
|
|
1318
|
+
if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
|
|
1319
|
+
if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
|
|
1320
|
+
if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
|
|
1321
|
+
emitMessageStart()
|
|
1322
|
+
}
|
|
1193
1323
|
// Detect first content event BEFORE sdkEvent records it
|
|
1194
1324
|
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1195
1325
|
traceStore.phase(reqId, "sdk_streaming")
|
|
@@ -1197,12 +1327,20 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1197
1327
|
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1198
1328
|
const text = ev.delta.text ?? ""
|
|
1199
1329
|
if (text) {
|
|
1330
|
+
// Ensure message_start and content_block_start are sent before first delta
|
|
1331
|
+
emitMessageStart()
|
|
1332
|
+
if (!contentBlockStartSent) {
|
|
1333
|
+
contentBlockStartSent = true
|
|
1334
|
+
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
1335
|
+
}
|
|
1200
1336
|
fullText += text
|
|
1201
1337
|
hasStreamed = true
|
|
1202
1338
|
traceStore.updateOutput(reqId, fullText.length)
|
|
1203
1339
|
checkOutputSize(fullText.length)
|
|
1204
1340
|
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
|
|
1205
1341
|
}
|
|
1342
|
+
} else if (ev.type === "message_delta" && ev.usage?.output_tokens) {
|
|
1343
|
+
sdkOutputTokens = ev.usage.output_tokens
|
|
1206
1344
|
}
|
|
1207
1345
|
}
|
|
1208
1346
|
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
@@ -1212,7 +1350,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1212
1350
|
// Store session mapping
|
|
1213
1351
|
if (conversationId && capturedSessionId2) {
|
|
1214
1352
|
if (isResuming) {
|
|
1215
|
-
sessionStore.recordResume(conversationId)
|
|
1353
|
+
sessionStore.recordResume(conversationId, messages.length)
|
|
1216
1354
|
} else {
|
|
1217
1355
|
sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
|
|
1218
1356
|
}
|
|
@@ -1239,20 +1377,43 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1239
1377
|
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1240
1378
|
capturedSessionId2 = (message as any).session_id
|
|
1241
1379
|
}
|
|
1380
|
+
if (message.type === "result") {
|
|
1381
|
+
const r = message as any
|
|
1382
|
+
if (r.session_id) capturedSessionId2 = r.session_id
|
|
1383
|
+
if (r.usage) {
|
|
1384
|
+
sdkInputTokens = r.usage.input_tokens ?? sdkInputTokens
|
|
1385
|
+
sdkOutputTokens = r.usage.output_tokens ?? sdkOutputTokens
|
|
1386
|
+
sdkCacheReadTokens = r.usage.cache_read_input_tokens ?? sdkCacheReadTokens
|
|
1387
|
+
sdkCacheCreationTokens = r.usage.cache_creation_input_tokens ?? sdkCacheCreationTokens
|
|
1388
|
+
}
|
|
1389
|
+
}
|
|
1242
1390
|
if (message.type === "stream_event") {
|
|
1243
1391
|
const ev = message.event as any
|
|
1392
|
+
if (ev.type === "message_start" && ev.message?.usage) {
|
|
1393
|
+
if (ev.message.usage.input_tokens) sdkInputTokens = ev.message.usage.input_tokens
|
|
1394
|
+
if (ev.message.usage.cache_read_input_tokens) sdkCacheReadTokens = ev.message.usage.cache_read_input_tokens
|
|
1395
|
+
if (ev.message.usage.cache_creation_input_tokens) sdkCacheCreationTokens = ev.message.usage.cache_creation_input_tokens
|
|
1396
|
+
emitMessageStart()
|
|
1397
|
+
}
|
|
1244
1398
|
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1245
1399
|
traceStore.phase(reqId, "sdk_streaming")
|
|
1246
1400
|
}
|
|
1247
1401
|
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1248
1402
|
const text = ev.delta.text ?? ""
|
|
1249
1403
|
if (text) {
|
|
1404
|
+
emitMessageStart()
|
|
1405
|
+
if (!contentBlockStartSent) {
|
|
1406
|
+
contentBlockStartSent = true
|
|
1407
|
+
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
1408
|
+
}
|
|
1250
1409
|
fullText += text
|
|
1251
1410
|
hasStreamed = true
|
|
1252
1411
|
traceStore.updateOutput(reqId, fullText.length)
|
|
1253
1412
|
checkOutputSize(fullText.length)
|
|
1254
1413
|
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
|
|
1255
1414
|
}
|
|
1415
|
+
} else if (ev.type === "message_delta" && ev.usage?.output_tokens) {
|
|
1416
|
+
sdkOutputTokens = ev.usage.output_tokens
|
|
1256
1417
|
}
|
|
1257
1418
|
}
|
|
1258
1419
|
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
@@ -1273,12 +1434,19 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1273
1434
|
releaseQueue()
|
|
1274
1435
|
}
|
|
1275
1436
|
|
|
1437
|
+
// Ensure message_start and content_block_start are emitted even if no stream events came
|
|
1438
|
+
emitMessageStart()
|
|
1439
|
+
if (!contentBlockStartSent) {
|
|
1440
|
+
contentBlockStartSent = true
|
|
1441
|
+
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
1442
|
+
}
|
|
1443
|
+
|
|
1276
1444
|
if (!hasStreamed) {
|
|
1277
1445
|
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
|
|
1278
1446
|
}
|
|
1279
1447
|
|
|
1280
1448
|
sse("content_block_stop", { type: "content_block_stop", index: 0 })
|
|
1281
|
-
sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens:
|
|
1449
|
+
sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: sdkOutputTokens } })
|
|
1282
1450
|
sse("message_stop", { type: "message_stop" })
|
|
1283
1451
|
controller.close()
|
|
1284
1452
|
|
package/src/session-store.ts
CHANGED
|
@@ -43,11 +43,9 @@ class SessionStore {
|
|
|
43
43
|
constructor(ttlMs = DEFAULT_TTL_MS) {
|
|
44
44
|
this.ttlMs = ttlMs
|
|
45
45
|
this.load()
|
|
46
|
-
//
|
|
46
|
+
// SDK sessions persist on disk via persistSession: true — keep our mappings
|
|
47
47
|
if (this.sessions.size > 0) {
|
|
48
|
-
logInfo("session-store.
|
|
49
|
-
this.sessions.clear()
|
|
50
|
-
this.save()
|
|
48
|
+
logInfo("session-store.loaded_sessions", { count: this.sessions.size })
|
|
51
49
|
}
|
|
52
50
|
}
|
|
53
51
|
|
|
@@ -87,10 +85,11 @@ class SessionStore {
|
|
|
87
85
|
}
|
|
88
86
|
|
|
89
87
|
/** Record a successful resume and store the conversation's latest message count */
|
|
90
|
-
recordResume(conversationId: string): void {
|
|
88
|
+
recordResume(conversationId: string, messageCount: number): void {
|
|
91
89
|
const entry = this.sessions.get(conversationId)
|
|
92
90
|
if (entry) {
|
|
93
91
|
entry.resumeCount++
|
|
92
|
+
entry.messageCount = messageCount
|
|
94
93
|
entry.lastUsed = Date.now()
|
|
95
94
|
this.save()
|
|
96
95
|
}
|