claude-sdk-proxy 3.1.4 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ import { DEFAULT_PROXY_CONFIG } from "./types"
7
7
  import { logInfo, logWarn, logError, logDebug, LOG_DIR } from "../logger"
8
8
  import { traceStore } from "../trace"
9
9
  import { sessionStore } from "../session-store"
10
+ import { createToolMcpServer } from "../mcpTools"
10
11
  import { execSync } from "child_process"
11
12
  import { existsSync, writeFileSync, readFileSync, readdirSync } from "fs"
12
13
  import { randomBytes } from "crypto"
@@ -195,110 +196,17 @@ function createSDKUserMessage(content: Array<any>, sessionId?: string): AsyncIte
195
196
  }
196
197
 
197
198
 
198
- // ── Client tool-use support ──────────────────────────────────────────────────
199
+ // ── Client tool-use support (native MCP) ────────────────────────────────────
200
+ // Tool definitions from the client are converted to native MCP tools via
201
+ // createToolMcpServer(). The SDK generates proper tool_use content blocks
202
+ // instead of text-based XML. canUseTool denies all execution since the
203
+ // client handles tool execution.
199
204
 
200
- function buildClientToolsPrompt(tools: any[]): string {
201
- const defs = tools.map((t: any) => {
202
- const schema = t.input_schema ? `\nInput schema:\n${JSON.stringify(t.input_schema, null, 2)}` : ""
203
- return `### ${t.name}\n${t.description ?? ""}${schema}`
204
- }).join("\n\n")
205
- return `\n\n## Available Tools\n\nTo call a tool, output a <tool_use> block:\n\n` +
206
- `<tool_use>\n{"name": "TOOL_NAME", "input": {ARGUMENTS}}\n</tool_use>\n\n` +
207
- `- You may write reasoning text before the block\n` +
208
- `- Call multiple tools by including multiple <tool_use> blocks\n` +
209
- `- Each block must be valid JSON with "name" and "input" keys\n\n` +
210
- defs
211
- }
212
-
213
- interface ToolCall { id: string; name: string; input: unknown }
214
-
215
- function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string } {
216
- const calls: ToolCall[] = []
217
- let firstIdx = -1
218
-
219
- const xmlRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
220
- let m: RegExpExecArray | null
221
- while ((m = xmlRegex.exec(text)) !== null) {
222
- if (firstIdx < 0) firstIdx = m.index
223
- try {
224
- const raw = m[1]!.trim()
225
- let p: any
226
- try { p = JSON.parse(raw) } catch {
227
- // Repair: models often emit raw newlines in heredocs/multiline commands
228
- // which are invalid inside JSON strings. Escape them only inside quoted values.
229
- let fixed = "", inStr = false, esc = false
230
- for (const ch of raw) {
231
- if (esc) { fixed += ch; esc = false; continue }
232
- if (ch === "\\") { esc = true; fixed += ch; continue }
233
- if (ch === '"') { inStr = !inStr; fixed += ch; continue }
234
- if (inStr && ch === "\n") { fixed += "\\n"; continue }
235
- if (inStr && ch === "\r") { fixed += "\\r"; continue }
236
- if (inStr && ch === "\t") { fixed += "\\t"; continue }
237
- fixed += ch
238
- }
239
- p = JSON.parse(fixed)
240
- }
241
- calls.push({
242
- id: generateId("toolu_"),
243
- name: String(p.name ?? ""),
244
- input: p.input ?? {}
245
- })
246
- } catch { /* skip malformed block */ }
247
- }
248
-
249
- if (calls.length === 0) {
250
- const fcRegex = /<function_calls>([\s\S]*?)<\/function_calls>/g
251
- while ((m = fcRegex.exec(text)) !== null) {
252
- if (firstIdx < 0) firstIdx = m.index
253
- try {
254
- const parsed = JSON.parse(m[1]!.trim())
255
- const items = Array.isArray(parsed) ? parsed : [parsed]
256
- for (const p of items) {
257
- if (p && typeof p.name === "string") {
258
- calls.push({
259
- id: generateId("toolu_"),
260
- name: p.name,
261
- input: p.input ?? p.parameters ?? {}
262
- })
263
- }
264
- }
265
- } catch { /* skip malformed block */ }
266
- }
267
- }
268
-
269
- if (calls.length === 0) {
270
- const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g
271
- while ((m = invokeRegex.exec(text)) !== null) {
272
- if (firstIdx < 0) firstIdx = m.index
273
- const toolName = m[1]!
274
- const body = m[2]!
275
- const input: Record<string, any> = {}
276
- const paramRegex = /<parameter\s+name="([^"]+)">([\s\S]*?)<\/parameter>/g
277
- let pm: RegExpExecArray | null
278
- while ((pm = paramRegex.exec(body)) !== null) {
279
- const val = pm[2]!.trim()
280
- try { input[pm[1]!] = JSON.parse(val) } catch { input[pm[1]!] = val }
281
- }
282
- calls.push({ id: generateId("toolu_"), name: toolName, input })
283
- }
284
- }
285
-
286
- if (calls.length === 0) {
287
- const bracketRegex = /\[Tool call:\s*(\w+)\s*\nInput:\s*([\s\S]*?)\]/g
288
- while ((m = bracketRegex.exec(text)) !== null) {
289
- if (firstIdx < 0) firstIdx = m.index
290
- try {
291
- const input = JSON.parse(m[2]!.trim())
292
- calls.push({
293
- id: generateId("toolu_"),
294
- name: m[1]!.trim(),
295
- input
296
- })
297
- } catch { /* skip malformed block */ }
298
- }
299
- }
300
-
301
- return { toolCalls: calls, textBefore: firstIdx > 0 ? text.slice(0, firstIdx).trim() : "" }
// MCP tool names come back from the SDK as "mcp__{server-name}__{tool}".
// This is the prefix for the server name used by this proxy; removing it
// gives clients back the tool names they originally sent.
const MCP_TOOL_PREFIX = "mcp__proxy-tools__"

/**
 * Remove the proxy's MCP server prefix from a tool name.
 *
 * @param name - Tool name as reported by the SDK.
 * @returns The name without the leading `MCP_TOOL_PREFIX`; names that do not
 *          start with that prefix are returned unchanged.
 */
function stripMcpPrefix(name: string): string {
  if (!name.startsWith(MCP_TOOL_PREFIX)) {
    return name
  }
  return name.substring(MCP_TOOL_PREFIX.length)
}
303
211
 
304
212
  function roughTokens(text: string): number {
@@ -351,6 +259,8 @@ function buildQueryOptions(
351
259
  abortController?: AbortController
352
260
  thinking?: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" }
353
261
  resume?: string
262
+ mcpServers?: Record<string, any>
263
+ canUseTool?: (toolName: string, input: Record<string, unknown>, options: any) => Promise<any>
354
264
  } = {}
355
265
  ) {
356
266
  return {
@@ -360,13 +270,15 @@ function buildQueryOptions(
360
270
  allowDangerouslySkipPermissions: true,
361
271
  persistSession: true,
362
272
  settingSources: [],
363
- tools: ["_proxy_noop_"] as string[],
273
+ tools: [] as string[],
364
274
  maxTurns: 1,
365
275
  ...(opts.partial ? { includePartialMessages: true } : {}),
366
276
  ...(opts.abortController ? { abortController: opts.abortController } : {}),
367
277
  ...(opts.thinking ? { thinking: opts.thinking } : {}),
368
278
  ...(opts.systemPrompt ? { systemPrompt: opts.systemPrompt } : {}),
369
279
  ...(opts.resume ? { resume: opts.resume } : {}),
280
+ ...(opts.mcpServers ? { mcpServers: opts.mcpServers } : {}),
281
+ ...(opts.canUseTool ? { canUseTool: opts.canUseTool } : {}),
370
282
  }
371
283
  }
372
284
 
@@ -724,7 +636,13 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
724
636
 
725
637
  let promptText: string // text version for token counting / logging
726
638
  let systemPrompt: string | undefined
727
- const toolsSection = hasTools ? buildClientToolsPrompt(body.tools) : ""
639
+
640
+ // Create MCP server for native tool calling (if client sent tools)
641
+ const mcpServer = hasTools ? createToolMcpServer(body.tools) : null
642
+ const mcpServers = mcpServer ? { "proxy-tools": mcpServer } : undefined
643
+ const canUseTool = hasTools
644
+ ? async () => ({ behavior: "deny" as const, message: "Tool execution handled by client" })
645
+ : undefined
728
646
 
729
647
  // ── Session resumption ─────────────────────────────────────────────
730
648
  // Derive conversation ID from: headers (explicit) or conversation_label
@@ -763,41 +681,22 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
763
681
  promptText = serializeContent(lastMsg.content)
764
682
 
765
683
  if (isResuming && resumeSessionId) {
766
- systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
684
+ systemPrompt = (systemContext || "").trim() || undefined
767
685
  if (lastMsgHasImages) {
768
686
  promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content), resumeSessionId)
769
687
  logInfo("session.resume_with_images", { reqId, conversationId })
770
688
  } else {
771
689
  promptInput = promptText
772
690
  }
773
- } else if (messages.length === 1) {
774
- systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
691
+ } else {
692
+ // Single or multi-turn: always use just the last message as prompt.
693
+ // The client (openclaw) manages conversation context; the proxy is stateless.
694
+ // Prior turns are handled by SDK session resumption when available.
695
+ systemPrompt = (systemContext || "").trim() || undefined
775
696
  promptInput = lastMsgHasImages
776
697
  ? createSDKUserMessage(buildNativeContent(lastMsg.content))
777
698
  : promptText
778
699
  if (lastMsgHasImages) logInfo("request.native_images", { reqId })
779
- } else {
780
- const priorMsgs = messages.slice(0, -1)
781
-
782
- const contextParts = priorMsgs
783
- .map((m) => {
784
- const role = m.role === "assistant" ? "Assistant" : "User"
785
- return `[${role}]\n${serializeContent(m.content)}`
786
- })
787
- .join("\n\n")
788
-
789
- const baseSystem = systemContext || ""
790
- const contextSection = contextParts
791
- ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---`
792
- : ""
793
- systemPrompt = (baseSystem + contextSection + toolsSection).trim() || undefined
794
-
795
- if (lastMsgHasImages) {
796
- promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content))
797
- logInfo("request.native_images", { reqId })
798
- } else {
799
- promptInput = promptText
800
- }
801
700
  }
802
701
 
803
702
  requestStarted = Date.now()
@@ -863,7 +762,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
863
762
  if (!stream) {
864
763
  let fullText = ""
865
764
  let capturedSessionId: string | undefined
866
- const queryOpts = buildQueryOptions(model, { partial: false, systemPrompt, abortController, thinking, resume: resumeSessionId })
765
+ const queryOpts = buildQueryOptions(model, { partial: hasTools, systemPrompt, abortController, thinking, resume: resumeSessionId, mcpServers, canUseTool })
766
+ // For tool mode: capture native content blocks from stream events
767
+ const contentBlocks: any[] = []
768
+ let currentToolBlock: { id: string; name: string; jsonAccum: string } | null = null
769
+ let capturedStopReason: string | null = null
867
770
  try {
868
771
  traceStore.phase(reqId, "sdk_starting")
869
772
  let sdkEventCount = 0
@@ -875,7 +778,43 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
875
778
  if (message.type === "system" && (message as any).subtype === "init") {
876
779
  capturedSessionId = (message as any).session_id
877
780
  }
878
- if (message.type === "assistant") {
781
+ if (hasTools && message.type === "stream_event") {
782
+ const ev = (message as any).event as any
783
+ if (ev.type === "content_block_start") {
784
+ const cb = ev.content_block
785
+ if (cb?.type === "tool_use") {
786
+ currentToolBlock = { id: cb.id, name: stripMcpPrefix(cb.name), jsonAccum: "" }
787
+ } else if (cb?.type === "text") {
788
+ contentBlocks.push({ type: "text", text: "" })
789
+ }
790
+ } else if (ev.type === "content_block_delta") {
791
+ if (ev.delta?.type === "text_delta") {
792
+ const lastText = contentBlocks[contentBlocks.length - 1]
793
+ if (lastText?.type === "text") {
794
+ lastText.text += ev.delta.text ?? ""
795
+ } else {
796
+ contentBlocks.push({ type: "text", text: ev.delta.text ?? "" })
797
+ }
798
+ fullText += ev.delta.text ?? ""
799
+ } else if (ev.delta?.type === "input_json_delta" && currentToolBlock) {
800
+ currentToolBlock.jsonAccum += ev.delta.partial_json ?? ""
801
+ }
802
+ } else if (ev.type === "content_block_stop") {
803
+ if (currentToolBlock) {
804
+ let input: any = {}
805
+ try { input = JSON.parse(currentToolBlock.jsonAccum) } catch {}
806
+ contentBlocks.push({
807
+ type: "tool_use",
808
+ id: currentToolBlock.id,
809
+ name: currentToolBlock.name,
810
+ input,
811
+ })
812
+ currentToolBlock = null
813
+ }
814
+ } else if (ev.type === "message_delta") {
815
+ if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
816
+ }
817
+ } else if (!hasTools && message.type === "assistant") {
879
818
  let turnText = ""
880
819
  for (const block of message.message.content) {
881
820
  if (block.type === "text") turnText += block.text
@@ -896,45 +835,63 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
896
835
  }
897
836
  }
898
837
  } catch (resumeErr) {
899
- // If resume failed, retry with full context
838
+ // If resume failed, start a fresh session (no history reconstruction)
900
839
  if (isResuming && resumeSessionId) {
901
840
  logWarn("session.resume_failed", {
902
- reqId,
903
- conversationId,
904
- sdkSessionId: resumeSessionId,
841
+ reqId, conversationId, sdkSessionId: resumeSessionId,
905
842
  error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
906
843
  })
907
844
  if (conversationId) {
908
845
  sessionStore.recordFailure(conversationId)
909
846
  sessionStore.invalidate(conversationId)
910
847
  }
911
- // Rebuild with full context (non-resume path)
912
- const fbLastMsg = messages[messages.length - 1]!
913
- const priorMsgs = messages.slice(0, -1)
914
- const contextParts = priorMsgs
915
- .map((m) => {
916
- const role = m.role === "assistant" ? "Assistant" : "User"
917
- return `[${role}]\n${serializeContent(m.content)}`
918
- })
919
- .join("\n\n")
920
- const baseSystem = systemContext || ""
921
- const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
922
- const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
923
- const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
924
- ? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
925
- : serializeContent(fbLastMsg.content)
926
- const fallbackOpts = buildQueryOptions(model, { partial: false, systemPrompt: fallbackSystem, abortController, thinking })
927
-
928
- logInfo("session.fallback_full_context", { reqId, conversationId })
848
+
849
+ logInfo("session.fallback_fresh", { reqId, conversationId })
850
+ // Reset accumulators and retry as a brand new session
851
+ contentBlocks.length = 0
852
+ currentToolBlock = null
853
+ capturedStopReason = null
854
+ fullText = ""
929
855
  let sdkEventCount = 0
930
- for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
856
+ for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: hasTools, systemPrompt, abortController, thinking, mcpServers, canUseTool }) })) {
931
857
  sdkEventCount++
932
858
  resetStallTimer()
933
859
  traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
934
860
  if (message.type === "system" && (message as any).subtype === "init") {
935
861
  capturedSessionId = (message as any).session_id
936
862
  }
937
- if (message.type === "assistant") {
863
+ if (hasTools && message.type === "stream_event") {
864
+ const ev = (message as any).event as any
865
+ if (ev.type === "content_block_start") {
866
+ const cb = ev.content_block
867
+ if (cb?.type === "tool_use") {
868
+ currentToolBlock = { id: cb.id, name: stripMcpPrefix(cb.name), jsonAccum: "" }
869
+ } else if (cb?.type === "text") {
870
+ contentBlocks.push({ type: "text", text: "" })
871
+ }
872
+ } else if (ev.type === "content_block_delta") {
873
+ if (ev.delta?.type === "text_delta") {
874
+ const lastText = contentBlocks[contentBlocks.length - 1]
875
+ if (lastText?.type === "text") {
876
+ lastText.text += ev.delta.text ?? ""
877
+ } else {
878
+ contentBlocks.push({ type: "text", text: ev.delta.text ?? "" })
879
+ }
880
+ fullText += ev.delta.text ?? ""
881
+ } else if (ev.delta?.type === "input_json_delta" && currentToolBlock) {
882
+ currentToolBlock.jsonAccum += ev.delta.partial_json ?? ""
883
+ }
884
+ } else if (ev.type === "content_block_stop") {
885
+ if (currentToolBlock) {
886
+ let input: any = {}
887
+ try { input = JSON.parse(currentToolBlock.jsonAccum) } catch {}
888
+ contentBlocks.push({ type: "tool_use", id: currentToolBlock.id, name: currentToolBlock.name, input })
889
+ currentToolBlock = null
890
+ }
891
+ } else if (ev.type === "message_delta") {
892
+ if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
893
+ }
894
+ } else if (!hasTools && message.type === "assistant") {
938
895
  let turnText = ""
939
896
  for (const block of message.message.content) {
940
897
  if (block.type === "text") turnText += block.text
@@ -943,17 +900,15 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
943
900
  }
944
901
  }
945
902
  traceStore.phase(reqId, "sdk_done")
946
- // Store the new session
947
903
  if (conversationId && capturedSessionId) {
948
904
  sessionStore.set(conversationId, capturedSessionId, model, messages.length)
949
- logInfo("session.recreated_after_fallback", { reqId, conversationId, sdkSessionId: capturedSessionId })
905
+ logInfo("session.created_after_fallback", { reqId, conversationId, sdkSessionId: capturedSessionId })
950
906
  }
951
907
  } else {
952
908
  throw resumeErr
953
909
  }
954
910
  } finally {
955
911
  clearStallTimer(); clearHardTimer()
956
- // (temp files no longer used — images passed natively)
957
912
  requestQueue.release()
958
913
  logDebug("queue.released", {
959
914
  reqId,
@@ -965,26 +920,18 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
965
920
  traceStore.phase(reqId, "responding")
966
921
 
967
922
  if (hasTools) {
968
- const { toolCalls, textBefore } = parseToolUse(fullText)
969
- // Debug: log when tools were expected but none parsed
970
- if (toolCalls.length === 0 && fullText.length > 0) {
971
- logDebug("tool_parse.no_match", {
972
- reqId,
973
- outputLen: fullText.length,
974
- textPreview: fullText.slice(0, 500),
975
- hasToolUseTag: fullText.includes("<tool_use>"),
976
- hasFunctionCalls: fullText.includes("<function_calls>"),
977
- hasInvoke: fullText.includes("<invoke"),
978
- hasAntmlInvoke: fullText.includes("antml:invoke"),
979
- })
980
- }
981
- const content: any[] = []
982
- if (textBefore) content.push({ type: "text", text: textBefore })
983
- for (const tc of toolCalls) content.push({ type: "tool_use", id: tc.id, name: tc.name, input: tc.input })
923
+ // Use native content blocks captured from stream events
924
+ const toolCallCount = contentBlocks.filter((b: any) => b.type === "tool_use").length
925
+ // Filter out empty text blocks
926
+ const content = contentBlocks.filter((b: any) => {
927
+ if (b.type === "text" && !b.text?.trim()) return false
928
+ return true
929
+ })
984
930
  if (content.length === 0) content.push({ type: "text", text: fullText || "..." })
985
- const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
986
931
 
987
- traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
932
+ const stopReason = toolCallCount > 0 ? "tool_use" : (capturedStopReason ?? "end_turn")
933
+
934
+ traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount })
988
935
 
989
936
  return c.json({
990
937
  id: generateId("msg_"),
@@ -1078,8 +1025,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1078
1025
  })
1079
1026
 
1080
1027
  if (hasTools) {
1081
- // ── With tools: buffer output, parse tool_use blocks at end ──
1028
+ // ── With tools: forward native SDK stream events directly as SSE ──
1082
1029
  let fullText = ""
1030
+ let blockIdx = 0
1031
+ let toolCallCount = 0
1032
+ let capturedStopReason: string | null = null
1033
+ let hasEmittedAnyBlock = false
1083
1034
  let sdkEventCount = 0
1084
1035
  let lastEventAt = Date.now()
1085
1036
  const stallLog = setInterval(() => {
@@ -1087,28 +1038,51 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1087
1038
  traceStore.stall(reqId, stallMs)
1088
1039
  }, 15_000)
1089
1040
  let capturedSessionId: string | undefined
1041
+
// Tracks whether the content block currently being streamed by the SDK had
// its content_block_start forwarded to the client. Only `text` and `tool_use`
// blocks are forwarded; for any other block type (e.g. thinking blocks) the
// start is skipped, so its deltas and its stop must be skipped as well.
// Otherwise the client would receive a content_block_stop for an index it
// never saw opened, and later blocks would be numbered from 1 instead of 0.
let forwardedBlockOpen = false

/**
 * Re-emit one native SDK stream event to the client as an SSE event.
 *
 * Side effects on the enclosing request state:
 * - `toolCallCount` is incremented for every forwarded tool_use block.
 * - `hasEmittedAnyBlock` becomes true once a block start was actually sent.
 * - `fullText` accumulates forwarded text deltas (also reported to the trace
 *   store and checked against the output size limit).
 * - `blockIdx` advances after each forwarded block is closed, so the indices
 *   sent to the client are contiguous and start at 0.
 * - `capturedStopReason` records the stop_reason from message_delta; the
 *   message_delta itself is not forwarded here.
 *
 * @param ev - The `event` payload of an SDK `stream_event` message.
 */
const forwardStreamEvent = (ev: any) => {
  if (ev.type === "content_block_start") {
    const cb = ev.content_block
    if (cb?.type === "tool_use") {
      toolCallCount++
      // Clients must see the tool name they sent, not the SDK's MCP-prefixed name.
      sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "tool_use", id: cb.id, name: stripMcpPrefix(cb.name), input: {} } })
      forwardedBlockOpen = true
    } else if (cb?.type === "text") {
      sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "text", text: "" } })
      forwardedBlockOpen = true
    } else {
      // Unsupported block type: nothing is sent, so nothing is open client-side.
      forwardedBlockOpen = false
    }
    if (forwardedBlockOpen) hasEmittedAnyBlock = true
  } else if (ev.type === "content_block_delta") {
    // Deltas of a block whose start was not forwarded have no valid target index.
    if (!forwardedBlockOpen) return
    if (ev.delta?.type === "text_delta") {
      fullText += ev.delta.text ?? ""
      traceStore.updateOutput(reqId, fullText.length)
      checkOutputSize(fullText.length)
      sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "text_delta", text: ev.delta.text ?? "" } })
    } else if (ev.delta?.type === "input_json_delta") {
      sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "input_json_delta", partial_json: ev.delta.partial_json ?? "" } })
    }
  } else if (ev.type === "content_block_stop") {
    // Only close (and advance past) blocks that were actually opened for the client.
    if (!forwardedBlockOpen) return
    sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
    blockIdx++
    forwardedBlockOpen = false
  } else if (ev.type === "message_delta") {
    if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
  }
}
1069
+
1090
1070
  try {
1091
1071
  traceStore.phase(reqId, "sdk_starting")
1092
- for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
1072
+ for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId, mcpServers, canUseTool }) })) {
1093
1073
  sdkEventCount++
1094
1074
  lastEventAt = Date.now()
1095
1075
  resetStallTimer()
1096
1076
  const subtype = (message as any).event?.type ?? (message as any).message?.type
1097
- // Capture session_id from init message
1098
1077
  if (message.type === "system" && (message as any).subtype === "init") {
1099
1078
  capturedSessionId = (message as any).session_id
1100
1079
  }
1101
1080
  if (message.type === "stream_event") {
1102
- const ev = message.event as any
1103
- // Detect first content event BEFORE sdkEvent records it
1081
+ const ev = (message as any).event as any
1104
1082
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1105
1083
  traceStore.phase(reqId, "sdk_streaming")
1106
1084
  }
1107
- if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1108
- fullText += ev.delta.text ?? ""
1109
- traceStore.updateOutput(reqId, fullText.length)
1110
- checkOutputSize(fullText.length)
1111
- }
1085
+ forwardStreamEvent(ev)
1112
1086
  }
1113
1087
  traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1114
1088
  }
@@ -1123,7 +1097,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1123
1097
  }
1124
1098
  }
1125
1099
  } catch (resumeErr) {
1126
- // Resume failed in streaming with-tools path retry with full context
1100
+ // Resume failed — start a fresh session (no history reconstruction)
1127
1101
  if (isResuming && resumeSessionId) {
1128
1102
  logWarn("session.resume_failed_stream", {
1129
1103
  reqId, conversationId, sdkSessionId: resumeSessionId,
@@ -1133,25 +1107,15 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1133
1107
  sessionStore.recordFailure(conversationId)
1134
1108
  sessionStore.invalidate(conversationId)
1135
1109
  }
1136
- const fbLastMsg = messages[messages.length - 1]!
1137
- const priorMsgs = messages.slice(0, -1)
1138
- const contextParts = priorMsgs
1139
- .map((m) => {
1140
- const role = m.role === "assistant" ? "Assistant" : "User"
1141
- return `[${role}]\n${serializeContent(m.content)}`
1142
- })
1143
- .join("\n\n")
1144
- const baseSystem = systemContext || ""
1145
- const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
1146
- const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
1147
- const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
1148
- ? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
1149
- : serializeContent(fbLastMsg.content)
1150
- const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
1151
-
1152
- logInfo("session.fallback_full_context_stream", { reqId, conversationId })
1110
+
1111
+ logInfo("session.fallback_fresh_stream", { reqId, conversationId })
1112
+ fullText = ""
1113
+ blockIdx = 0
1114
+ toolCallCount = 0
1115
+ capturedStopReason = null
1116
+ hasEmittedAnyBlock = false
1153
1117
  sdkEventCount = 0
1154
- for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
1118
+ for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, mcpServers, canUseTool }) })) {
1155
1119
  sdkEventCount++
1156
1120
  lastEventAt = Date.now()
1157
1121
  resetStallTimer()
@@ -1160,22 +1124,18 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1160
1124
  capturedSessionId = (message as any).session_id
1161
1125
  }
1162
1126
  if (message.type === "stream_event") {
1163
- const ev = message.event as any
1127
+ const ev = (message as any).event as any
1164
1128
  if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1165
1129
  traceStore.phase(reqId, "sdk_streaming")
1166
1130
  }
1167
- if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1168
- fullText += ev.delta.text ?? ""
1169
- traceStore.updateOutput(reqId, fullText.length)
1170
- checkOutputSize(fullText.length)
1171
- }
1131
+ forwardStreamEvent(ev)
1172
1132
  }
1173
1133
  traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1174
1134
  }
1175
1135
  traceStore.phase(reqId, "sdk_done")
1176
1136
  if (conversationId && capturedSessionId) {
1177
1137
  sessionStore.set(conversationId, capturedSessionId, model, messages.length)
1178
- logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId })
1138
+ logInfo("session.created_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId })
1179
1139
  }
1180
1140
  } else {
1181
1141
  throw resumeErr
@@ -1184,52 +1144,24 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1184
1144
  clearInterval(stallLog)
1185
1145
  clearInterval(heartbeat)
1186
1146
  clearStallTimer(); clearHardTimer()
1187
- // (temp files no longer used — images passed natively)
1188
1147
  releaseQueue()
1189
1148
  }
1190
1149
 
1191
1150
  traceStore.phase(reqId, "responding")
1192
- const { toolCalls, textBefore } = parseToolUse(fullText)
1193
-
1194
- // Debug: log when tools were expected but none parsed
1195
- if (toolCalls.length === 0 && fullText.length > 0) {
1196
- logDebug("tool_parse.no_match_stream", {
1197
- reqId,
1198
- outputLen: fullText.length,
1199
- textPreview: fullText.slice(0, 500),
1200
- hasToolUseTag: fullText.includes("<tool_use>"),
1201
- hasFunctionCalls: fullText.includes("<function_calls>"),
1202
- hasInvoke: fullText.includes("<invoke"),
1203
- hasAntmlInvoke: fullText.includes("antml:invoke"),
1204
- })
1205
- }
1206
1151
 
1207
- let blockIdx = 0
1208
- const textContent = toolCalls.length === 0 ? (fullText || "...") : textBefore
1209
- if (textContent) {
1210
- sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "text", text: "" } })
1211
- sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "text_delta", text: textContent } })
1212
- sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
1213
- blockIdx++
1214
- } else if (toolCalls.length === 0) {
1152
+ // If no blocks were emitted at all, emit a placeholder text block
1153
+ if (!hasEmittedAnyBlock) {
1215
1154
  sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1216
1155
  sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
1217
1156
  sse("content_block_stop", { type: "content_block_stop", index: 0 })
1218
- blockIdx = 1
1219
- }
1220
- for (const tc of toolCalls) {
1221
- sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "tool_use", id: tc.id, name: tc.name, input: {} } })
1222
- sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "input_json_delta", partial_json: JSON.stringify(tc.input) } })
1223
- sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
1224
- blockIdx++
1225
1157
  }
1226
1158
 
1227
- const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
1159
+ const stopReason = toolCallCount > 0 ? "tool_use" : (capturedStopReason ?? "end_turn")
1228
1160
  sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
1229
1161
  sse("message_stop", { type: "message_stop" })
1230
1162
  controller.close()
1231
1163
 
1232
- traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
1164
+ traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount })
1233
1165
  return
1234
1166
  }
1235
1167
 
@@ -1286,7 +1218,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1286
1218
  }
1287
1219
  }
1288
1220
  } catch (resumeErr) {
1289
- // Resume failed in streaming no-tools path retry with full context
1221
+ // Resume failed — start a fresh session (no history reconstruction)
1290
1222
  if (isResuming && resumeSessionId) {
1291
1223
  logWarn("session.resume_failed_stream", {
1292
1224
  reqId, conversationId, sdkSessionId: resumeSessionId,
@@ -1296,25 +1228,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1296
1228
  sessionStore.recordFailure(conversationId)
1297
1229
  sessionStore.invalidate(conversationId)
1298
1230
  }
1299
- const fbLastMsg = messages[messages.length - 1]!
1300
- const priorMsgs = messages.slice(0, -1)
1301
- const contextParts = priorMsgs
1302
- .map((m) => {
1303
- const role = m.role === "assistant" ? "Assistant" : "User"
1304
- return `[${role}]\n${serializeContent(m.content)}`
1305
- })
1306
- .join("\n\n")
1307
- const baseSystem = systemContext || ""
1308
- const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
1309
- const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
1310
- const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
1311
- ? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
1312
- : serializeContent(fbLastMsg.content)
1313
- const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
1314
-
1315
- logInfo("session.fallback_full_context_stream", { reqId, conversationId })
1231
+
1232
+ logInfo("session.fallback_fresh_stream", { reqId, conversationId })
1316
1233
  sdkEventCount = 0
1317
- for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
1234
+ for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking }) })) {
1318
1235
  sdkEventCount++
1319
1236
  lastEventAt = Date.now()
1320
1237
  resetStallTimer()
@@ -1343,7 +1260,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
1343
1260
  traceStore.phase(reqId, "sdk_done")
1344
1261
  if (conversationId && capturedSessionId2) {
1345
1262
  sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
1346
- logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId2 })
1263
+ logInfo("session.created_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId2 })
1347
1264
  }
1348
1265
  } else {
1349
1266
  throw resumeErr