claude-sdk-proxy 3.1.4 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/mcpTools.ts +183 -3
- package/src/proxy/server.ts +182 -265
- package/src/proxy/server.ts.bak +0 -1841
package/src/proxy/server.ts
CHANGED
|
@@ -7,6 +7,7 @@ import { DEFAULT_PROXY_CONFIG } from "./types"
|
|
|
7
7
|
import { logInfo, logWarn, logError, logDebug, LOG_DIR } from "../logger"
|
|
8
8
|
import { traceStore } from "../trace"
|
|
9
9
|
import { sessionStore } from "../session-store"
|
|
10
|
+
import { createToolMcpServer } from "../mcpTools"
|
|
10
11
|
import { execSync } from "child_process"
|
|
11
12
|
import { existsSync, writeFileSync, readFileSync, readdirSync } from "fs"
|
|
12
13
|
import { randomBytes } from "crypto"
|
|
@@ -195,110 +196,17 @@ function createSDKUserMessage(content: Array<any>, sessionId?: string): AsyncIte
|
|
|
195
196
|
}
|
|
196
197
|
|
|
197
198
|
|
|
198
|
-
// ── Client tool-use support
|
|
199
|
+
// ── Client tool-use support (native MCP) ────────────────────────────────────
|
|
200
|
+
// Tool definitions from the client are converted to native MCP tools via
|
|
201
|
+
// createToolMcpServer(). The SDK generates proper tool_use content blocks
|
|
202
|
+
// instead of text-based XML. canUseTool denies all execution since the
|
|
203
|
+
// client handles tool execution.
|
|
199
204
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
return `\n\n## Available Tools\n\nTo call a tool, output a <tool_use> block:\n\n` +
|
|
206
|
-
`<tool_use>\n{"name": "TOOL_NAME", "input": {ARGUMENTS}}\n</tool_use>\n\n` +
|
|
207
|
-
`- You may write reasoning text before the block\n` +
|
|
208
|
-
`- Call multiple tools by including multiple <tool_use> blocks\n` +
|
|
209
|
-
`- Each block must be valid JSON with "name" and "input" keys\n\n` +
|
|
210
|
-
defs
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
interface ToolCall { id: string; name: string; input: unknown }
|
|
214
|
-
|
|
215
|
-
function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string } {
|
|
216
|
-
const calls: ToolCall[] = []
|
|
217
|
-
let firstIdx = -1
|
|
218
|
-
|
|
219
|
-
const xmlRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
|
|
220
|
-
let m: RegExpExecArray | null
|
|
221
|
-
while ((m = xmlRegex.exec(text)) !== null) {
|
|
222
|
-
if (firstIdx < 0) firstIdx = m.index
|
|
223
|
-
try {
|
|
224
|
-
const raw = m[1]!.trim()
|
|
225
|
-
let p: any
|
|
226
|
-
try { p = JSON.parse(raw) } catch {
|
|
227
|
-
// Repair: models often emit raw newlines in heredocs/multiline commands
|
|
228
|
-
// which are invalid inside JSON strings. Escape them only inside quoted values.
|
|
229
|
-
let fixed = "", inStr = false, esc = false
|
|
230
|
-
for (const ch of raw) {
|
|
231
|
-
if (esc) { fixed += ch; esc = false; continue }
|
|
232
|
-
if (ch === "\\") { esc = true; fixed += ch; continue }
|
|
233
|
-
if (ch === '"') { inStr = !inStr; fixed += ch; continue }
|
|
234
|
-
if (inStr && ch === "\n") { fixed += "\\n"; continue }
|
|
235
|
-
if (inStr && ch === "\r") { fixed += "\\r"; continue }
|
|
236
|
-
if (inStr && ch === "\t") { fixed += "\\t"; continue }
|
|
237
|
-
fixed += ch
|
|
238
|
-
}
|
|
239
|
-
p = JSON.parse(fixed)
|
|
240
|
-
}
|
|
241
|
-
calls.push({
|
|
242
|
-
id: generateId("toolu_"),
|
|
243
|
-
name: String(p.name ?? ""),
|
|
244
|
-
input: p.input ?? {}
|
|
245
|
-
})
|
|
246
|
-
} catch { /* skip malformed block */ }
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
if (calls.length === 0) {
|
|
250
|
-
const fcRegex = /<function_calls>([\s\S]*?)<\/function_calls>/g
|
|
251
|
-
while ((m = fcRegex.exec(text)) !== null) {
|
|
252
|
-
if (firstIdx < 0) firstIdx = m.index
|
|
253
|
-
try {
|
|
254
|
-
const parsed = JSON.parse(m[1]!.trim())
|
|
255
|
-
const items = Array.isArray(parsed) ? parsed : [parsed]
|
|
256
|
-
for (const p of items) {
|
|
257
|
-
if (p && typeof p.name === "string") {
|
|
258
|
-
calls.push({
|
|
259
|
-
id: generateId("toolu_"),
|
|
260
|
-
name: p.name,
|
|
261
|
-
input: p.input ?? p.parameters ?? {}
|
|
262
|
-
})
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
} catch { /* skip malformed block */ }
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
if (calls.length === 0) {
|
|
270
|
-
const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g
|
|
271
|
-
while ((m = invokeRegex.exec(text)) !== null) {
|
|
272
|
-
if (firstIdx < 0) firstIdx = m.index
|
|
273
|
-
const toolName = m[1]!
|
|
274
|
-
const body = m[2]!
|
|
275
|
-
const input: Record<string, any> = {}
|
|
276
|
-
const paramRegex = /<parameter\s+name="([^"]+)">([\s\S]*?)<\/parameter>/g
|
|
277
|
-
let pm: RegExpExecArray | null
|
|
278
|
-
while ((pm = paramRegex.exec(body)) !== null) {
|
|
279
|
-
const val = pm[2]!.trim()
|
|
280
|
-
try { input[pm[1]!] = JSON.parse(val) } catch { input[pm[1]!] = val }
|
|
281
|
-
}
|
|
282
|
-
calls.push({ id: generateId("toolu_"), name: toolName, input })
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
if (calls.length === 0) {
|
|
287
|
-
const bracketRegex = /\[Tool call:\s*(\w+)\s*\nInput:\s*([\s\S]*?)\]/g
|
|
288
|
-
while ((m = bracketRegex.exec(text)) !== null) {
|
|
289
|
-
if (firstIdx < 0) firstIdx = m.index
|
|
290
|
-
try {
|
|
291
|
-
const input = JSON.parse(m[2]!.trim())
|
|
292
|
-
calls.push({
|
|
293
|
-
id: generateId("toolu_"),
|
|
294
|
-
name: m[1]!.trim(),
|
|
295
|
-
input
|
|
296
|
-
})
|
|
297
|
-
} catch { /* skip malformed block */ }
|
|
298
|
-
}
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
return { toolCalls: calls, textBefore: firstIdx > 0 ? text.slice(0, firstIdx).trim() : "" }
|
|
205
|
+
// The SDK prefixes MCP tool names with "mcp__{server-name}__".
|
|
206
|
+
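// Strip this prefix so clients see the original tool names they sent; names without the prefix pass through unchanged. The server name in the prefix must match the "proxy-tools" key under which the MCP server is registered.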
|
|
207
|
+
const MCP_TOOL_PREFIX = "mcp__proxy-tools__"
|
|
208
|
+
function stripMcpPrefix(name: string): string {
|
|
209
|
+
return name.startsWith(MCP_TOOL_PREFIX) ? name.slice(MCP_TOOL_PREFIX.length) : name
|
|
302
210
|
}
|
|
303
211
|
|
|
304
212
|
function roughTokens(text: string): number {
|
|
@@ -351,6 +259,8 @@ function buildQueryOptions(
|
|
|
351
259
|
abortController?: AbortController
|
|
352
260
|
thinking?: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" }
|
|
353
261
|
resume?: string
|
|
262
|
+
mcpServers?: Record<string, any>
|
|
263
|
+
canUseTool?: (toolName: string, input: Record<string, unknown>, options: any) => Promise<any>
|
|
354
264
|
} = {}
|
|
355
265
|
) {
|
|
356
266
|
return {
|
|
@@ -360,13 +270,15 @@ function buildQueryOptions(
|
|
|
360
270
|
allowDangerouslySkipPermissions: true,
|
|
361
271
|
persistSession: true,
|
|
362
272
|
settingSources: [],
|
|
363
|
-
tools: [
|
|
273
|
+
tools: [] as string[],
|
|
364
274
|
maxTurns: 1,
|
|
365
275
|
...(opts.partial ? { includePartialMessages: true } : {}),
|
|
366
276
|
...(opts.abortController ? { abortController: opts.abortController } : {}),
|
|
367
277
|
...(opts.thinking ? { thinking: opts.thinking } : {}),
|
|
368
278
|
...(opts.systemPrompt ? { systemPrompt: opts.systemPrompt } : {}),
|
|
369
279
|
...(opts.resume ? { resume: opts.resume } : {}),
|
|
280
|
+
...(opts.mcpServers ? { mcpServers: opts.mcpServers } : {}),
|
|
281
|
+
...(opts.canUseTool ? { canUseTool: opts.canUseTool } : {}),
|
|
370
282
|
}
|
|
371
283
|
}
|
|
372
284
|
|
|
@@ -724,7 +636,13 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
724
636
|
|
|
725
637
|
let promptText: string // text version for token counting / logging
|
|
726
638
|
let systemPrompt: string | undefined
|
|
727
|
-
|
|
639
|
+
|
|
640
|
+
// Create MCP server for native tool calling (if client sent tools)
|
|
641
|
+
const mcpServer = hasTools ? createToolMcpServer(body.tools) : null
|
|
642
|
+
const mcpServers = mcpServer ? { "proxy-tools": mcpServer } : undefined
|
|
643
|
+
const canUseTool = hasTools
|
|
644
|
+
? async () => ({ behavior: "deny" as const, message: "Tool execution handled by client" })
|
|
645
|
+
: undefined
|
|
728
646
|
|
|
729
647
|
// ── Session resumption ─────────────────────────────────────────────
|
|
730
648
|
// Derive conversation ID from: headers (explicit) or conversation_label
|
|
@@ -763,41 +681,22 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
763
681
|
promptText = serializeContent(lastMsg.content)
|
|
764
682
|
|
|
765
683
|
if (isResuming && resumeSessionId) {
|
|
766
|
-
systemPrompt = (
|
|
684
|
+
systemPrompt = (systemContext || "").trim() || undefined
|
|
767
685
|
if (lastMsgHasImages) {
|
|
768
686
|
promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content), resumeSessionId)
|
|
769
687
|
logInfo("session.resume_with_images", { reqId, conversationId })
|
|
770
688
|
} else {
|
|
771
689
|
promptInput = promptText
|
|
772
690
|
}
|
|
773
|
-
} else
|
|
774
|
-
|
|
691
|
+
} else {
|
|
692
|
+
// Single or multi-turn: always use just the last message as prompt.
|
|
693
|
+
// The client (openclaw) manages conversation context; the proxy no longer rebuilds prior turns from the request body.
|
|
694
|
+
// Prior turns are handled by SDK session resumption when available.
|
|
695
|
+
systemPrompt = (systemContext || "").trim() || undefined
|
|
775
696
|
promptInput = lastMsgHasImages
|
|
776
697
|
? createSDKUserMessage(buildNativeContent(lastMsg.content))
|
|
777
698
|
: promptText
|
|
778
699
|
if (lastMsgHasImages) logInfo("request.native_images", { reqId })
|
|
779
|
-
} else {
|
|
780
|
-
const priorMsgs = messages.slice(0, -1)
|
|
781
|
-
|
|
782
|
-
const contextParts = priorMsgs
|
|
783
|
-
.map((m) => {
|
|
784
|
-
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
785
|
-
return `[${role}]\n${serializeContent(m.content)}`
|
|
786
|
-
})
|
|
787
|
-
.join("\n\n")
|
|
788
|
-
|
|
789
|
-
const baseSystem = systemContext || ""
|
|
790
|
-
const contextSection = contextParts
|
|
791
|
-
? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---`
|
|
792
|
-
: ""
|
|
793
|
-
systemPrompt = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
794
|
-
|
|
795
|
-
if (lastMsgHasImages) {
|
|
796
|
-
promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content))
|
|
797
|
-
logInfo("request.native_images", { reqId })
|
|
798
|
-
} else {
|
|
799
|
-
promptInput = promptText
|
|
800
|
-
}
|
|
801
700
|
}
|
|
802
701
|
|
|
803
702
|
requestStarted = Date.now()
|
|
@@ -863,7 +762,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
863
762
|
if (!stream) {
|
|
864
763
|
let fullText = ""
|
|
865
764
|
let capturedSessionId: string | undefined
|
|
866
|
-
const queryOpts = buildQueryOptions(model, { partial:
|
|
765
|
+
const queryOpts = buildQueryOptions(model, { partial: hasTools, systemPrompt, abortController, thinking, resume: resumeSessionId, mcpServers, canUseTool })
|
|
766
|
+
// For tool mode: capture native content blocks from stream events
|
|
767
|
+
const contentBlocks: any[] = []
|
|
768
|
+
let currentToolBlock: { id: string; name: string; jsonAccum: string } | null = null
|
|
769
|
+
let capturedStopReason: string | null = null
|
|
867
770
|
try {
|
|
868
771
|
traceStore.phase(reqId, "sdk_starting")
|
|
869
772
|
let sdkEventCount = 0
|
|
@@ -875,7 +778,43 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
875
778
|
if (message.type === "system" && (message as any).subtype === "init") {
|
|
876
779
|
capturedSessionId = (message as any).session_id
|
|
877
780
|
}
|
|
878
|
-
if (message.type === "
|
|
781
|
+
if (hasTools && message.type === "stream_event") {
|
|
782
|
+
const ev = (message as any).event as any
|
|
783
|
+
if (ev.type === "content_block_start") {
|
|
784
|
+
const cb = ev.content_block
|
|
785
|
+
if (cb?.type === "tool_use") {
|
|
786
|
+
currentToolBlock = { id: cb.id, name: stripMcpPrefix(cb.name), jsonAccum: "" }
|
|
787
|
+
} else if (cb?.type === "text") {
|
|
788
|
+
contentBlocks.push({ type: "text", text: "" })
|
|
789
|
+
}
|
|
790
|
+
} else if (ev.type === "content_block_delta") {
|
|
791
|
+
if (ev.delta?.type === "text_delta") {
|
|
792
|
+
const lastText = contentBlocks[contentBlocks.length - 1]
|
|
793
|
+
if (lastText?.type === "text") {
|
|
794
|
+
lastText.text += ev.delta.text ?? ""
|
|
795
|
+
} else {
|
|
796
|
+
contentBlocks.push({ type: "text", text: ev.delta.text ?? "" })
|
|
797
|
+
}
|
|
798
|
+
fullText += ev.delta.text ?? ""
|
|
799
|
+
} else if (ev.delta?.type === "input_json_delta" && currentToolBlock) {
|
|
800
|
+
currentToolBlock.jsonAccum += ev.delta.partial_json ?? ""
|
|
801
|
+
}
|
|
802
|
+
} else if (ev.type === "content_block_stop") {
|
|
803
|
+
if (currentToolBlock) {
|
|
804
|
+
let input: any = {}
|
|
805
|
+
try { input = JSON.parse(currentToolBlock.jsonAccum) } catch {}
|
|
806
|
+
contentBlocks.push({
|
|
807
|
+
type: "tool_use",
|
|
808
|
+
id: currentToolBlock.id,
|
|
809
|
+
name: currentToolBlock.name,
|
|
810
|
+
input,
|
|
811
|
+
})
|
|
812
|
+
currentToolBlock = null
|
|
813
|
+
}
|
|
814
|
+
} else if (ev.type === "message_delta") {
|
|
815
|
+
if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
|
|
816
|
+
}
|
|
817
|
+
} else if (!hasTools && message.type === "assistant") {
|
|
879
818
|
let turnText = ""
|
|
880
819
|
for (const block of message.message.content) {
|
|
881
820
|
if (block.type === "text") turnText += block.text
|
|
@@ -896,45 +835,63 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
896
835
|
}
|
|
897
836
|
}
|
|
898
837
|
} catch (resumeErr) {
|
|
899
|
-
// If resume failed,
|
|
838
|
+
// If resume failed, start a fresh session (no history reconstruction)
|
|
900
839
|
if (isResuming && resumeSessionId) {
|
|
901
840
|
logWarn("session.resume_failed", {
|
|
902
|
-
reqId,
|
|
903
|
-
conversationId,
|
|
904
|
-
sdkSessionId: resumeSessionId,
|
|
841
|
+
reqId, conversationId, sdkSessionId: resumeSessionId,
|
|
905
842
|
error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
|
|
906
843
|
})
|
|
907
844
|
if (conversationId) {
|
|
908
845
|
sessionStore.recordFailure(conversationId)
|
|
909
846
|
sessionStore.invalidate(conversationId)
|
|
910
847
|
}
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
})
|
|
919
|
-
.join("\n\n")
|
|
920
|
-
const baseSystem = systemContext || ""
|
|
921
|
-
const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
|
|
922
|
-
const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
923
|
-
const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
|
|
924
|
-
? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
|
|
925
|
-
: serializeContent(fbLastMsg.content)
|
|
926
|
-
const fallbackOpts = buildQueryOptions(model, { partial: false, systemPrompt: fallbackSystem, abortController, thinking })
|
|
927
|
-
|
|
928
|
-
logInfo("session.fallback_full_context", { reqId, conversationId })
|
|
848
|
+
|
|
849
|
+
logInfo("session.fallback_fresh", { reqId, conversationId })
|
|
850
|
+
// Reset accumulators and retry as a brand new session
|
|
851
|
+
contentBlocks.length = 0
|
|
852
|
+
currentToolBlock = null
|
|
853
|
+
capturedStopReason = null
|
|
854
|
+
fullText = ""
|
|
929
855
|
let sdkEventCount = 0
|
|
930
|
-
for await (const message of query({ prompt:
|
|
856
|
+
for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: hasTools, systemPrompt, abortController, thinking, mcpServers, canUseTool }) })) {
|
|
931
857
|
sdkEventCount++
|
|
932
858
|
resetStallTimer()
|
|
933
859
|
traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
|
|
934
860
|
if (message.type === "system" && (message as any).subtype === "init") {
|
|
935
861
|
capturedSessionId = (message as any).session_id
|
|
936
862
|
}
|
|
937
|
-
if (message.type === "
|
|
863
|
+
if (hasTools && message.type === "stream_event") {
|
|
864
|
+
const ev = (message as any).event as any
|
|
865
|
+
if (ev.type === "content_block_start") {
|
|
866
|
+
const cb = ev.content_block
|
|
867
|
+
if (cb?.type === "tool_use") {
|
|
868
|
+
currentToolBlock = { id: cb.id, name: stripMcpPrefix(cb.name), jsonAccum: "" }
|
|
869
|
+
} else if (cb?.type === "text") {
|
|
870
|
+
contentBlocks.push({ type: "text", text: "" })
|
|
871
|
+
}
|
|
872
|
+
} else if (ev.type === "content_block_delta") {
|
|
873
|
+
if (ev.delta?.type === "text_delta") {
|
|
874
|
+
const lastText = contentBlocks[contentBlocks.length - 1]
|
|
875
|
+
if (lastText?.type === "text") {
|
|
876
|
+
lastText.text += ev.delta.text ?? ""
|
|
877
|
+
} else {
|
|
878
|
+
contentBlocks.push({ type: "text", text: ev.delta.text ?? "" })
|
|
879
|
+
}
|
|
880
|
+
fullText += ev.delta.text ?? ""
|
|
881
|
+
} else if (ev.delta?.type === "input_json_delta" && currentToolBlock) {
|
|
882
|
+
currentToolBlock.jsonAccum += ev.delta.partial_json ?? ""
|
|
883
|
+
}
|
|
884
|
+
} else if (ev.type === "content_block_stop") {
|
|
885
|
+
if (currentToolBlock) {
|
|
886
|
+
let input: any = {}
|
|
887
|
+
try { input = JSON.parse(currentToolBlock.jsonAccum) } catch {}
|
|
888
|
+
contentBlocks.push({ type: "tool_use", id: currentToolBlock.id, name: currentToolBlock.name, input })
|
|
889
|
+
currentToolBlock = null
|
|
890
|
+
}
|
|
891
|
+
} else if (ev.type === "message_delta") {
|
|
892
|
+
if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
|
|
893
|
+
}
|
|
894
|
+
} else if (!hasTools && message.type === "assistant") {
|
|
938
895
|
let turnText = ""
|
|
939
896
|
for (const block of message.message.content) {
|
|
940
897
|
if (block.type === "text") turnText += block.text
|
|
@@ -943,17 +900,15 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
943
900
|
}
|
|
944
901
|
}
|
|
945
902
|
traceStore.phase(reqId, "sdk_done")
|
|
946
|
-
// Store the new session
|
|
947
903
|
if (conversationId && capturedSessionId) {
|
|
948
904
|
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
949
|
-
logInfo("session.
|
|
905
|
+
logInfo("session.created_after_fallback", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
950
906
|
}
|
|
951
907
|
} else {
|
|
952
908
|
throw resumeErr
|
|
953
909
|
}
|
|
954
910
|
} finally {
|
|
955
911
|
clearStallTimer(); clearHardTimer()
|
|
956
|
-
// (temp files no longer used — images passed natively)
|
|
957
912
|
requestQueue.release()
|
|
958
913
|
logDebug("queue.released", {
|
|
959
914
|
reqId,
|
|
@@ -965,26 +920,18 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
965
920
|
traceStore.phase(reqId, "responding")
|
|
966
921
|
|
|
967
922
|
if (hasTools) {
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
hasToolUseTag: fullText.includes("<tool_use>"),
|
|
976
|
-
hasFunctionCalls: fullText.includes("<function_calls>"),
|
|
977
|
-
hasInvoke: fullText.includes("<invoke"),
|
|
978
|
-
hasAntmlInvoke: fullText.includes("antml:invoke"),
|
|
979
|
-
})
|
|
980
|
-
}
|
|
981
|
-
const content: any[] = []
|
|
982
|
-
if (textBefore) content.push({ type: "text", text: textBefore })
|
|
983
|
-
for (const tc of toolCalls) content.push({ type: "tool_use", id: tc.id, name: tc.name, input: tc.input })
|
|
923
|
+
// Use native content blocks captured from stream events
|
|
924
|
+
const toolCallCount = contentBlocks.filter((b: any) => b.type === "tool_use").length
|
|
925
|
+
// Filter out empty or whitespace-only text blocks
|
|
926
|
+
const content = contentBlocks.filter((b: any) => {
|
|
927
|
+
if (b.type === "text" && !b.text?.trim()) return false
|
|
928
|
+
return true
|
|
929
|
+
})
|
|
984
930
|
if (content.length === 0) content.push({ type: "text", text: fullText || "..." })
|
|
985
|
-
const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
|
|
986
931
|
|
|
987
|
-
|
|
932
|
+
const stopReason = toolCallCount > 0 ? "tool_use" : (capturedStopReason ?? "end_turn")
|
|
933
|
+
|
|
934
|
+
traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount })
|
|
988
935
|
|
|
989
936
|
return c.json({
|
|
990
937
|
id: generateId("msg_"),
|
|
@@ -1078,8 +1025,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1078
1025
|
})
|
|
1079
1026
|
|
|
1080
1027
|
if (hasTools) {
|
|
1081
|
-
// ── With tools:
|
|
1028
|
+
// ── With tools: forward native SDK stream events directly as SSE ──
|
|
1082
1029
|
let fullText = ""
|
|
1030
|
+
let blockIdx = 0
|
|
1031
|
+
let toolCallCount = 0
|
|
1032
|
+
let capturedStopReason: string | null = null
|
|
1033
|
+
let hasEmittedAnyBlock = false
|
|
1083
1034
|
let sdkEventCount = 0
|
|
1084
1035
|
let lastEventAt = Date.now()
|
|
1085
1036
|
const stallLog = setInterval(() => {
|
|
@@ -1087,28 +1038,51 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1087
1038
|
traceStore.stall(reqId, stallMs)
|
|
1088
1039
|
}, 15_000)
|
|
1089
1040
|
let capturedSessionId: string | undefined
|
|
1041
|
+
|
|
1042
|
+
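// Helper that re-emits an SDK stream event as SSE: only text and tool_use blocks are forwarded, re-indexed via blockIdx, with the MCP prefix stripped from tool names. NOTE(review): content_block_stop is sent and blockIdx advanced for every block type, including ones whose start was not forwarded — confirm this is intended.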
|
|
1043
|
+
const forwardStreamEvent = (ev: any) => {
|
|
1044
|
+
if (ev.type === "content_block_start") {
|
|
1045
|
+
const cb = ev.content_block
|
|
1046
|
+
if (cb?.type === "tool_use") {
|
|
1047
|
+
toolCallCount++
|
|
1048
|
+
sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "tool_use", id: cb.id, name: stripMcpPrefix(cb.name), input: {} } })
|
|
1049
|
+
} else if (cb?.type === "text") {
|
|
1050
|
+
sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "text", text: "" } })
|
|
1051
|
+
}
|
|
1052
|
+
hasEmittedAnyBlock = true
|
|
1053
|
+
} else if (ev.type === "content_block_delta") {
|
|
1054
|
+
if (ev.delta?.type === "text_delta") {
|
|
1055
|
+
fullText += ev.delta.text ?? ""
|
|
1056
|
+
traceStore.updateOutput(reqId, fullText.length)
|
|
1057
|
+
checkOutputSize(fullText.length)
|
|
1058
|
+
sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "text_delta", text: ev.delta.text ?? "" } })
|
|
1059
|
+
} else if (ev.delta?.type === "input_json_delta") {
|
|
1060
|
+
sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "input_json_delta", partial_json: ev.delta.partial_json ?? "" } })
|
|
1061
|
+
}
|
|
1062
|
+
} else if (ev.type === "content_block_stop") {
|
|
1063
|
+
sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
|
|
1064
|
+
blockIdx++
|
|
1065
|
+
} else if (ev.type === "message_delta") {
|
|
1066
|
+
if (ev.delta?.stop_reason) capturedStopReason = ev.delta.stop_reason
|
|
1067
|
+
}
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1090
1070
|
try {
|
|
1091
1071
|
traceStore.phase(reqId, "sdk_starting")
|
|
1092
|
-
for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
|
|
1072
|
+
for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId, mcpServers, canUseTool }) })) {
|
|
1093
1073
|
sdkEventCount++
|
|
1094
1074
|
lastEventAt = Date.now()
|
|
1095
1075
|
resetStallTimer()
|
|
1096
1076
|
const subtype = (message as any).event?.type ?? (message as any).message?.type
|
|
1097
|
-
// Capture session_id from init message
|
|
1098
1077
|
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1099
1078
|
capturedSessionId = (message as any).session_id
|
|
1100
1079
|
}
|
|
1101
1080
|
if (message.type === "stream_event") {
|
|
1102
|
-
const ev = message.event as any
|
|
1103
|
-
// Detect first content event BEFORE sdkEvent records it
|
|
1081
|
+
const ev = (message as any).event as any
|
|
1104
1082
|
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1105
1083
|
traceStore.phase(reqId, "sdk_streaming")
|
|
1106
1084
|
}
|
|
1107
|
-
|
|
1108
|
-
fullText += ev.delta.text ?? ""
|
|
1109
|
-
traceStore.updateOutput(reqId, fullText.length)
|
|
1110
|
-
checkOutputSize(fullText.length)
|
|
1111
|
-
}
|
|
1085
|
+
forwardStreamEvent(ev)
|
|
1112
1086
|
}
|
|
1113
1087
|
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1114
1088
|
}
|
|
@@ -1123,7 +1097,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1123
1097
|
}
|
|
1124
1098
|
}
|
|
1125
1099
|
} catch (resumeErr) {
|
|
1126
|
-
// Resume failed
|
|
1100
|
+
// Resume failed — start a fresh session (no history reconstruction)
|
|
1127
1101
|
if (isResuming && resumeSessionId) {
|
|
1128
1102
|
logWarn("session.resume_failed_stream", {
|
|
1129
1103
|
reqId, conversationId, sdkSessionId: resumeSessionId,
|
|
@@ -1133,25 +1107,15 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1133
1107
|
sessionStore.recordFailure(conversationId)
|
|
1134
1108
|
sessionStore.invalidate(conversationId)
|
|
1135
1109
|
}
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
.join("\n\n")
|
|
1144
|
-
const baseSystem = systemContext || ""
|
|
1145
|
-
const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
|
|
1146
|
-
const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
1147
|
-
const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
|
|
1148
|
-
? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
|
|
1149
|
-
: serializeContent(fbLastMsg.content)
|
|
1150
|
-
const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
|
|
1151
|
-
|
|
1152
|
-
logInfo("session.fallback_full_context_stream", { reqId, conversationId })
|
|
1110
|
+
|
|
1111
|
+
logInfo("session.fallback_fresh_stream", { reqId, conversationId })
|
|
1112
|
+
fullText = ""
|
|
1113
|
+
blockIdx = 0
|
|
1114
|
+
toolCallCount = 0
|
|
1115
|
+
capturedStopReason = null
|
|
1116
|
+
hasEmittedAnyBlock = false
|
|
1153
1117
|
sdkEventCount = 0
|
|
1154
|
-
for await (const message of query({ prompt:
|
|
1118
|
+
for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, mcpServers, canUseTool }) })) {
|
|
1155
1119
|
sdkEventCount++
|
|
1156
1120
|
lastEventAt = Date.now()
|
|
1157
1121
|
resetStallTimer()
|
|
@@ -1160,22 +1124,18 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1160
1124
|
capturedSessionId = (message as any).session_id
|
|
1161
1125
|
}
|
|
1162
1126
|
if (message.type === "stream_event") {
|
|
1163
|
-
const ev = message.event as any
|
|
1127
|
+
const ev = (message as any).event as any
|
|
1164
1128
|
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1165
1129
|
traceStore.phase(reqId, "sdk_streaming")
|
|
1166
1130
|
}
|
|
1167
|
-
|
|
1168
|
-
fullText += ev.delta.text ?? ""
|
|
1169
|
-
traceStore.updateOutput(reqId, fullText.length)
|
|
1170
|
-
checkOutputSize(fullText.length)
|
|
1171
|
-
}
|
|
1131
|
+
forwardStreamEvent(ev)
|
|
1172
1132
|
}
|
|
1173
1133
|
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1174
1134
|
}
|
|
1175
1135
|
traceStore.phase(reqId, "sdk_done")
|
|
1176
1136
|
if (conversationId && capturedSessionId) {
|
|
1177
1137
|
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
1178
|
-
logInfo("session.
|
|
1138
|
+
logInfo("session.created_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
1179
1139
|
}
|
|
1180
1140
|
} else {
|
|
1181
1141
|
throw resumeErr
|
|
@@ -1184,52 +1144,24 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1184
1144
|
clearInterval(stallLog)
|
|
1185
1145
|
clearInterval(heartbeat)
|
|
1186
1146
|
clearStallTimer(); clearHardTimer()
|
|
1187
|
-
// (temp files no longer used — images passed natively)
|
|
1188
1147
|
releaseQueue()
|
|
1189
1148
|
}
|
|
1190
1149
|
|
|
1191
1150
|
traceStore.phase(reqId, "responding")
|
|
1192
|
-
const { toolCalls, textBefore } = parseToolUse(fullText)
|
|
1193
|
-
|
|
1194
|
-
// Debug: log when tools were expected but none parsed
|
|
1195
|
-
if (toolCalls.length === 0 && fullText.length > 0) {
|
|
1196
|
-
logDebug("tool_parse.no_match_stream", {
|
|
1197
|
-
reqId,
|
|
1198
|
-
outputLen: fullText.length,
|
|
1199
|
-
textPreview: fullText.slice(0, 500),
|
|
1200
|
-
hasToolUseTag: fullText.includes("<tool_use>"),
|
|
1201
|
-
hasFunctionCalls: fullText.includes("<function_calls>"),
|
|
1202
|
-
hasInvoke: fullText.includes("<invoke"),
|
|
1203
|
-
hasAntmlInvoke: fullText.includes("antml:invoke"),
|
|
1204
|
-
})
|
|
1205
|
-
}
|
|
1206
1151
|
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
if (textContent) {
|
|
1210
|
-
sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "text", text: "" } })
|
|
1211
|
-
sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "text_delta", text: textContent } })
|
|
1212
|
-
sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
|
|
1213
|
-
blockIdx++
|
|
1214
|
-
} else if (toolCalls.length === 0) {
|
|
1152
|
+
// If no blocks were emitted at all, emit a placeholder text block
|
|
1153
|
+
if (!hasEmittedAnyBlock) {
|
|
1215
1154
|
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
1216
1155
|
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
|
|
1217
1156
|
sse("content_block_stop", { type: "content_block_stop", index: 0 })
|
|
1218
|
-
blockIdx = 1
|
|
1219
|
-
}
|
|
1220
|
-
for (const tc of toolCalls) {
|
|
1221
|
-
sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "tool_use", id: tc.id, name: tc.name, input: {} } })
|
|
1222
|
-
sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "input_json_delta", partial_json: JSON.stringify(tc.input) } })
|
|
1223
|
-
sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
|
|
1224
|
-
blockIdx++
|
|
1225
1157
|
}
|
|
1226
1158
|
|
|
1227
|
-
const stopReason =
|
|
1159
|
+
const stopReason = toolCallCount > 0 ? "tool_use" : (capturedStopReason ?? "end_turn")
|
|
1228
1160
|
sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
|
|
1229
1161
|
sse("message_stop", { type: "message_stop" })
|
|
1230
1162
|
controller.close()
|
|
1231
1163
|
|
|
1232
|
-
traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount
|
|
1164
|
+
traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount })
|
|
1233
1165
|
return
|
|
1234
1166
|
}
|
|
1235
1167
|
|
|
@@ -1286,7 +1218,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1286
1218
|
}
|
|
1287
1219
|
}
|
|
1288
1220
|
} catch (resumeErr) {
|
|
1289
|
-
// Resume failed
|
|
1221
|
+
// Resume failed — start a fresh session (no history reconstruction)
|
|
1290
1222
|
if (isResuming && resumeSessionId) {
|
|
1291
1223
|
logWarn("session.resume_failed_stream", {
|
|
1292
1224
|
reqId, conversationId, sdkSessionId: resumeSessionId,
|
|
@@ -1296,25 +1228,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1296
1228
|
sessionStore.recordFailure(conversationId)
|
|
1297
1229
|
sessionStore.invalidate(conversationId)
|
|
1298
1230
|
}
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
const contextParts = priorMsgs
|
|
1302
|
-
.map((m) => {
|
|
1303
|
-
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
1304
|
-
return `[${role}]\n${serializeContent(m.content)}`
|
|
1305
|
-
})
|
|
1306
|
-
.join("\n\n")
|
|
1307
|
-
const baseSystem = systemContext || ""
|
|
1308
|
-
const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
|
|
1309
|
-
const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
1310
|
-
const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
|
|
1311
|
-
? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
|
|
1312
|
-
: serializeContent(fbLastMsg.content)
|
|
1313
|
-
const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
|
|
1314
|
-
|
|
1315
|
-
logInfo("session.fallback_full_context_stream", { reqId, conversationId })
|
|
1231
|
+
|
|
1232
|
+
logInfo("session.fallback_fresh_stream", { reqId, conversationId })
|
|
1316
1233
|
sdkEventCount = 0
|
|
1317
|
-
for await (const message of query({ prompt:
|
|
1234
|
+
for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking }) })) {
|
|
1318
1235
|
sdkEventCount++
|
|
1319
1236
|
lastEventAt = Date.now()
|
|
1320
1237
|
resetStallTimer()
|
|
@@ -1343,7 +1260,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
1343
1260
|
traceStore.phase(reqId, "sdk_done")
|
|
1344
1261
|
if (conversationId && capturedSessionId2) {
|
|
1345
1262
|
sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
|
|
1346
|
-
logInfo("session.
|
|
1263
|
+
logInfo("session.created_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId2 })
|
|
1347
1264
|
}
|
|
1348
1265
|
} else {
|
|
1349
1266
|
throw resumeErr
|