claude-sdk-proxy 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/logger.ts +127 -8
- package/src/proxy/server.ts +992 -144
- package/src/proxy/types.ts +9 -2
- package/src/session-store.ts +198 -0
- package/src/trace.ts +633 -0
package/src/proxy/server.ts
CHANGED
|
@@ -4,13 +4,15 @@ import { query } from "@anthropic-ai/claude-agent-sdk"
|
|
|
4
4
|
import type { Context } from "hono"
|
|
5
5
|
import type { ProxyConfig } from "./types"
|
|
6
6
|
import { DEFAULT_PROXY_CONFIG } from "./types"
|
|
7
|
-
import {
|
|
7
|
+
import { logInfo, logWarn, logError, logDebug, LOG_DIR } from "../logger"
|
|
8
|
+
import { traceStore } from "../trace"
|
|
9
|
+
import { sessionStore } from "../session-store"
|
|
8
10
|
import { execSync } from "child_process"
|
|
9
|
-
import { existsSync, writeFileSync,
|
|
10
|
-
import { tmpdir } from "os"
|
|
11
|
+
import { existsSync, writeFileSync, readFileSync, readdirSync } from "fs"
|
|
11
12
|
import { randomBytes } from "crypto"
|
|
12
13
|
import { fileURLToPath } from "url"
|
|
13
14
|
import { join, dirname } from "path"
|
|
15
|
+
|
|
14
16
|
// Base62 ID generator — matches Anthropic's real ID format (e.g. msg_01XFDUDYJgAACzvnptvVoYEL)
|
|
15
17
|
const BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
|
16
18
|
function generateId(prefix: string, length = 24): string {
|
|
@@ -47,9 +49,11 @@ const claudeExecutable = resolveClaudeExecutable()
|
|
|
47
49
|
|
|
48
50
|
const MAX_CONCURRENT = parseInt(process.env.CLAUDE_PROXY_MAX_CONCURRENT ?? "5", 10)
|
|
49
51
|
|
|
52
|
+
// Maximum time (ms) a request may wait in the queue for a free slot.
// Read from CLAUDE_PROXY_QUEUE_TIMEOUT_MS; falls back to 30000 when the
// variable is unset or does not parse to a non-negative integer. Without the
// fallback an invalid value becomes NaN and is used as the queue-wait
// setTimeout delay.
const QUEUE_TIMEOUT_MS = (() => {
  const parsed = Number.parseInt(process.env.CLAUDE_PROXY_QUEUE_TIMEOUT_MS ?? "", 10)
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : 30000
})()
|
|
53
|
+
|
|
50
54
|
class RequestQueue {
|
|
51
55
|
private active = 0
|
|
52
|
-
private waiting: Array<() => void> = []
|
|
56
|
+
private waiting: Array<{ resolve: () => void; reject: (err: Error) => void }> = []
|
|
53
57
|
|
|
54
58
|
get activeCount() { return this.active }
|
|
55
59
|
get waitingCount() { return this.waiting.length }
|
|
@@ -59,15 +63,25 @@ class RequestQueue {
|
|
|
59
63
|
this.active++
|
|
60
64
|
return
|
|
61
65
|
}
|
|
62
|
-
return new Promise<void>((resolve) => {
|
|
63
|
-
|
|
66
|
+
return new Promise<void>((resolve, reject) => {
|
|
67
|
+
const entry = { resolve: () => { this.active++; resolve() }, reject }
|
|
68
|
+
this.waiting.push(entry)
|
|
69
|
+
const timer = setTimeout(() => {
|
|
70
|
+
const idx = this.waiting.indexOf(entry)
|
|
71
|
+
if (idx !== -1) {
|
|
72
|
+
this.waiting.splice(idx, 1)
|
|
73
|
+
reject(new Error("Queue timeout — all slots busy"))
|
|
74
|
+
}
|
|
75
|
+
}, QUEUE_TIMEOUT_MS)
|
|
76
|
+
const origResolve = entry.resolve
|
|
77
|
+
entry.resolve = () => { clearTimeout(timer); origResolve() }
|
|
64
78
|
})
|
|
65
79
|
}
|
|
66
80
|
|
|
67
81
|
release(): void {
|
|
68
82
|
this.active--
|
|
69
83
|
const next = this.waiting.shift()
|
|
70
|
-
if (next) next()
|
|
84
|
+
if (next) next.resolve()
|
|
71
85
|
}
|
|
72
86
|
}
|
|
73
87
|
|
|
@@ -81,45 +95,13 @@ function mapModelToClaudeModel(model: string): "sonnet" | "opus" | "haiku" {
|
|
|
81
95
|
|
|
82
96
|
// ── Content-block serialization ──────────────────────────────────────────────
|
|
83
97
|
|
|
84
|
-
function
|
|
85
|
-
try {
|
|
86
|
-
let data: string | undefined
|
|
87
|
-
let mediaType = "image/jpeg"
|
|
88
|
-
|
|
89
|
-
if (typeof block.data === "string") {
|
|
90
|
-
data = block.data
|
|
91
|
-
mediaType = block.media_type || mediaType
|
|
92
|
-
} else if (block.source) {
|
|
93
|
-
if (block.source.type === "base64" && block.source.data) {
|
|
94
|
-
data = block.source.data
|
|
95
|
-
mediaType = block.source.media_type || mediaType
|
|
96
|
-
} else if (block.source.url) {
|
|
97
|
-
return block.source.url
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
if (!data) return null
|
|
102
|
-
|
|
103
|
-
const ext = mediaType.split("/")[1]?.replace("jpeg", "jpg") || "jpg"
|
|
104
|
-
const tmpPath = join(tmpdir(), `proxy-img-${randomBytes(8).toString("hex")}.${ext}`)
|
|
105
|
-
writeFileSync(tmpPath, Buffer.from(data, "base64"))
|
|
106
|
-
tempFiles.push(tmpPath)
|
|
107
|
-
return tmpPath
|
|
108
|
-
} catch {
|
|
109
|
-
return null
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
function serializeBlock(block: any, tempFiles: string[]): string {
|
|
98
|
+
function serializeBlock(block: any): string {
|
|
114
99
|
switch (block.type) {
|
|
115
100
|
case "text":
|
|
116
101
|
return block.text || ""
|
|
117
|
-
case "image":
|
|
118
|
-
|
|
119
|
-
return imgPath ? `[Image: ${imgPath}]` : "[Image: (unable to save)]"
|
|
120
|
-
}
|
|
102
|
+
case "image":
|
|
103
|
+
return "[Image attached]"
|
|
121
104
|
case "tool_use":
|
|
122
|
-
// Use <tool_use> XML format so the model continues using parseable blocks
|
|
123
105
|
return `<tool_use>\n{"name": "${block.name}", "input": ${JSON.stringify(block.input ?? {})}}\n</tool_use>`
|
|
124
106
|
case "tool_result": {
|
|
125
107
|
const content = Array.isArray(block.content)
|
|
@@ -128,7 +110,7 @@ function serializeBlock(block: any, tempFiles: string[]): string {
|
|
|
128
110
|
const truncated = content.length > 4000
|
|
129
111
|
? content.slice(0, 4000) + `\n...[truncated ${content.length - 4000} chars]`
|
|
130
112
|
: content
|
|
131
|
-
return
|
|
113
|
+
return `[Tool Result (id: ${block.tool_use_id})]\n${truncated}\n[/Tool Result]`
|
|
132
114
|
}
|
|
133
115
|
case "thinking":
|
|
134
116
|
return ""
|
|
@@ -137,22 +119,83 @@ function serializeBlock(block: any, tempFiles: string[]): string {
|
|
|
137
119
|
}
|
|
138
120
|
}
|
|
139
121
|
|
|
140
|
-
function serializeContent(content: string | Array<any
|
|
122
|
+
function serializeContent(content: string | Array<any>): string {
|
|
141
123
|
if (typeof content === "string") return content
|
|
142
124
|
if (!Array.isArray(content)) return String(content)
|
|
143
|
-
return content.map(b => serializeBlock(b
|
|
125
|
+
return content.map(b => serializeBlock(b)).filter(Boolean).join("\n")
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// ── Image handling via SDKUserMessage ────────────────────────────────────────
|
|
129
|
+
// The SDK query() accepts AsyncIterable<SDKUserMessage> which supports native
|
|
130
|
+
// Anthropic MessageParam content blocks including images. When images are
|
|
131
|
+
// detected, we pass them through natively instead of serializing to text.
|
|
132
|
+
|
|
133
|
+
/**
 * Report whether message content contains at least one image block.
 *
 * @param content Either a plain string or an array of content blocks.
 * @returns `true` only when `content` is an array holding a block whose
 *          `type` is `"image"`; strings and non-arrays yield `false`.
 */
function contentHasImages(content: string | Array<any>): boolean {
  if (!Array.isArray(content)) return false
  // Optional chaining so a null/undefined entry in the array is skipped
  // instead of throwing on property access.
  return content.some((block: any) => block?.type === "image")
}
|
|
138
|
+
|
|
139
|
+
/**
 * Guess an image media type from the leading characters of base64 data.
 * The prefixes are the base64 encodings of the PNG, JPEG, GIF and WebP (RIFF)
 * file signatures. Unknown data falls back to "image/jpeg".
 */
function sniffImageMediaType(base64: string): string {
  if (base64.startsWith("iVBORw0KGgo")) return "image/png"
  if (base64.startsWith("/9j/")) return "image/jpeg"
  if (base64.startsWith("R0lGOD")) return "image/gif"
  if (base64.startsWith("UklGR")) return "image/webp"
  return "image/jpeg"
}

/**
 * Convert an image content block to the Anthropic `{ type, source }` shape.
 *
 * - Blocks that already carry `source` are returned unchanged.
 * - Flat blocks (`{ data, mimeType }` or `{ data, media_type }`) are wrapped
 *   into a base64 `source`; `mimeType` takes precedence over `media_type`.
 * - Flat blocks with string `data` but no declared type get a media type
 *   sniffed from the payload instead of being passed through unconverted.
 * - Anything else is returned unchanged.
 *
 * @param block Image content block; must be a non-null object.
 * @returns The block in Anthropic format, or the input when no conversion applies.
 */
function toAnthropicImageBlock(block: any): any {
  if (block.source) return block // already in Anthropic format
  if (!block.data) return block

  const declaredType = block.mimeType || block.media_type
  const mediaType = declaredType
    || (typeof block.data === "string" ? sniffImageMediaType(block.data) : undefined)
  if (!mediaType) return block

  return {
    type: "image",
    source: {
      type: "base64",
      media_type: mediaType,
      data: block.data,
    }
  }
}
|
|
145
165
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
166
|
+
/**
 * Build a MessageParam-style content array, keeping image blocks native.
 *
 * Strings (and any non-array value, stringified) become a single text block.
 * For arrays: image blocks go through toAnthropicImageBlock, text blocks are
 * copied with a missing `text` replaced by "", and every other block type is
 * serialized to text via serializeBlock and dropped when that text is empty.
 */
function buildNativeContent(content: string | Array<any>): Array<any> {
  if (!Array.isArray(content)) {
    const text = typeof content === "string" ? content : String(content)
    return [{ type: "text", text }]
  }

  return content.reduce<Array<any>>((parts, item: any) => {
    if (item.type === "image") {
      const image = toAnthropicImageBlock(item)
      if (image) parts.push(image)
      return parts
    }
    if (item.type === "text") {
      parts.push({ type: "text", text: item.text ?? "" })
      return parts
    }
    // Any other block type is flattened to its text serialization.
    const flattened = serializeBlock(item)
    if (flattened) parts.push({ type: "text", text: flattened })
    return parts
  }, [])
}
|
|
178
|
+
|
|
179
|
+
/**
 * Wrap content blocks in an async iterable that yields exactly one
 * user message object.
 *
 * @param content   Content blocks placed at `message.content`.
 * @param sessionId Value for `session_id`; an empty string when omitted.
 * @returns An iterable whose every iteration yields the same single message.
 */
function createSDKUserMessage(content: Array<any>, sessionId?: string): AsyncIterable<any> {
  const userMessage = {
    type: "user" as const,
    message: { role: "user" as const, content },
    parent_tool_use_id: null,
    session_id: sessionId ?? "",
  }

  // A fresh generator is created per iteration, so the iterable is reusable.
  async function* emitOnce() {
    yield userMessage
  }

  return { [Symbol.asyncIterator]: emitOnce }
}
|
|
151
196
|
|
|
197
|
+
|
|
152
198
|
// ── Client tool-use support ──────────────────────────────────────────────────
|
|
153
|
-
// The proxy never uses Claude Code's built-in tools. All tools come from the
|
|
154
|
-
// API caller. Tool definitions are injected into the system prompt; <tool_use>
|
|
155
|
-
// XML blocks in the output are parsed back into Anthropic tool_use content.
|
|
156
199
|
|
|
157
200
|
function buildClientToolsPrompt(tools: any[]): string {
|
|
158
201
|
const defs = tools.map((t: any) => {
|
|
@@ -173,7 +216,6 @@ function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string
|
|
|
173
216
|
const calls: ToolCall[] = []
|
|
174
217
|
let firstIdx = -1
|
|
175
218
|
|
|
176
|
-
// Parse <tool_use> XML blocks (primary format)
|
|
177
219
|
const xmlRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
|
|
178
220
|
let m: RegExpExecArray | null
|
|
179
221
|
while ((m = xmlRegex.exec(text)) !== null) {
|
|
@@ -188,7 +230,43 @@ function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string
|
|
|
188
230
|
} catch { /* skip malformed block */ }
|
|
189
231
|
}
|
|
190
232
|
|
|
191
|
-
|
|
233
|
+
if (calls.length === 0) {
|
|
234
|
+
const fcRegex = /<function_calls>([\s\S]*?)<\/function_calls>/g
|
|
235
|
+
while ((m = fcRegex.exec(text)) !== null) {
|
|
236
|
+
if (firstIdx < 0) firstIdx = m.index
|
|
237
|
+
try {
|
|
238
|
+
const parsed = JSON.parse(m[1]!.trim())
|
|
239
|
+
const items = Array.isArray(parsed) ? parsed : [parsed]
|
|
240
|
+
for (const p of items) {
|
|
241
|
+
if (p && typeof p.name === "string") {
|
|
242
|
+
calls.push({
|
|
243
|
+
id: generateId("toolu_"),
|
|
244
|
+
name: p.name,
|
|
245
|
+
input: p.input ?? p.parameters ?? {}
|
|
246
|
+
})
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
} catch { /* skip malformed block */ }
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
if (calls.length === 0) {
|
|
254
|
+
const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g
|
|
255
|
+
while ((m = invokeRegex.exec(text)) !== null) {
|
|
256
|
+
if (firstIdx < 0) firstIdx = m.index
|
|
257
|
+
const toolName = m[1]!
|
|
258
|
+
const body = m[2]!
|
|
259
|
+
const input: Record<string, any> = {}
|
|
260
|
+
const paramRegex = /<parameter\s+name="([^"]+)">([\s\S]*?)<\/parameter>/g
|
|
261
|
+
let pm: RegExpExecArray | null
|
|
262
|
+
while ((pm = paramRegex.exec(body)) !== null) {
|
|
263
|
+
const val = pm[2]!.trim()
|
|
264
|
+
try { input[pm[1]!] = JSON.parse(val) } catch { input[pm[1]!] = val }
|
|
265
|
+
}
|
|
266
|
+
calls.push({ id: generateId("toolu_"), name: toolName, input })
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
192
270
|
if (calls.length === 0) {
|
|
193
271
|
const bracketRegex = /\[Tool call:\s*(\w+)\s*\nInput:\s*([\s\S]*?)\]/g
|
|
194
272
|
while ((m = bracketRegex.exec(text)) !== null) {
|
|
@@ -211,10 +289,43 @@ function roughTokens(text: string): number {
|
|
|
211
289
|
return Math.ceil((text ?? "").length / 4)
|
|
212
290
|
}
|
|
213
291
|
|
|
292
|
+
// ── Conversation label extraction ────────────────────────────────────────────
|
|
293
|
+
// Openclaw embeds "Conversation info (untrusted metadata)" in the last user
|
|
294
|
+
// message containing a JSON block with conversation_label. Extract it to use
|
|
295
|
+
// as a stable conversation ID for session persistence.
|
|
296
|
+
|
|
297
|
+
/**
 * Find a stable conversation identifier in the newest user message that
 * carries a "Conversation info" metadata block (a fenced ```json object).
 *
 * Messages are scanned from last to first; non-user messages and user
 * messages without the metadata block are skipped. For the first block found:
 * - `conversation_label` is returned when it is a non-empty string or number;
 * - otherwise `sender_id` is returned as `dm:<sender_id>`;
 * - if the JSON does not parse, `conversation_label` is recovered by regex
 *   from the captured metadata block only (not from the rest of the message).
 *
 * @param messages Conversation messages in chronological order.
 * @returns The label, or `null` when no usable metadata is present.
 */
function extractConversationLabel(messages: Array<{ role: string; content: string | Array<any> }>): string | null {
  // Accept only scalar identifiers so the declared `string | null` return
  // type holds even when the metadata contains objects or arrays.
  const asId = (value: unknown): string | null => {
    if (!value) return null
    if (typeof value === "string") return value
    if (typeof value === "number" && Number.isFinite(value)) return String(value)
    return null
  }

  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i]
    if (!msg || msg.role !== "user") continue

    const text = typeof msg.content === "string"
      ? msg.content
      : Array.isArray(msg.content)
        // `b?.` tolerates null entries in the content array.
        ? msg.content.filter((b: any) => b?.type === "text").map((b: any) => b.text ?? "").join("\n")
        : ""

    // Look for the JSON block after "Conversation info"
    const jsonMatch = text.match(/Conversation info[^`]*```json\s*(\{[\s\S]*?\})\s*```/)
    const rawMeta = jsonMatch?.[1]
    if (!rawMeta) continue

    try {
      const meta = JSON.parse(rawMeta)
      const label = asId(meta.conversation_label)
      if (label !== null) return label
      // Fallback: use sender_id if no label is present
      const sender = asId(meta.sender_id)
      if (sender !== null) return `dm:${sender}`
    } catch {
      // Regex fallback if JSON parse fails; restricted to the metadata block
      const labelMatch = rawMeta.match(/"conversation_label"\s*:\s*"([^"]*)"/)
      if (labelMatch?.[1]) return labelMatch[1]
    }
  }
  return null
}
|
|
327
|
+
|
|
214
328
|
// ── Query options builder ────────────────────────────────────────────────────
|
|
215
|
-
// Always runs with all built-in tools disabled (tools: []) and maxTurns: 1.
|
|
216
|
-
// The proxy is a pure API translation layer — tool definitions come from the
|
|
217
|
-
// caller and are injected into the system prompt. No MCP servers, no agent loop.
|
|
218
329
|
|
|
219
330
|
function buildQueryOptions(
|
|
220
331
|
model: "sonnet" | "opus" | "haiku",
|
|
@@ -223,6 +334,7 @@ function buildQueryOptions(
|
|
|
223
334
|
systemPrompt?: string
|
|
224
335
|
abortController?: AbortController
|
|
225
336
|
thinking?: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" }
|
|
337
|
+
resume?: string
|
|
226
338
|
} = {}
|
|
227
339
|
) {
|
|
228
340
|
return {
|
|
@@ -230,14 +342,15 @@ function buildQueryOptions(
|
|
|
230
342
|
pathToClaudeCodeExecutable: claudeExecutable,
|
|
231
343
|
permissionMode: "bypassPermissions" as const,
|
|
232
344
|
allowDangerouslySkipPermissions: true,
|
|
233
|
-
persistSession:
|
|
345
|
+
persistSession: true,
|
|
234
346
|
settingSources: [],
|
|
235
|
-
tools: [] as string[],
|
|
347
|
+
tools: ["_proxy_noop_"] as string[],
|
|
236
348
|
maxTurns: 1,
|
|
237
349
|
...(opts.partial ? { includePartialMessages: true } : {}),
|
|
238
350
|
...(opts.abortController ? { abortController: opts.abortController } : {}),
|
|
239
351
|
...(opts.thinking ? { thinking: opts.thinking } : {}),
|
|
240
352
|
...(opts.systemPrompt ? { systemPrompt: opts.systemPrompt } : {}),
|
|
353
|
+
...(opts.resume ? { resume: opts.resume } : {}),
|
|
241
354
|
}
|
|
242
355
|
}
|
|
243
356
|
|
|
@@ -249,13 +362,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
249
362
|
|
|
250
363
|
app.use("*", cors())
|
|
251
364
|
|
|
252
|
-
// Optional API key validation
|
|
253
|
-
// require a matching x-api-key or Authorization: Bearer header.
|
|
365
|
+
// Optional API key validation
|
|
254
366
|
const requiredApiKey = process.env.CLAUDE_PROXY_API_KEY
|
|
255
367
|
if (requiredApiKey) {
|
|
256
368
|
app.use("*", async (c, next) => {
|
|
257
|
-
|
|
258
|
-
if (c.req.path === "/" || c.req.method === "OPTIONS") return next()
|
|
369
|
+
if (c.req.path === "/" || c.req.path.startsWith("/debug") || c.req.method === "OPTIONS") return next()
|
|
259
370
|
const key = c.req.header("x-api-key")
|
|
260
371
|
?? c.req.header("authorization")?.replace(/^Bearer\s+/i, "")
|
|
261
372
|
if (key !== requiredApiKey) {
|
|
@@ -269,30 +380,175 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
269
380
|
})
|
|
270
381
|
}
|
|
271
382
|
|
|
272
|
-
// Anthropic-compatible headers + request logging
|
|
383
|
+
// Anthropic-compatible headers + HTTP request logging
|
|
273
384
|
app.use("*", async (c, next) => {
|
|
274
385
|
const start = Date.now()
|
|
275
386
|
const requestId = c.req.header("x-request-id") ?? generateId("req_")
|
|
276
387
|
c.header("x-request-id", requestId)
|
|
277
388
|
c.header("request-id", requestId)
|
|
278
|
-
// Echo back Anthropic-standard headers
|
|
279
389
|
c.header("anthropic-version", "2023-06-01")
|
|
280
390
|
const betaHeader = c.req.header("anthropic-beta")
|
|
281
391
|
if (betaHeader) c.header("anthropic-beta", betaHeader)
|
|
282
392
|
await next()
|
|
283
393
|
const ms = Date.now() - start
|
|
284
|
-
|
|
394
|
+
// Only log non-debug HTTP requests at info level; debug endpoints at debug level
|
|
395
|
+
if (c.req.path.startsWith("/debug")) {
|
|
396
|
+
logDebug("http.request", { method: c.req.method, path: c.req.path, status: c.res.status, ms, reqId: requestId })
|
|
397
|
+
} else {
|
|
398
|
+
logInfo("http.request", { method: c.req.method, path: c.req.path, status: c.res.status, ms, reqId: requestId })
|
|
399
|
+
}
|
|
285
400
|
})
|
|
286
401
|
|
|
402
|
+
// ── Health / Info ────────────────────────────────────────────────────────
|
|
403
|
+
|
|
287
404
|
app.get("/", (c) => c.json({
|
|
288
405
|
status: "ok",
|
|
289
406
|
service: "claude-sdk-proxy",
|
|
290
407
|
version: PROXY_VERSION,
|
|
291
408
|
format: "anthropic",
|
|
292
|
-
endpoints: ["/v1/messages", "/v1/models", "/v1/chat/completions"],
|
|
293
|
-
queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT }
|
|
409
|
+
endpoints: ["/v1/messages", "/v1/models", "/v1/chat/completions", "/debug/stats", "/debug/traces", "/debug/errors", "/debug/active", "/debug/health", "/sessions", "/sessions/cleanup"],
|
|
410
|
+
queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
|
|
411
|
+
logDir: LOG_DIR,
|
|
294
412
|
}))
|
|
295
413
|
|
|
414
|
+
// ── Debug / Observability endpoints ──────────────────────────────────────
|
|
415
|
+
|
|
416
|
+
app.get("/debug/stats", (c) => {
  // Aggregate view: effective configuration, queue occupancy, session-store
  // stats and whatever the trace store reports (spread last, so its keys win).
  const traceStats = traceStore.getStats()
  const sessions = sessionStore.getStats()
  const config = {
    stallTimeoutMs: finalConfig.stallTimeoutMs,
    maxDurationMs: finalConfig.maxDurationMs,
    maxOutputChars: finalConfig.maxOutputChars,
    maxConcurrent: MAX_CONCURRENT,
    queueTimeoutMs: QUEUE_TIMEOUT_MS,
    claudeExecutable,
    logDir: LOG_DIR,
    debug: finalConfig.debug,
  }
  const queue = {
    active: requestQueue.activeCount,
    waiting: requestQueue.waitingCount,
    max: MAX_CONCURRENT,
  }
  return c.json({ version: PROXY_VERSION, config, queue, sessions, ...traceStats })
})
|
|
436
|
+
|
|
437
|
+
// ── Session management endpoints ──────────────────────────────────────
|
|
438
|
+
|
|
439
|
+
app.get("/sessions", (c) => {
  // Current session-store entries together with the store's own stats.
  const sessions = sessionStore.list()
  const stats = sessionStore.getStats()
  return c.json({ sessions, stats })
})
|
|
445
|
+
|
|
446
|
+
// Runs the session store's cleanup and returns whatever it reports.
app.get("/sessions/cleanup", (c) => c.json(sessionStore.cleanup()))
|
|
450
|
+
|
|
451
|
+
app.get("/debug/traces", (c) => {
  // Most recent traces. `?limit=` must be a positive integer; anything else
  // (missing, non-numeric, zero, negative) falls back to the default of 20 so
  // NaN never reaches the trace store.
  const requested = Number.parseInt(c.req.query("limit") ?? "", 10)
  const limit = Number.isInteger(requested) && requested > 0 ? requested : 20
  return c.json(traceStore.getRecentTraces(limit))
})
|
|
455
|
+
|
|
456
|
+
app.get("/debug/traces/:id", (c) => {
  // Look up one trace by request id; 404 with the id echoed back when absent.
  const reqId = c.req.param("id")
  const found = traceStore.getTrace(reqId)
  return found
    ? c.json(found)
    : c.json({ error: "Trace not found", reqId }, 404)
})
|
|
462
|
+
|
|
463
|
+
app.get("/debug/errors", (c) => {
  // Most recent errors. `?limit=` must be a positive integer; anything else
  // falls back to the default of 10 so NaN never reaches the trace store.
  const requested = Number.parseInt(c.req.query("limit") ?? "", 10)
  const limit = Number.isInteger(requested) && requested > 0 ? requested : 10
  return c.json(traceStore.getRecentErrors(limit))
})
|
|
467
|
+
|
|
468
|
+
app.get("/debug/logs", (c) => {
  // List log files in LOG_DIR named proxy-*.log, in descending name order.
  const isProxyLog = (name: string) => name.startsWith("proxy-") && name.endsWith(".log")
  try {
    const files = readdirSync(LOG_DIR).filter(isProxyLog).sort().reverse()
    return c.json({ logDir: LOG_DIR, files })
  } catch {
    // Directory missing or unreadable: report an empty listing, not a failure.
    return c.json({ logDir: LOG_DIR, files: [], error: "Cannot read log directory" })
  }
})
|
|
480
|
+
|
|
481
|
+
app.get("/debug/logs/:filename", (c) => {
  // Serve the last N lines of one log file, each parsed as JSON when possible.
  const filename = c.req.param("filename")
  // Fully anchored whitelist: only proxy-YYYY-MM-DD.log names are accepted,
  // so the joined path cannot leave LOG_DIR.
  if (!/^proxy-\d{4}-\d{2}-\d{2}\.log$/.test(filename)) {
    return c.json({ error: "Invalid log filename" }, 400)
  }

  // `?tail=` must be a positive integer. Otherwise use the default of 100:
  // slice(-0) and slice(-NaN) would return the whole file, and a negative
  // tail would drop lines from the start instead of keeping the end.
  const requestedTail = Number.parseInt(c.req.query("tail") ?? "", 10)
  const tail = Number.isInteger(requestedTail) && requestedTail > 0 ? requestedTail : 100

  try {
    const content = readFileSync(join(LOG_DIR, filename), "utf-8").trim()
    // An empty file has zero lines; "".split("\n") would report one.
    const lines = content === "" ? [] : content.split("\n")
    const sliced = lines.slice(-tail)
    const parsed = sliced.map(line => {
      try { return JSON.parse(line) } catch { return { raw: line } }
    })
    return c.json({ file: filename, total: lines.length, returned: sliced.length, lines: parsed })
  } catch {
    return c.json({ error: "Log file not found" }, 404)
  }
})
|
|
500
|
+
|
|
501
|
+
app.get("/debug/errors/:id", (c) => {
  // Serve a specific error dump file: LOG_DIR/errors/<id>.json
  const id = c.req.param("id")
  // The whole id is validated, not just its "req_" prefix. A prefix-only
  // check accepts ids containing "/" or "..", which join() would resolve to
  // a path outside LOG_DIR/errors.
  if (!/^req_[A-Za-z0-9_-]+$/.test(id)) {
    return c.json({ error: "Invalid request ID format" }, 400)
  }
  try {
    const content = readFileSync(join(LOG_DIR, "errors", `${id}.json`), "utf-8")
    return c.json(JSON.parse(content))
  } catch {
    // Missing file and unparseable dump are both reported as not found.
    return c.json({ error: "Error dump not found", reqId: id }, 404)
  }
})
|
|
512
|
+
|
|
513
|
+
app.get("/debug/active", (c) => {
  // Queue occupancy plus the trace store's list of in-flight requests.
  const { activeRequests } = traceStore.getStats()
  const queue = {
    active: requestQueue.activeCount,
    waiting: requestQueue.waitingCount,
    max: MAX_CONCURRENT,
  }
  return c.json({ queue, activeRequests })
})
|
|
521
|
+
|
|
522
|
+
app.get("/debug/health", (c) => {
  // Process health snapshot: memory usage, uptime, queue state, key config.
  const toMB = (bytes: number): string => `${(bytes / 1024 / 1024).toFixed(1)}MB`

  const usage = process.memoryUsage()
  const traceStats = traceStore.getStats()

  const memory = {
    rss: toMB(usage.rss),
    heapUsed: toMB(usage.heapUsed),
    heapTotal: toMB(usage.heapTotal),
    external: toMB(usage.external),
    // Raw byte counts alongside the formatted strings.
    rssBytes: usage.rss,
    heapUsedBytes: usage.heapUsed,
  }
  const queue = {
    active: requestQueue.activeCount,
    waiting: requestQueue.waitingCount,
    max: MAX_CONCURRENT,
  }
  const config = {
    stallTimeoutMs: finalConfig.stallTimeoutMs,
    maxConcurrent: MAX_CONCURRENT,
    queueTimeoutMs: QUEUE_TIMEOUT_MS,
    debug: finalConfig.debug,
  }

  return c.json({
    version: PROXY_VERSION,
    pid: process.pid,
    uptimeMs: traceStats.uptimeMs,
    uptimeHuman: traceStats.uptimeHuman,
    memory,
    queue,
    requests: traceStats.requests,
    config,
  })
})
|
|
549
|
+
|
|
550
|
+
// ── Model endpoints ──────────────────────────────────────────────────────
|
|
551
|
+
|
|
296
552
|
const MODELS = [
|
|
297
553
|
{ type: "model", id: "claude-opus-4-6", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
|
|
298
554
|
{ type: "model", id: "claude-opus-4-6-20250801", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
|
|
@@ -303,7 +559,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
303
559
|
{ type: "model", id: "claude-haiku-4-5-20251001", display_name: "Claude Haiku 4.5", created_at: "2025-10-01T00:00:00Z" },
|
|
304
560
|
]
|
|
305
561
|
|
|
306
|
-
// Dual-format model data: includes fields for both Anthropic and OpenAI SDKs
|
|
307
562
|
const MODELS_DUAL = MODELS.map(m => ({
|
|
308
563
|
...m,
|
|
309
564
|
object: "model" as const,
|
|
@@ -341,17 +596,27 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
341
596
|
app.post("/v1/messages/count_tokens", handleCountTokens)
|
|
342
597
|
app.post("/messages/count_tokens", handleCountTokens)
|
|
343
598
|
|
|
599
|
+
// ── Messages handler ─────────────────────────────────────────────────────
|
|
600
|
+
|
|
344
601
|
const handleMessages = async (c: Context) => {
|
|
345
602
|
const reqId = generateId("req_")
|
|
603
|
+
// Will be set after body parse; needed for outer catch
|
|
604
|
+
let trace: ReturnType<typeof traceStore.create> | undefined
|
|
605
|
+
let requestStarted = Date.now()
|
|
606
|
+
let clientDisconnected = false
|
|
607
|
+
let abortReason: "stall" | "max_duration" | "max_output" | null = null
|
|
608
|
+
|
|
346
609
|
try {
|
|
347
610
|
let body: any
|
|
348
611
|
try {
|
|
349
612
|
body = await c.req.json()
|
|
350
|
-
} catch {
|
|
613
|
+
} catch (parseErr) {
|
|
614
|
+
logWarn("request.invalid_json", { reqId })
|
|
351
615
|
return c.json({ type: "error", error: { type: "invalid_request_error", message: "Request body must be valid JSON" }, request_id: reqId }, 400)
|
|
352
616
|
}
|
|
353
617
|
|
|
354
618
|
if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
|
|
619
|
+
logWarn("request.missing_messages", { reqId })
|
|
355
620
|
return c.json({ type: "error", error: { type: "invalid_request_error", message: "messages is required and must be a non-empty array" }, request_id: reqId }, 400)
|
|
356
621
|
}
|
|
357
622
|
|
|
@@ -359,17 +624,74 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
359
624
|
const stream = body.stream ?? false
|
|
360
625
|
const hasTools = body.tools?.length > 0
|
|
361
626
|
const abortController = new AbortController()
|
|
362
|
-
const timeout = setTimeout(() => abortController.abort(), finalConfig.requestTimeoutMs)
|
|
363
627
|
|
|
364
|
-
//
|
|
628
|
+
// Stall-based timeout: only aborts if no SDK events received for stallTimeoutMs.
|
|
629
|
+
// Resets on every SDK event, so active requests never get killed.
|
|
630
|
+
// NOTE: not started until queue is acquired — queue wait doesn't count.
|
|
631
|
+
let stallTimer: ReturnType<typeof setTimeout> | null = null
|
|
632
|
+
const resetStallTimer = () => {
|
|
633
|
+
if (stallTimer) clearTimeout(stallTimer)
|
|
634
|
+
stallTimer = setTimeout(() => {
|
|
635
|
+
abortReason = "stall"
|
|
636
|
+
logWarn("request.stall_timeout", {
|
|
637
|
+
reqId,
|
|
638
|
+
stallTimeoutMs: finalConfig.stallTimeoutMs,
|
|
639
|
+
phase: trace?.phase,
|
|
640
|
+
sdkEventCount: trace?.sdkEventCount,
|
|
641
|
+
outputLen: trace?.outputLen,
|
|
642
|
+
lastEventType: trace?.lastEventType,
|
|
643
|
+
})
|
|
644
|
+
abortController.abort()
|
|
645
|
+
}, finalConfig.stallTimeoutMs)
|
|
646
|
+
}
|
|
647
|
+
const clearStallTimer = () => {
|
|
648
|
+
if (stallTimer) { clearTimeout(stallTimer); stallTimer = null }
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
// Hard max duration: kills request even if actively streaming. Safety valve.
|
|
652
|
+
let hardTimer: ReturnType<typeof setTimeout> | null = null
|
|
653
|
+
const startHardTimer = () => {
|
|
654
|
+
hardTimer = setTimeout(() => {
|
|
655
|
+
abortReason = "max_duration"
|
|
656
|
+
logError("request.max_duration", {
|
|
657
|
+
reqId,
|
|
658
|
+
maxDurationMs: finalConfig.maxDurationMs,
|
|
659
|
+
phase: trace?.phase,
|
|
660
|
+
sdkEventCount: trace?.sdkEventCount,
|
|
661
|
+
outputLen: trace?.outputLen,
|
|
662
|
+
model: trace?.model,
|
|
663
|
+
lastEventType: trace?.lastEventType,
|
|
664
|
+
})
|
|
665
|
+
abortController.abort()
|
|
666
|
+
}, finalConfig.maxDurationMs)
|
|
667
|
+
}
|
|
668
|
+
const clearHardTimer = () => {
|
|
669
|
+
if (hardTimer) { clearTimeout(hardTimer); hardTimer = null }
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
// Output size check: kills request if output exceeds maxOutputChars.
|
|
673
|
+
const checkOutputSize = (outputLen: number) => {
|
|
674
|
+
if (outputLen > finalConfig.maxOutputChars && !abortReason) {
|
|
675
|
+
abortReason = "max_output"
|
|
676
|
+
logError("request.max_output", {
|
|
677
|
+
reqId,
|
|
678
|
+
outputLen,
|
|
679
|
+
maxOutputChars: finalConfig.maxOutputChars,
|
|
680
|
+
phase: trace?.phase,
|
|
681
|
+
sdkEventCount: trace?.sdkEventCount,
|
|
682
|
+
model: trace?.model,
|
|
683
|
+
elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
|
|
684
|
+
})
|
|
685
|
+
abortController.abort()
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
|
|
365
689
|
const thinking: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" } | undefined =
|
|
366
690
|
body.thinking?.type === "enabled" ? { type: "enabled", budgetTokens: body.thinking.budget_tokens }
|
|
367
691
|
: body.thinking?.type === "disabled" ? { type: "disabled" }
|
|
368
692
|
: body.thinking?.type === "adaptive" ? { type: "adaptive" }
|
|
369
693
|
: undefined
|
|
370
694
|
|
|
371
|
-
const tempFiles: string[] = []
|
|
372
|
-
|
|
373
695
|
let systemContext = ""
|
|
374
696
|
if (body.system) {
|
|
375
697
|
if (typeof body.system === "string") {
|
|
@@ -382,49 +704,161 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
382
704
|
}
|
|
383
705
|
}
|
|
384
706
|
|
|
385
|
-
// Build the prompt from messages. The SDK's query() takes a single prompt
|
|
386
|
-
// string, so multi-turn conversations are serialized with XML-delimited
|
|
387
|
-
// turns. Prior turns go into the system prompt as context, the last user
|
|
388
|
-
// message becomes the prompt.
|
|
389
707
|
const messages = body.messages as Array<{ role: string; content: string | Array<any> }>
|
|
390
708
|
|
|
391
|
-
let
|
|
709
|
+
let promptText: string // text version for token counting / logging
|
|
392
710
|
let systemPrompt: string | undefined
|
|
393
711
|
const toolsSection = hasTools ? buildClientToolsPrompt(body.tools) : ""
|
|
394
712
|
|
|
395
|
-
|
|
713
|
+
// ── Session resumption ─────────────────────────────────────────────
|
|
714
|
+
// Derive conversation ID from: headers (explicit) or conversation_label
|
|
715
|
+
// embedded in openclaw message metadata.
|
|
716
|
+
const conversationId = c.req.header("x-conversation-id")
|
|
717
|
+
?? c.req.header("x-session-id")
|
|
718
|
+
?? extractConversationLabel(messages)
|
|
719
|
+
?? null
|
|
720
|
+
|
|
721
|
+
let resumeSessionId: string | undefined
|
|
722
|
+
let isResuming = false
|
|
723
|
+
|
|
724
|
+
if (conversationId && messages.length > 1) {
|
|
725
|
+
const stored = sessionStore.get(conversationId)
|
|
726
|
+
if (stored && stored.model === model) {
|
|
727
|
+
resumeSessionId = stored.sdkSessionId
|
|
728
|
+
isResuming = true
|
|
729
|
+
logInfo("session.resuming", {
|
|
730
|
+
reqId,
|
|
731
|
+
conversationId,
|
|
732
|
+
sdkSessionId: resumeSessionId,
|
|
733
|
+
storedMsgCount: stored.messageCount,
|
|
734
|
+
currentMsgCount: messages.length,
|
|
735
|
+
resumeCount: stored.resumeCount,
|
|
736
|
+
})
|
|
737
|
+
}
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
// Check if last user message contains images — if so, use native SDK multimodal input
|
|
741
|
+
const lastMsg = messages[messages.length - 1]!
|
|
742
|
+
const lastMsgHasImages = contentHasImages(lastMsg.content)
|
|
743
|
+
|
|
744
|
+
// promptInput: either a string (text-only) or AsyncIterable<SDKUserMessage> (multimodal)
|
|
745
|
+
let promptInput: string | AsyncIterable<any>
|
|
746
|
+
// promptText: always the text-only version for token counting and logging
|
|
747
|
+
promptText = serializeContent(lastMsg.content)
|
|
748
|
+
|
|
749
|
+
if (isResuming && resumeSessionId) {
|
|
750
|
+
systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
|
|
751
|
+
if (lastMsgHasImages) {
|
|
752
|
+
promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content), resumeSessionId)
|
|
753
|
+
logInfo("session.resume_with_images", { reqId, conversationId })
|
|
754
|
+
} else {
|
|
755
|
+
promptInput = promptText
|
|
756
|
+
}
|
|
757
|
+
} else if (messages.length === 1) {
|
|
396
758
|
systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
|
|
397
|
-
|
|
759
|
+
promptInput = lastMsgHasImages
|
|
760
|
+
? createSDKUserMessage(buildNativeContent(lastMsg.content))
|
|
761
|
+
: promptText
|
|
762
|
+
if (lastMsgHasImages) logInfo("request.native_images", { reqId })
|
|
398
763
|
} else {
|
|
399
|
-
const lastMsg = messages[messages.length - 1]!
|
|
400
764
|
const priorMsgs = messages.slice(0, -1)
|
|
401
765
|
|
|
402
766
|
const contextParts = priorMsgs
|
|
403
767
|
.map((m) => {
|
|
404
|
-
const
|
|
405
|
-
return
|
|
768
|
+
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
769
|
+
return `[${role}]\n${serializeContent(m.content)}`
|
|
406
770
|
})
|
|
407
771
|
.join("\n\n")
|
|
408
772
|
|
|
409
773
|
const baseSystem = systemContext || ""
|
|
410
774
|
const contextSection = contextParts
|
|
411
|
-
? `\n\n
|
|
775
|
+
? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---`
|
|
412
776
|
: ""
|
|
413
777
|
systemPrompt = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
414
|
-
|
|
778
|
+
|
|
779
|
+
if (lastMsgHasImages) {
|
|
780
|
+
promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content))
|
|
781
|
+
logInfo("request.native_images", { reqId })
|
|
782
|
+
} else {
|
|
783
|
+
promptInput = promptText
|
|
784
|
+
}
|
|
415
785
|
}
|
|
416
786
|
|
|
417
|
-
|
|
787
|
+
requestStarted = Date.now()
|
|
788
|
+
|
|
789
|
+
// Capture client info
|
|
790
|
+
const clientIp = c.req.header("x-forwarded-for")
|
|
791
|
+
?? c.req.header("x-real-ip")
|
|
792
|
+
?? c.req.header("cf-connecting-ip")
|
|
793
|
+
?? "unknown"
|
|
794
|
+
const userAgent = c.req.header("user-agent") ?? "unknown"
|
|
795
|
+
const bodyBytes = JSON.stringify(body).length
|
|
796
|
+
|
|
797
|
+
// ── Create trace ──────────────────────────────────────────────────────
|
|
798
|
+
trace = traceStore.create({
|
|
799
|
+
reqId,
|
|
800
|
+
model,
|
|
801
|
+
requestedModel: body.model || "sonnet",
|
|
802
|
+
stream,
|
|
803
|
+
hasTools,
|
|
804
|
+
thinking: thinking?.type,
|
|
805
|
+
promptLen: promptText.length,
|
|
806
|
+
systemLen: systemPrompt?.length ?? 0,
|
|
807
|
+
msgCount: messages.length,
|
|
808
|
+
bodyBytes,
|
|
809
|
+
clientIp,
|
|
810
|
+
userAgent,
|
|
811
|
+
})
|
|
812
|
+
|
|
813
|
+
// ── Queue ─────────────────────────────────────────────────────────────
|
|
814
|
+
const queueActive = requestQueue.activeCount
|
|
815
|
+
const queueWaiting = requestQueue.waitingCount
|
|
816
|
+
const needsQueue = queueActive >= MAX_CONCURRENT
|
|
817
|
+
|
|
818
|
+
traceStore.phase(reqId, "queued", { queueActive, queueWaiting })
|
|
819
|
+
|
|
820
|
+
if (needsQueue) {
|
|
821
|
+
logInfo("queue.waiting", {
|
|
822
|
+
reqId,
|
|
823
|
+
model,
|
|
824
|
+
queueActive,
|
|
825
|
+
queueWaiting,
|
|
826
|
+
queueTimeoutMs: QUEUE_TIMEOUT_MS,
|
|
827
|
+
})
|
|
828
|
+
}
|
|
418
829
|
|
|
419
|
-
// Acquire a slot in the concurrency queue — all code after this MUST
|
|
420
|
-
// release via the try/finally blocks in both streaming and non-streaming paths.
|
|
421
830
|
await requestQueue.acquire()
|
|
422
831
|
|
|
832
|
+
const queueWaitMs = Date.now() - requestStarted
|
|
833
|
+
traceStore.phase(reqId, "acquired", { queueWaitMs })
|
|
834
|
+
|
|
835
|
+
logInfo("queue.acquired", {
|
|
836
|
+
reqId,
|
|
837
|
+
queueWaitMs,
|
|
838
|
+
queueActive: requestQueue.activeCount,
|
|
839
|
+
queueWaiting: requestQueue.waitingCount,
|
|
840
|
+
})
|
|
841
|
+
|
|
842
|
+
// Start timers AFTER queue acquire — queue wait doesn't count
|
|
843
|
+
resetStallTimer()
|
|
844
|
+
startHardTimer()
|
|
845
|
+
|
|
423
846
|
// ── Non-streaming ──────────────────────────────────────────────────────
|
|
424
847
|
if (!stream) {
|
|
425
848
|
let fullText = ""
|
|
849
|
+
let capturedSessionId: string | undefined
|
|
850
|
+
const queryOpts = buildQueryOptions(model, { partial: false, systemPrompt, abortController, thinking, resume: resumeSessionId })
|
|
426
851
|
try {
|
|
427
|
-
|
|
852
|
+
traceStore.phase(reqId, "sdk_starting")
|
|
853
|
+
let sdkEventCount = 0
|
|
854
|
+
for await (const message of query({ prompt: promptInput, options: queryOpts })) {
|
|
855
|
+
sdkEventCount++
|
|
856
|
+
resetStallTimer()
|
|
857
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
|
|
858
|
+
// Capture session_id from init message
|
|
859
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
860
|
+
capturedSessionId = (message as any).session_id
|
|
861
|
+
}
|
|
428
862
|
if (message.type === "assistant") {
|
|
429
863
|
let turnText = ""
|
|
430
864
|
for (const block of message.message.content) {
|
|
@@ -433,12 +867,87 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
433
867
|
fullText = turnText
|
|
434
868
|
}
|
|
435
869
|
}
|
|
870
|
+
traceStore.phase(reqId, "sdk_done")
|
|
871
|
+
|
|
872
|
+
// Store session mapping for future resumption
|
|
873
|
+
if (conversationId && capturedSessionId) {
|
|
874
|
+
if (isResuming) {
|
|
875
|
+
sessionStore.recordResume(conversationId)
|
|
876
|
+
logInfo("session.resumed_ok", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
877
|
+
} else {
|
|
878
|
+
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
879
|
+
logInfo("session.created", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
} catch (resumeErr) {
|
|
883
|
+
// If resume failed, retry with full context
|
|
884
|
+
if (isResuming && resumeSessionId) {
|
|
885
|
+
logWarn("session.resume_failed", {
|
|
886
|
+
reqId,
|
|
887
|
+
conversationId,
|
|
888
|
+
sdkSessionId: resumeSessionId,
|
|
889
|
+
error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
|
|
890
|
+
})
|
|
891
|
+
if (conversationId) {
|
|
892
|
+
sessionStore.recordFailure(conversationId)
|
|
893
|
+
sessionStore.invalidate(conversationId)
|
|
894
|
+
}
|
|
895
|
+
// Rebuild with full context (non-resume path)
|
|
896
|
+
const fbLastMsg = messages[messages.length - 1]!
|
|
897
|
+
const priorMsgs = messages.slice(0, -1)
|
|
898
|
+
const contextParts = priorMsgs
|
|
899
|
+
.map((m) => {
|
|
900
|
+
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
901
|
+
return `[${role}]\n${serializeContent(m.content)}`
|
|
902
|
+
})
|
|
903
|
+
.join("\n\n")
|
|
904
|
+
const baseSystem = systemContext || ""
|
|
905
|
+
const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
|
|
906
|
+
const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
907
|
+
const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
|
|
908
|
+
? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
|
|
909
|
+
: serializeContent(fbLastMsg.content)
|
|
910
|
+
const fallbackOpts = buildQueryOptions(model, { partial: false, systemPrompt: fallbackSystem, abortController, thinking })
|
|
911
|
+
|
|
912
|
+
logInfo("session.fallback_full_context", { reqId, conversationId })
|
|
913
|
+
let sdkEventCount = 0
|
|
914
|
+
for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
|
|
915
|
+
sdkEventCount++
|
|
916
|
+
resetStallTimer()
|
|
917
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
|
|
918
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
919
|
+
capturedSessionId = (message as any).session_id
|
|
920
|
+
}
|
|
921
|
+
if (message.type === "assistant") {
|
|
922
|
+
let turnText = ""
|
|
923
|
+
for (const block of message.message.content) {
|
|
924
|
+
if (block.type === "text") turnText += block.text
|
|
925
|
+
}
|
|
926
|
+
fullText = turnText
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
traceStore.phase(reqId, "sdk_done")
|
|
930
|
+
// Store the new session
|
|
931
|
+
if (conversationId && capturedSessionId) {
|
|
932
|
+
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
933
|
+
logInfo("session.recreated_after_fallback", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
934
|
+
}
|
|
935
|
+
} else {
|
|
936
|
+
throw resumeErr
|
|
937
|
+
}
|
|
436
938
|
} finally {
|
|
437
|
-
|
|
438
|
-
|
|
939
|
+
clearStallTimer(); clearHardTimer()
|
|
940
|
+
// (temp files no longer used — images passed natively)
|
|
439
941
|
requestQueue.release()
|
|
942
|
+
logDebug("queue.released", {
|
|
943
|
+
reqId,
|
|
944
|
+
queueActive: requestQueue.activeCount,
|
|
945
|
+
queueWaiting: requestQueue.waitingCount,
|
|
946
|
+
})
|
|
440
947
|
}
|
|
441
948
|
|
|
949
|
+
traceStore.phase(reqId, "responding")
|
|
950
|
+
|
|
442
951
|
if (hasTools) {
|
|
443
952
|
const { toolCalls, textBefore } = parseToolUse(fullText)
|
|
444
953
|
const content: any[] = []
|
|
@@ -446,23 +955,26 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
446
955
|
for (const tc of toolCalls) content.push({ type: "tool_use", id: tc.id, name: tc.name, input: tc.input })
|
|
447
956
|
if (content.length === 0) content.push({ type: "text", text: fullText || "..." })
|
|
448
957
|
const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
|
|
449
|
-
|
|
958
|
+
|
|
959
|
+
traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
|
|
960
|
+
|
|
450
961
|
return c.json({
|
|
451
962
|
id: generateId("msg_"),
|
|
452
963
|
type: "message", role: "assistant", content,
|
|
453
964
|
model: body.model, stop_reason: stopReason, stop_sequence: null,
|
|
454
|
-
usage: { input_tokens: roughTokens(
|
|
965
|
+
usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
|
|
455
966
|
})
|
|
456
967
|
}
|
|
457
968
|
|
|
458
969
|
if (!fullText || !fullText.trim()) fullText = "..."
|
|
459
|
-
|
|
970
|
+
traceStore.complete(reqId, { outputLen: fullText.length })
|
|
971
|
+
|
|
460
972
|
return c.json({
|
|
461
973
|
id: generateId("msg_"),
|
|
462
974
|
type: "message", role: "assistant",
|
|
463
975
|
content: [{ type: "text", text: fullText }],
|
|
464
976
|
model: body.model, stop_reason: "end_turn", stop_sequence: null,
|
|
465
|
-
usage: { input_tokens: roughTokens(
|
|
977
|
+
usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
|
|
466
978
|
})
|
|
467
979
|
}
|
|
468
980
|
|
|
@@ -470,23 +982,62 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
470
982
|
const encoder = new TextEncoder()
|
|
471
983
|
const readable = new ReadableStream({
|
|
472
984
|
cancel() {
|
|
473
|
-
|
|
985
|
+
clientDisconnected = true
|
|
986
|
+
logWarn("stream.client_disconnect", {
|
|
987
|
+
reqId,
|
|
988
|
+
phase: trace?.phase,
|
|
989
|
+
sdkEventCount: trace?.sdkEventCount,
|
|
990
|
+
outputLen: trace?.outputLen,
|
|
991
|
+
elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
|
|
992
|
+
model: trace?.model,
|
|
993
|
+
})
|
|
474
994
|
abortController.abort()
|
|
475
995
|
},
|
|
476
996
|
async start(controller) {
|
|
477
997
|
const messageId = generateId("msg_")
|
|
478
998
|
let queueReleased = false
|
|
479
|
-
const releaseQueue = () => {
|
|
999
|
+
const releaseQueue = () => {
|
|
1000
|
+
if (!queueReleased) {
|
|
1001
|
+
queueReleased = true
|
|
1002
|
+
requestQueue.release()
|
|
1003
|
+
logDebug("queue.released", {
|
|
1004
|
+
reqId,
|
|
1005
|
+
queueActive: requestQueue.activeCount,
|
|
1006
|
+
queueWaiting: requestQueue.waitingCount,
|
|
1007
|
+
})
|
|
1008
|
+
}
|
|
1009
|
+
}
|
|
480
1010
|
|
|
1011
|
+
let sseSendErrors = 0
|
|
481
1012
|
const sse = (event: string, data: object) => {
|
|
482
1013
|
try {
|
|
483
1014
|
controller.enqueue(encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`))
|
|
484
|
-
} catch {
|
|
1015
|
+
} catch (e) {
|
|
1016
|
+
sseSendErrors++
|
|
1017
|
+
if (sseSendErrors <= 3) {
|
|
1018
|
+
logWarn("stream.sse_send_failed", {
|
|
1019
|
+
reqId,
|
|
1020
|
+
event,
|
|
1021
|
+
sseSendErrors,
|
|
1022
|
+
error: e instanceof Error ? e.message : String(e),
|
|
1023
|
+
})
|
|
1024
|
+
}
|
|
1025
|
+
}
|
|
485
1026
|
}
|
|
486
1027
|
|
|
487
1028
|
try {
|
|
488
1029
|
const heartbeat = setInterval(() => {
|
|
489
|
-
try {
|
|
1030
|
+
try {
|
|
1031
|
+
controller.enqueue(encoder.encode(`event: ping\ndata: {"type": "ping"}\n\n`))
|
|
1032
|
+
} catch (e) {
|
|
1033
|
+
logWarn("stream.heartbeat_failed", {
|
|
1034
|
+
reqId,
|
|
1035
|
+
error: e instanceof Error ? e.message : String(e),
|
|
1036
|
+
phase: trace?.phase,
|
|
1037
|
+
elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
|
|
1038
|
+
})
|
|
1039
|
+
clearInterval(heartbeat)
|
|
1040
|
+
}
|
|
490
1041
|
}, 15_000)
|
|
491
1042
|
|
|
492
1043
|
sse("message_start", {
|
|
@@ -494,31 +1045,123 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
494
1045
|
message: {
|
|
495
1046
|
id: messageId, type: "message", role: "assistant", content: [],
|
|
496
1047
|
model: body.model, stop_reason: null, stop_sequence: null,
|
|
497
|
-
usage: { input_tokens: roughTokens(
|
|
1048
|
+
usage: { input_tokens: roughTokens(promptText), output_tokens: 1 }
|
|
498
1049
|
}
|
|
499
1050
|
})
|
|
500
1051
|
|
|
501
1052
|
if (hasTools) {
|
|
502
1053
|
// ── With tools: buffer output, parse tool_use blocks at end ──
|
|
503
1054
|
let fullText = ""
|
|
1055
|
+
let sdkEventCount = 0
|
|
1056
|
+
let lastEventAt = Date.now()
|
|
1057
|
+
const stallLog = setInterval(() => {
|
|
1058
|
+
const stallMs = Date.now() - lastEventAt
|
|
1059
|
+
traceStore.stall(reqId, stallMs)
|
|
1060
|
+
}, 15_000)
|
|
1061
|
+
let capturedSessionId: string | undefined
|
|
504
1062
|
try {
|
|
505
|
-
|
|
1063
|
+
traceStore.phase(reqId, "sdk_starting")
|
|
1064
|
+
for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
|
|
1065
|
+
sdkEventCount++
|
|
1066
|
+
lastEventAt = Date.now()
|
|
1067
|
+
resetStallTimer()
|
|
1068
|
+
const subtype = (message as any).event?.type ?? (message as any).message?.type
|
|
1069
|
+
// Capture session_id from init message
|
|
1070
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1071
|
+
capturedSessionId = (message as any).session_id
|
|
1072
|
+
}
|
|
506
1073
|
if (message.type === "stream_event") {
|
|
507
1074
|
const ev = message.event as any
|
|
1075
|
+
// Detect first content event BEFORE sdkEvent records it
|
|
1076
|
+
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1077
|
+
traceStore.phase(reqId, "sdk_streaming")
|
|
1078
|
+
}
|
|
508
1079
|
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
509
1080
|
fullText += ev.delta.text ?? ""
|
|
1081
|
+
traceStore.updateOutput(reqId, fullText.length)
|
|
1082
|
+
checkOutputSize(fullText.length)
|
|
510
1083
|
}
|
|
511
1084
|
}
|
|
1085
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1086
|
+
}
|
|
1087
|
+
traceStore.phase(reqId, "sdk_done")
|
|
1088
|
+
|
|
1089
|
+
// Store session mapping
|
|
1090
|
+
if (conversationId && capturedSessionId) {
|
|
1091
|
+
if (isResuming) {
|
|
1092
|
+
sessionStore.recordResume(conversationId)
|
|
1093
|
+
} else {
|
|
1094
|
+
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
} catch (resumeErr) {
|
|
1098
|
+
// Resume failed in streaming with-tools path — retry with full context
|
|
1099
|
+
if (isResuming && resumeSessionId) {
|
|
1100
|
+
logWarn("session.resume_failed_stream", {
|
|
1101
|
+
reqId, conversationId, sdkSessionId: resumeSessionId,
|
|
1102
|
+
error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
|
|
1103
|
+
})
|
|
1104
|
+
if (conversationId) {
|
|
1105
|
+
sessionStore.recordFailure(conversationId)
|
|
1106
|
+
sessionStore.invalidate(conversationId)
|
|
1107
|
+
}
|
|
1108
|
+
const fbLastMsg = messages[messages.length - 1]!
|
|
1109
|
+
const priorMsgs = messages.slice(0, -1)
|
|
1110
|
+
const contextParts = priorMsgs
|
|
1111
|
+
.map((m) => {
|
|
1112
|
+
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
1113
|
+
return `[${role}]\n${serializeContent(m.content)}`
|
|
1114
|
+
})
|
|
1115
|
+
.join("\n\n")
|
|
1116
|
+
const baseSystem = systemContext || ""
|
|
1117
|
+
const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
|
|
1118
|
+
const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
1119
|
+
const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
|
|
1120
|
+
? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
|
|
1121
|
+
: serializeContent(fbLastMsg.content)
|
|
1122
|
+
const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
|
|
1123
|
+
|
|
1124
|
+
logInfo("session.fallback_full_context_stream", { reqId, conversationId })
|
|
1125
|
+
sdkEventCount = 0
|
|
1126
|
+
for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
|
|
1127
|
+
sdkEventCount++
|
|
1128
|
+
lastEventAt = Date.now()
|
|
1129
|
+
resetStallTimer()
|
|
1130
|
+
const subtype = (message as any).event?.type ?? (message as any).message?.type
|
|
1131
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1132
|
+
capturedSessionId = (message as any).session_id
|
|
1133
|
+
}
|
|
1134
|
+
if (message.type === "stream_event") {
|
|
1135
|
+
const ev = message.event as any
|
|
1136
|
+
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1137
|
+
traceStore.phase(reqId, "sdk_streaming")
|
|
1138
|
+
}
|
|
1139
|
+
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1140
|
+
fullText += ev.delta.text ?? ""
|
|
1141
|
+
traceStore.updateOutput(reqId, fullText.length)
|
|
1142
|
+
checkOutputSize(fullText.length)
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1145
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1146
|
+
}
|
|
1147
|
+
traceStore.phase(reqId, "sdk_done")
|
|
1148
|
+
if (conversationId && capturedSessionId) {
|
|
1149
|
+
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
1150
|
+
logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
1151
|
+
}
|
|
1152
|
+
} else {
|
|
1153
|
+
throw resumeErr
|
|
512
1154
|
}
|
|
513
1155
|
} finally {
|
|
1156
|
+
clearInterval(stallLog)
|
|
514
1157
|
clearInterval(heartbeat)
|
|
515
|
-
|
|
516
|
-
|
|
1158
|
+
clearStallTimer(); clearHardTimer()
|
|
1159
|
+
// (temp files no longer used — images passed natively)
|
|
517
1160
|
releaseQueue()
|
|
518
1161
|
}
|
|
519
1162
|
|
|
1163
|
+
traceStore.phase(reqId, "responding")
|
|
520
1164
|
const { toolCalls, textBefore } = parseToolUse(fullText)
|
|
521
|
-
claudeLog("proxy.stream.done", { reqId, len: fullText.length, toolCalls: toolCalls.length })
|
|
522
1165
|
|
|
523
1166
|
let blockIdx = 0
|
|
524
1167
|
const textContent = toolCalls.length === 0 ? (fullText || "...") : textBefore
|
|
@@ -544,6 +1187,8 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
544
1187
|
sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
|
|
545
1188
|
sse("message_stop", { type: "message_stop" })
|
|
546
1189
|
controller.close()
|
|
1190
|
+
|
|
1191
|
+
traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
|
|
547
1192
|
return
|
|
548
1193
|
}
|
|
549
1194
|
|
|
@@ -552,29 +1197,124 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
552
1197
|
|
|
553
1198
|
let fullText = ""
|
|
554
1199
|
let hasStreamed = false
|
|
1200
|
+
let sdkEventCount = 0
|
|
1201
|
+
let lastEventAt = Date.now()
|
|
1202
|
+
let capturedSessionId2: string | undefined
|
|
1203
|
+
const stallLog = setInterval(() => {
|
|
1204
|
+
const stallMs = Date.now() - lastEventAt
|
|
1205
|
+
traceStore.stall(reqId, stallMs)
|
|
1206
|
+
}, 15_000)
|
|
555
1207
|
try {
|
|
556
|
-
|
|
1208
|
+
traceStore.phase(reqId, "sdk_starting")
|
|
1209
|
+
for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
|
|
1210
|
+
sdkEventCount++
|
|
1211
|
+
lastEventAt = Date.now()
|
|
1212
|
+
resetStallTimer()
|
|
1213
|
+
const subtype = (message as any).event?.type ?? (message as any).message?.type
|
|
1214
|
+
// Capture session_id from init message
|
|
1215
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1216
|
+
capturedSessionId2 = (message as any).session_id
|
|
1217
|
+
}
|
|
557
1218
|
if (message.type === "stream_event") {
|
|
558
1219
|
const ev = message.event as any
|
|
1220
|
+
// Detect first content event BEFORE sdkEvent records it
|
|
1221
|
+
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1222
|
+
traceStore.phase(reqId, "sdk_streaming")
|
|
1223
|
+
}
|
|
559
1224
|
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
560
1225
|
const text = ev.delta.text ?? ""
|
|
561
1226
|
if (text) {
|
|
562
1227
|
fullText += text
|
|
563
1228
|
hasStreamed = true
|
|
1229
|
+
traceStore.updateOutput(reqId, fullText.length)
|
|
1230
|
+
checkOutputSize(fullText.length)
|
|
564
1231
|
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
|
|
565
1232
|
}
|
|
566
1233
|
}
|
|
567
1234
|
}
|
|
1235
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1236
|
+
}
|
|
1237
|
+
traceStore.phase(reqId, "sdk_done")
|
|
1238
|
+
|
|
1239
|
+
// Store session mapping
|
|
1240
|
+
if (conversationId && capturedSessionId2) {
|
|
1241
|
+
if (isResuming) {
|
|
1242
|
+
sessionStore.recordResume(conversationId)
|
|
1243
|
+
} else {
|
|
1244
|
+
sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
|
|
1245
|
+
}
|
|
1246
|
+
}
|
|
1247
|
+
} catch (resumeErr) {
|
|
1248
|
+
// Resume failed in streaming no-tools path — retry with full context
|
|
1249
|
+
if (isResuming && resumeSessionId) {
|
|
1250
|
+
logWarn("session.resume_failed_stream", {
|
|
1251
|
+
reqId, conversationId, sdkSessionId: resumeSessionId,
|
|
1252
|
+
error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
|
|
1253
|
+
})
|
|
1254
|
+
if (conversationId) {
|
|
1255
|
+
sessionStore.recordFailure(conversationId)
|
|
1256
|
+
sessionStore.invalidate(conversationId)
|
|
1257
|
+
}
|
|
1258
|
+
const fbLastMsg = messages[messages.length - 1]!
|
|
1259
|
+
const priorMsgs = messages.slice(0, -1)
|
|
1260
|
+
const contextParts = priorMsgs
|
|
1261
|
+
.map((m) => {
|
|
1262
|
+
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
1263
|
+
return `[${role}]\n${serializeContent(m.content)}`
|
|
1264
|
+
})
|
|
1265
|
+
.join("\n\n")
|
|
1266
|
+
const baseSystem = systemContext || ""
|
|
1267
|
+
const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
|
|
1268
|
+
const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
1269
|
+
const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
|
|
1270
|
+
? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
|
|
1271
|
+
: serializeContent(fbLastMsg.content)
|
|
1272
|
+
const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
|
|
1273
|
+
|
|
1274
|
+
logInfo("session.fallback_full_context_stream", { reqId, conversationId })
|
|
1275
|
+
sdkEventCount = 0
|
|
1276
|
+
for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
|
|
1277
|
+
sdkEventCount++
|
|
1278
|
+
lastEventAt = Date.now()
|
|
1279
|
+
resetStallTimer()
|
|
1280
|
+
const subtype = (message as any).event?.type ?? (message as any).message?.type
|
|
1281
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1282
|
+
capturedSessionId2 = (message as any).session_id
|
|
1283
|
+
}
|
|
1284
|
+
if (message.type === "stream_event") {
|
|
1285
|
+
const ev = message.event as any
|
|
1286
|
+
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1287
|
+
traceStore.phase(reqId, "sdk_streaming")
|
|
1288
|
+
}
|
|
1289
|
+
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1290
|
+
const text = ev.delta.text ?? ""
|
|
1291
|
+
if (text) {
|
|
1292
|
+
fullText += text
|
|
1293
|
+
hasStreamed = true
|
|
1294
|
+
traceStore.updateOutput(reqId, fullText.length)
|
|
1295
|
+
checkOutputSize(fullText.length)
|
|
1296
|
+
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
|
|
1297
|
+
}
|
|
1298
|
+
}
|
|
1299
|
+
}
|
|
1300
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1301
|
+
}
|
|
1302
|
+
traceStore.phase(reqId, "sdk_done")
|
|
1303
|
+
if (conversationId && capturedSessionId2) {
|
|
1304
|
+
sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
|
|
1305
|
+
logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId2 })
|
|
1306
|
+
}
|
|
1307
|
+
} else {
|
|
1308
|
+
throw resumeErr
|
|
568
1309
|
}
|
|
569
1310
|
} finally {
|
|
1311
|
+
clearInterval(stallLog)
|
|
570
1312
|
clearInterval(heartbeat)
|
|
571
|
-
|
|
572
|
-
|
|
1313
|
+
clearStallTimer(); clearHardTimer()
|
|
1314
|
+
// (temp files no longer used — images passed natively)
|
|
573
1315
|
releaseQueue()
|
|
574
1316
|
}
|
|
575
1317
|
|
|
576
|
-
claudeLog("proxy.stream.done", { reqId, len: fullText.length })
|
|
577
|
-
|
|
578
1318
|
if (!hasStreamed) {
|
|
579
1319
|
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
|
|
580
1320
|
}
|
|
@@ -584,18 +1324,58 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
584
1324
|
sse("message_stop", { type: "message_stop" })
|
|
585
1325
|
controller.close()
|
|
586
1326
|
|
|
1327
|
+
traceStore.complete(reqId, { outputLen: fullText.length })
|
|
1328
|
+
|
|
587
1329
|
} catch (error) {
|
|
588
|
-
|
|
1330
|
+
clearStallTimer(); clearHardTimer()
|
|
589
1331
|
releaseQueue()
|
|
590
|
-
const
|
|
591
|
-
const
|
|
592
|
-
const
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
1332
|
+
const err = error instanceof Error ? error : new Error(String(error))
|
|
1333
|
+
const isAbort = err.name === "AbortError" || err.message?.includes("abort")
|
|
1334
|
+
const isQueueTimeout = err.message.includes("Queue timeout")
|
|
1335
|
+
|
|
1336
|
+
let errMsg: string
|
|
1337
|
+
let errType: string
|
|
1338
|
+
if (clientDisconnected) {
|
|
1339
|
+
errMsg = "Client disconnected during streaming."
|
|
1340
|
+
errType = "api_error"
|
|
1341
|
+
} else if (abortReason === "max_duration") {
|
|
1342
|
+
errMsg = `Request exceeded max duration of ${finalConfig.maxDurationMs / 1000}s. Output: ${trace?.outputLen ?? 0} chars.`
|
|
1343
|
+
errType = "api_error"
|
|
1344
|
+
} else if (abortReason === "max_output") {
|
|
1345
|
+
errMsg = `Request exceeded max output size of ${finalConfig.maxOutputChars} chars.`
|
|
1346
|
+
errType = "api_error"
|
|
1347
|
+
} else if (isAbort) {
|
|
1348
|
+
errMsg = `Request stalled — no SDK activity for ${finalConfig.stallTimeoutMs / 1000}s. Please retry.`
|
|
1349
|
+
errType = "api_error"
|
|
1350
|
+
} else if (isQueueTimeout) {
|
|
1351
|
+
errMsg = "Server busy — all request slots are occupied. Please retry shortly."
|
|
1352
|
+
errType = "overloaded_error"
|
|
1353
|
+
} else {
|
|
1354
|
+
errMsg = err.message
|
|
1355
|
+
errType = "api_error"
|
|
1356
|
+
}
|
|
1357
|
+
|
|
1358
|
+
// Trace the failure with full context
|
|
1359
|
+
traceStore.fail(reqId, err, "error", {
|
|
1360
|
+
clientDisconnect: clientDisconnected,
|
|
1361
|
+
abortReason,
|
|
1362
|
+
aborted: isAbort,
|
|
1363
|
+
queueTimeout: isQueueTimeout,
|
|
1364
|
+
stallTimeoutMs: finalConfig.stallTimeoutMs,
|
|
1365
|
+
maxDurationMs: finalConfig.maxDurationMs,
|
|
1366
|
+
maxOutputChars: finalConfig.maxOutputChars,
|
|
1367
|
+
sseSendErrors,
|
|
1368
|
+
})
|
|
1369
|
+
|
|
1370
|
+
// (temp files no longer used — images passed natively)
|
|
1371
|
+
if (!clientDisconnected) {
|
|
1372
|
+
try {
|
|
1373
|
+
sse("error", { type: "error", error: { type: errType, message: errMsg }, request_id: reqId })
|
|
1374
|
+
controller.close()
|
|
1375
|
+
} catch {}
|
|
1376
|
+
} else {
|
|
1377
|
+
try { controller.close() } catch {}
|
|
1378
|
+
}
|
|
599
1379
|
}
|
|
600
1380
|
}
|
|
601
1381
|
})
|
|
@@ -609,15 +1389,53 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
609
1389
|
})
|
|
610
1390
|
|
|
611
1391
|
} catch (error) {
|
|
612
|
-
const
|
|
613
|
-
const
|
|
614
|
-
const
|
|
615
|
-
|
|
616
|
-
|
|
1392
|
+
const err = error instanceof Error ? error : new Error(String(error))
|
|
1393
|
+
const isAbort = err.name === "AbortError" || err.message?.includes("abort")
|
|
1394
|
+
const isQueueTimeout = err.message.includes("Queue timeout")
|
|
1395
|
+
|
|
1396
|
+
let errMsg: string
|
|
1397
|
+
let errType: string
|
|
1398
|
+
if (clientDisconnected) {
|
|
1399
|
+
errMsg = "Client disconnected."
|
|
1400
|
+
errType = "api_error"
|
|
1401
|
+
} else if (abortReason === "max_duration") {
|
|
1402
|
+
errMsg = `Request exceeded max duration of ${finalConfig.maxDurationMs / 1000}s.`
|
|
1403
|
+
errType = "api_error"
|
|
1404
|
+
} else if (abortReason === "max_output") {
|
|
1405
|
+
errMsg = `Request exceeded max output size of ${finalConfig.maxOutputChars} chars.`
|
|
1406
|
+
errType = "api_error"
|
|
1407
|
+
} else if (isAbort) {
|
|
1408
|
+
errMsg = `Request stalled — no SDK activity for ${finalConfig.stallTimeoutMs / 1000}s. Please retry.`
|
|
1409
|
+
errType = "api_error"
|
|
1410
|
+
} else if (isQueueTimeout) {
|
|
1411
|
+
errMsg = "Server busy — all request slots are occupied. Please retry shortly."
|
|
1412
|
+
errType = "overloaded_error"
|
|
1413
|
+
} else {
|
|
1414
|
+
errMsg = err.message
|
|
1415
|
+
errType = "api_error"
|
|
1416
|
+
}
|
|
1417
|
+
|
|
1418
|
+
// Trace the failure
|
|
1419
|
+
if (trace) {
|
|
1420
|
+
traceStore.fail(reqId, err, "error", {
|
|
1421
|
+
clientDisconnect: clientDisconnected,
|
|
1422
|
+
aborted: isAbort,
|
|
1423
|
+
queueTimeout: isQueueTimeout,
|
|
1424
|
+
})
|
|
1425
|
+
} else {
|
|
1426
|
+
logError("request.error.no_trace", { reqId, error: errMsg, stack: err.stack })
|
|
1427
|
+
}
|
|
1428
|
+
|
|
1429
|
+
if (isQueueTimeout) {
|
|
617
1430
|
return new Response(JSON.stringify({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }), {
|
|
618
1431
|
status: 529, headers: { "Content-Type": "application/json" }
|
|
619
1432
|
})
|
|
620
1433
|
}
|
|
1434
|
+
if (isAbort) {
|
|
1435
|
+
return new Response(JSON.stringify({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }), {
|
|
1436
|
+
status: 504, headers: { "Content-Type": "application/json" }
|
|
1437
|
+
})
|
|
1438
|
+
}
|
|
621
1439
|
return c.json({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }, 500)
|
|
622
1440
|
}
|
|
623
1441
|
}
|
|
@@ -635,20 +1453,15 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
635
1453
|
app.get("/v1/messages/batches/:id", handleBatches)
|
|
636
1454
|
|
|
637
1455
|
// ── OpenAI-compatible /v1/chat/completions ─────────────────────────────
|
|
638
|
-
// Translates OpenAI ChatCompletion format to/from Anthropic Messages API
|
|
639
|
-
// so tools expecting OpenAI endpoints (LangChain, LiteLLM, etc.) just work.
|
|
640
1456
|
|
|
641
1457
|
function convertOpenaiContent(content: any): any {
|
|
642
|
-
// String content → pass through
|
|
643
1458
|
if (typeof content === "string") return content
|
|
644
1459
|
if (!Array.isArray(content)) return String(content ?? "")
|
|
645
1460
|
|
|
646
|
-
// Array content → convert image_url parts to Anthropic image blocks
|
|
647
1461
|
return content.map((part: any) => {
|
|
648
1462
|
if (part.type === "text") return { type: "text", text: part.text ?? "" }
|
|
649
1463
|
if (part.type === "image_url" && part.image_url?.url) {
|
|
650
1464
|
const url = part.image_url.url as string
|
|
651
|
-
// Data URL: data:image/jpeg;base64,...
|
|
652
1465
|
const dataMatch = url.match(/^data:(image\/\w+);base64,(.+)$/)
|
|
653
1466
|
if (dataMatch) {
|
|
654
1467
|
return {
|
|
@@ -660,7 +1473,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
660
1473
|
}
|
|
661
1474
|
}
|
|
662
1475
|
}
|
|
663
|
-
// HTTP URL — pass as URL source
|
|
664
1476
|
return {
|
|
665
1477
|
type: "image",
|
|
666
1478
|
source: { type: "url", url }
|
|
@@ -683,7 +1495,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
683
1495
|
} else if (msg.role === "user") {
|
|
684
1496
|
converted.push({ role: "user", content: convertOpenaiContent(msg.content) })
|
|
685
1497
|
} else if (msg.role === "assistant") {
|
|
686
|
-
// Handle assistant messages with tool_calls (OpenAI format)
|
|
687
1498
|
if (msg.tool_calls?.length) {
|
|
688
1499
|
const content: any[] = []
|
|
689
1500
|
if (msg.content) content.push({ type: "text", text: msg.content })
|
|
@@ -700,7 +1511,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
700
1511
|
converted.push({ role: "assistant", content: msg.content ?? "" })
|
|
701
1512
|
}
|
|
702
1513
|
} else if (msg.role === "tool") {
|
|
703
|
-
// OpenAI tool result → Anthropic tool_result
|
|
704
1514
|
converted.push({
|
|
705
1515
|
role: "user",
|
|
706
1516
|
content: [{
|
|
@@ -782,7 +1592,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
782
1592
|
const stream = body.stream ?? false
|
|
783
1593
|
const requestedModel = body.model ?? "claude-sonnet-4-6"
|
|
784
1594
|
|
|
785
|
-
// Build Anthropic-format request body
|
|
786
1595
|
const anthropicBody: any = {
|
|
787
1596
|
model: requestedModel,
|
|
788
1597
|
messages,
|
|
@@ -795,12 +1604,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
795
1604
|
if (body.temperature !== undefined) anthropicBody.temperature = body.temperature
|
|
796
1605
|
if (body.top_p !== undefined) anthropicBody.top_p = body.top_p
|
|
797
1606
|
if (body.stop) anthropicBody.stop_sequences = Array.isArray(body.stop) ? body.stop : [body.stop]
|
|
798
|
-
// Convert OpenAI tools format to Anthropic tools format
|
|
799
1607
|
if (body.tools?.length) {
|
|
800
1608
|
anthropicBody.tools = openaiToAnthropicTools(body.tools)
|
|
801
1609
|
}
|
|
802
1610
|
|
|
803
|
-
// Forward to our own /v1/messages handler by making an internal request
|
|
804
1611
|
const internalHeaders: Record<string, string> = { "Content-Type": "application/json" }
|
|
805
1612
|
const authHeader = c.req.header("authorization") ?? c.req.header("x-api-key")
|
|
806
1613
|
if (authHeader) {
|
|
@@ -821,7 +1628,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
821
1628
|
return c.json(anthropicToOpenaiResponse(anthropicJson, requestedModel))
|
|
822
1629
|
}
|
|
823
1630
|
|
|
824
|
-
// Streaming: translate SSE events from Anthropic format to OpenAI format
|
|
825
1631
|
const includeUsage = body.stream_options?.include_usage === true
|
|
826
1632
|
const encoder = new TextEncoder()
|
|
827
1633
|
const readable = new ReadableStream({
|
|
@@ -836,7 +1642,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
836
1642
|
const created = Math.floor(Date.now() / 1000)
|
|
837
1643
|
let sentRole = false
|
|
838
1644
|
let finishReason: string | null = null
|
|
839
|
-
// Track active tool calls for streaming
|
|
840
1645
|
const activeToolCalls: Map<number, { id: string; name: string }> = new Map()
|
|
841
1646
|
let toolCallIndex = 0
|
|
842
1647
|
let usageInfo: { input_tokens: number; output_tokens: number } | null = null
|
|
@@ -854,7 +1659,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
854
1659
|
try {
|
|
855
1660
|
const event = JSON.parse(line.slice(6))
|
|
856
1661
|
|
|
857
|
-
// Emit role delta on first event
|
|
858
1662
|
if (!sentRole && (event.type === "content_block_start" || event.type === "content_block_delta")) {
|
|
859
1663
|
sentRole = true
|
|
860
1664
|
controller.enqueue(encoder.encode(`data: ${JSON.stringify({
|
|
@@ -864,7 +1668,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
864
1668
|
}
|
|
865
1669
|
|
|
866
1670
|
if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
|
|
867
|
-
// Start of a tool_use block → emit tool_call function header
|
|
868
1671
|
const idx = toolCallIndex++
|
|
869
1672
|
activeToolCalls.set(event.index, { id: event.content_block.id, name: event.content_block.name })
|
|
870
1673
|
controller.enqueue(encoder.encode(`data: ${JSON.stringify({
|
|
@@ -874,7 +1677,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
874
1677
|
}, finish_reason: null }]
|
|
875
1678
|
})}\n\n`))
|
|
876
1679
|
} else if (event.type === "content_block_delta" && event.delta?.type === "input_json_delta") {
|
|
877
|
-
// Tool call argument streaming
|
|
878
1680
|
const tc = activeToolCalls.get(event.index)
|
|
879
1681
|
if (tc) {
|
|
880
1682
|
const idx = Array.from(activeToolCalls.keys()).indexOf(event.index)
|
|
@@ -891,7 +1693,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
891
1693
|
choices: [{ index: 0, delta: { content: event.delta.text }, finish_reason: null }]
|
|
892
1694
|
})}\n\n`))
|
|
893
1695
|
} else if (event.type === "message_delta") {
|
|
894
|
-
// Capture finish reason and usage for final chunk
|
|
895
1696
|
const sr = event.delta?.stop_reason
|
|
896
1697
|
finishReason = sr === "tool_use" ? "tool_calls" : sr === "max_tokens" ? "length" : "stop"
|
|
897
1698
|
if (event.usage) {
|
|
@@ -903,7 +1704,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
903
1704
|
}
|
|
904
1705
|
}
|
|
905
1706
|
} else if (event.type === "message_start" && event.message?.usage) {
|
|
906
|
-
// Capture input token count from message_start
|
|
907
1707
|
usageInfo = { input_tokens: event.message.usage.input_tokens ?? 0, output_tokens: 0 }
|
|
908
1708
|
} else if (event.type === "message_stop") {
|
|
909
1709
|
const finalChunk: any = {
|
|
@@ -959,7 +1759,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
959
1759
|
})
|
|
960
1760
|
app.get("/v1/chat/models", handleOpenaiModels)
|
|
961
1761
|
|
|
962
|
-
// 404 catch-all
|
|
1762
|
+
// 404 catch-all
|
|
963
1763
|
app.all("*", (c) => c.json({
|
|
964
1764
|
type: "error",
|
|
965
1765
|
error: { type: "not_found_error", message: `${c.req.method} ${c.req.path} not found` }
|
|
@@ -978,12 +1778,60 @@ export async function startProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
|
978
1778
|
idleTimeout: 0
|
|
979
1779
|
})
|
|
980
1780
|
|
|
1781
|
+
// Startup log with full configuration
|
|
1782
|
+
logInfo("proxy.started", {
|
|
1783
|
+
version: PROXY_VERSION,
|
|
1784
|
+
host: finalConfig.host,
|
|
1785
|
+
port: finalConfig.port,
|
|
1786
|
+
stallTimeoutMs: finalConfig.stallTimeoutMs,
|
|
1787
|
+
maxDurationMs: finalConfig.maxDurationMs,
|
|
1788
|
+
maxOutputChars: finalConfig.maxOutputChars,
|
|
1789
|
+
maxConcurrent: MAX_CONCURRENT,
|
|
1790
|
+
queueTimeoutMs: QUEUE_TIMEOUT_MS,
|
|
1791
|
+
claudeExecutable,
|
|
1792
|
+
logDir: LOG_DIR,
|
|
1793
|
+
debug: finalConfig.debug,
|
|
1794
|
+
pid: process.pid,
|
|
1795
|
+
})
|
|
1796
|
+
|
|
981
1797
|
console.log(`Claude SDK Proxy v${PROXY_VERSION} running at http://${finalConfig.host}:${finalConfig.port}`)
|
|
1798
|
+
console.log(` Logs: ${LOG_DIR}`)
|
|
1799
|
+
console.log(` Debug: http://${finalConfig.host}:${finalConfig.port}/debug/stats`)
|
|
1800
|
+
|
|
1801
|
+
// Periodic health logging (every 5 minutes)
|
|
1802
|
+
const healthInterval = setInterval(() => {
|
|
1803
|
+
const mem = process.memoryUsage()
|
|
1804
|
+
const stats = traceStore.getStats()
|
|
1805
|
+
logInfo("proxy.health", {
|
|
1806
|
+
pid: process.pid,
|
|
1807
|
+
rssBytes: mem.rss,
|
|
1808
|
+
rssMB: +(mem.rss / 1024 / 1024).toFixed(1),
|
|
1809
|
+
heapUsedMB: +(mem.heapUsed / 1024 / 1024).toFixed(1),
|
|
1810
|
+
heapTotalMB: +(mem.heapTotal / 1024 / 1024).toFixed(1),
|
|
1811
|
+
externalMB: +(mem.external / 1024 / 1024).toFixed(1),
|
|
1812
|
+
uptimeMs: stats.uptimeMs,
|
|
1813
|
+
totalRequests: stats.requests.total,
|
|
1814
|
+
totalErrors: stats.requests.errors,
|
|
1815
|
+
activeRequests: stats.requests.active,
|
|
1816
|
+
queueActive: requestQueue.activeCount,
|
|
1817
|
+
queueWaiting: requestQueue.waitingCount,
|
|
1818
|
+
})
|
|
1819
|
+
}, 300_000) // 5 minutes
|
|
982
1820
|
|
|
983
1821
|
// Graceful shutdown
|
|
984
1822
|
const shutdown = (signal: string) => {
|
|
1823
|
+
const stats = traceStore.getStats()
|
|
1824
|
+
logInfo("proxy.shutdown", {
|
|
1825
|
+
signal,
|
|
1826
|
+
pid: process.pid,
|
|
1827
|
+
totalRequests: stats.requests.total,
|
|
1828
|
+
totalErrors: stats.requests.errors,
|
|
1829
|
+
activeRequests: stats.requests.active,
|
|
1830
|
+
uptimeMs: stats.uptimeMs,
|
|
1831
|
+
})
|
|
1832
|
+
clearInterval(healthInterval)
|
|
985
1833
|
console.log(`\nReceived ${signal}, shutting down...`)
|
|
986
|
-
server.stop(true)
|
|
1834
|
+
server.stop(true)
|
|
987
1835
|
process.exit(0)
|
|
988
1836
|
}
|
|
989
1837
|
process.on("SIGINT", () => shutdown("SIGINT"))
|