claude-sdk-proxy 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,13 +4,15 @@ import { query } from "@anthropic-ai/claude-agent-sdk"
4
4
  import type { Context } from "hono"
5
5
  import type { ProxyConfig } from "./types"
6
6
  import { DEFAULT_PROXY_CONFIG } from "./types"
7
- import { claudeLog } from "../logger"
7
+ import { logInfo, logWarn, logError, logDebug, LOG_DIR } from "../logger"
8
+ import { traceStore } from "../trace"
9
+ import { sessionStore } from "../session-store"
8
10
  import { execSync } from "child_process"
9
- import { existsSync, writeFileSync, unlinkSync, readFileSync } from "fs"
10
- import { tmpdir } from "os"
11
+ import { existsSync, writeFileSync, readFileSync, readdirSync } from "fs"
11
12
  import { randomBytes } from "crypto"
12
13
  import { fileURLToPath } from "url"
13
14
  import { join, dirname } from "path"
15
+
14
16
  // Base62 ID generator — matches Anthropic's real ID format (e.g. msg_01XFDUDYJgAACzvnptvVoYEL)
15
17
  const BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
16
18
  function generateId(prefix: string, length = 24): string {
@@ -47,9 +49,11 @@ const claudeExecutable = resolveClaudeExecutable()
47
49
 
48
50
/**
 * Parse a strictly positive integer from an environment-style string.
 *
 * @param raw      Raw value (typically `process.env.X`), possibly undefined or empty.
 * @param fallback Value returned when `raw` is missing, non-numeric, zero or negative.
 * @returns The parsed integer, or `fallback` when `raw` is unusable.
 */
function readPositiveIntEnv(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? "", 10)
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback
}

// Number of requests allowed to run at once (CLAUDE_PROXY_MAX_CONCURRENT, default 5).
// Validated so that an empty or garbage value cannot turn into NaN, which would
// break every `>= MAX_CONCURRENT` comparison made against the active count.
const MAX_CONCURRENT = readPositiveIntEnv(process.env.CLAUDE_PROXY_MAX_CONCURRENT, 5)

// How long a queued request may wait for a slot before it is rejected
// (CLAUDE_PROXY_QUEUE_TIMEOUT_MS, default 30000 ms). This value is handed to
// setTimeout, where an invalid delay fires almost immediately, so a bad env value
// must fall back to the default rather than time out every queued request.
const QUEUE_TIMEOUT_MS = readPositiveIntEnv(process.env.CLAUDE_PROXY_QUEUE_TIMEOUT_MS, 30000)
53
+
50
54
  class RequestQueue {
51
55
  private active = 0
52
- private waiting: Array<() => void> = []
56
+ private waiting: Array<{ resolve: () => void; reject: (err: Error) => void }> = []
53
57
 
54
58
  get activeCount() { return this.active }
55
59
  get waitingCount() { return this.waiting.length }
@@ -59,15 +63,25 @@ class RequestQueue {
59
63
  this.active++
60
64
  return
61
65
  }
62
- return new Promise<void>((resolve) => {
63
- this.waiting.push(() => { this.active++; resolve() })
66
+ return new Promise<void>((resolve, reject) => {
67
+ const entry = { resolve: () => { this.active++; resolve() }, reject }
68
+ this.waiting.push(entry)
69
+ const timer = setTimeout(() => {
70
+ const idx = this.waiting.indexOf(entry)
71
+ if (idx !== -1) {
72
+ this.waiting.splice(idx, 1)
73
+ reject(new Error("Queue timeout — all slots busy"))
74
+ }
75
+ }, QUEUE_TIMEOUT_MS)
76
+ const origResolve = entry.resolve
77
+ entry.resolve = () => { clearTimeout(timer); origResolve() }
64
78
  })
65
79
  }
66
80
 
67
81
  release(): void {
68
82
  this.active--
69
83
  const next = this.waiting.shift()
70
- if (next) next()
84
+ if (next) next.resolve()
71
85
  }
72
86
  }
73
87
 
@@ -81,45 +95,13 @@ function mapModelToClaudeModel(model: string): "sonnet" | "opus" | "haiku" {
81
95
 
82
96
  // ── Content-block serialization ──────────────────────────────────────────────
83
97
 
84
- function saveImageToTemp(block: any, tempFiles: string[]): string | null {
85
- try {
86
- let data: string | undefined
87
- let mediaType = "image/jpeg"
88
-
89
- if (typeof block.data === "string") {
90
- data = block.data
91
- mediaType = block.media_type || mediaType
92
- } else if (block.source) {
93
- if (block.source.type === "base64" && block.source.data) {
94
- data = block.source.data
95
- mediaType = block.source.media_type || mediaType
96
- } else if (block.source.url) {
97
- return block.source.url
98
- }
99
- }
100
-
101
- if (!data) return null
102
-
103
- const ext = mediaType.split("/")[1]?.replace("jpeg", "jpg") || "jpg"
104
- const tmpPath = join(tmpdir(), `proxy-img-${randomBytes(8).toString("hex")}.${ext}`)
105
- writeFileSync(tmpPath, Buffer.from(data, "base64"))
106
- tempFiles.push(tmpPath)
107
- return tmpPath
108
- } catch {
109
- return null
110
- }
111
- }
112
-
113
- function serializeBlock(block: any, tempFiles: string[]): string {
98
+ function serializeBlock(block: any): string {
114
99
  switch (block.type) {
115
100
  case "text":
116
101
  return block.text || ""
117
- case "image": {
118
- const imgPath = saveImageToTemp(block, tempFiles)
119
- return imgPath ? `[Image: ${imgPath}]` : "[Image: (unable to save)]"
120
- }
102
+ case "image":
103
+ return "[Image attached]"
121
104
  case "tool_use":
122
- // Use <tool_use> XML format so the model continues using parseable blocks
123
105
  return `<tool_use>\n{"name": "${block.name}", "input": ${JSON.stringify(block.input ?? {})}}\n</tool_use>`
124
106
  case "tool_result": {
125
107
  const content = Array.isArray(block.content)
@@ -128,7 +110,7 @@ function serializeBlock(block: any, tempFiles: string[]): string {
128
110
  const truncated = content.length > 4000
129
111
  ? content.slice(0, 4000) + `\n...[truncated ${content.length - 4000} chars]`
130
112
  : content
131
- return `<tool_result tool_use_id="${block.tool_use_id}">\n${truncated}\n</tool_result>`
113
+ return `[Tool Result (id: ${block.tool_use_id})]\n${truncated}\n[/Tool Result]`
132
114
  }
133
115
  case "thinking":
134
116
  return ""
@@ -137,22 +119,83 @@ function serializeBlock(block: any, tempFiles: string[]): string {
137
119
  }
138
120
  }
139
121
 
140
/**
 * Flatten message content into a single plain-text string.
 *
 * - A string is returned unchanged.
 * - Any other non-array value is converted with `String(...)`.
 * - An array is serialized block by block through `serializeBlock`; blocks that
 *   serialize to an empty string are dropped and the rest are joined by newlines.
 */
function serializeContent(content: string | Array<any>): string {
  if (Array.isArray(content)) {
    const rendered: string[] = []
    content.forEach((block) => {
      const piece = serializeBlock(block)
      if (piece) rendered.push(piece)
    })
    return rendered.join("\n")
  }
  return typeof content === "string" ? content : String(content)
}
127
+
128
+ // ── Image handling via SDKUserMessage ────────────────────────────────────────
129
+ // The SDK query() accepts AsyncIterable<SDKUserMessage> which supports native
130
+ // Anthropic MessageParam content blocks including images. When images are
131
+ // detected, we pass them through natively instead of serializing to text.
132
+
133
/**
 * Report whether message content contains at least one image block.
 *
 * @param content Message content: either plain text or an array of content blocks.
 * @returns `true` only when `content` is an array holding a block whose `type`
 *          is `"image"`; strings and non-array values always yield `false`.
 */
function contentHasImages(content: string | Array<any>): boolean {
  if (!Array.isArray(content)) return false
  // Content comes from the request body, so tolerate null/undefined entries
  // instead of throwing while reading `.type`.
  return content.some((block: any) => block?.type === "image")
}
138
+
139
/**
 * Normalize an image content block into the Anthropic
 * `{ type: "image", source: { type: "base64", media_type, data } }` shape.
 *
 * - A block that already has a `source` is returned unchanged.
 * - A flat block carrying `data` together with `mimeType` (the form the original
 *   comment attributes to openclaw) or `media_type` is wrapped into a base64
 *   source; `mimeType` wins when both are present.
 * - Anything else, including null or non-object values, is returned as-is.
 */
function toAnthropicImageBlock(block: any): any {
  if (block == null || typeof block !== "object") return block
  if (block.source) return block // already in Anthropic format

  const mediaType = block.mimeType || block.media_type
  if (block.data && mediaType) {
    return {
      type: "image",
      source: {
        type: "base64",
        media_type: mediaType,
        data: block.data,
      },
    }
  }
  return block
}
145
165
 
146
- function cleanupTempFiles(tempFiles: string[]) {
147
- for (const f of tempFiles) {
148
- try { unlinkSync(f) } catch {}
166
/**
 * Build a content-block array for a user message, keeping images as native blocks.
 *
 * - A string becomes a single text block.
 * - Any other non-array value becomes a single text block of `String(content)`.
 * - For arrays: image blocks go through `toAnthropicImageBlock`, text blocks are
 *   copied (missing text becomes ""), and every other block type is rendered
 *   with `serializeBlock` and wrapped as text; blocks that render to an empty
 *   string are dropped.
 *
 * Null or undefined array entries are skipped rather than dereferenced.
 */
function buildNativeContent(content: string | Array<any>): Array<any> {
  if (typeof content === "string") return [{ type: "text", text: content }]
  if (!Array.isArray(content)) return [{ type: "text", text: String(content) }]

  const blocks: Array<any> = []
  for (const block of content) {
    if (block == null) continue
    if (block.type === "image") {
      blocks.push(toAnthropicImageBlock(block))
    } else if (block.type === "text") {
      blocks.push({ type: "text", text: block.text ?? "" })
    } else {
      // Non-image, non-text blocks are carried as their text serialization.
      const serialized = serializeBlock(block)
      if (serialized) blocks.push({ type: "text", text: serialized })
    }
  }
  return blocks
}
178
+
179
/**
 * Wrap a content-block array in an async iterable that yields exactly one
 * user-message envelope.
 *
 * The envelope has `type: "user"`, a `message` of `{ role: "user", content }`,
 * `parent_tool_use_id: null`, and `session_id` set to `sessionId` or "" when
 * none is given. Each iteration of the returned object starts a fresh generator.
 *
 * @param content   Content blocks to place in the message.
 * @param sessionId Optional session identifier copied into `session_id`.
 */
function createSDKUserMessage(content: Array<any>, sessionId?: string): AsyncIterable<any> {
  const envelope = {
    type: "user" as const,
    message: { role: "user" as const, content },
    parent_tool_use_id: null,
    session_id: sessionId ?? "",
  }

  async function* emitOnce() {
    yield envelope
  }

  return { [Symbol.asyncIterator]: emitOnce }
}
151
196
 
197
+
152
198
  // ── Client tool-use support ──────────────────────────────────────────────────
153
- // The proxy never uses Claude Code's built-in tools. All tools come from the
154
- // API caller. Tool definitions are injected into the system prompt; <tool_use>
155
- // XML blocks in the output are parsed back into Anthropic tool_use content.
156
199
 
157
200
  function buildClientToolsPrompt(tools: any[]): string {
158
201
  const defs = tools.map((t: any) => {
@@ -173,7 +216,6 @@ function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string
173
216
  const calls: ToolCall[] = []
174
217
  let firstIdx = -1
175
218
 
176
- // Parse <tool_use> XML blocks (primary format)
177
219
  const xmlRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
178
220
  let m: RegExpExecArray | null
179
221
  while ((m = xmlRegex.exec(text)) !== null) {
@@ -188,7 +230,43 @@ function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string
188
230
  } catch { /* skip malformed block */ }
189
231
  }
190
232
 
191
- // Fallback: parse [Tool call: name\nInput: {...}] format
233
+ if (calls.length === 0) {
234
+ const fcRegex = /<function_calls>([\s\S]*?)<\/function_calls>/g
235
+ while ((m = fcRegex.exec(text)) !== null) {
236
+ if (firstIdx < 0) firstIdx = m.index
237
+ try {
238
+ const parsed = JSON.parse(m[1]!.trim())
239
+ const items = Array.isArray(parsed) ? parsed : [parsed]
240
+ for (const p of items) {
241
+ if (p && typeof p.name === "string") {
242
+ calls.push({
243
+ id: generateId("toolu_"),
244
+ name: p.name,
245
+ input: p.input ?? p.parameters ?? {}
246
+ })
247
+ }
248
+ }
249
+ } catch { /* skip malformed block */ }
250
+ }
251
+ }
252
+
253
+ if (calls.length === 0) {
254
+ const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g
255
+ while ((m = invokeRegex.exec(text)) !== null) {
256
+ if (firstIdx < 0) firstIdx = m.index
257
+ const toolName = m[1]!
258
+ const body = m[2]!
259
+ const input: Record<string, any> = {}
260
+ const paramRegex = /<parameter\s+name="([^"]+)">([\s\S]*?)<\/parameter>/g
261
+ let pm: RegExpExecArray | null
262
+ while ((pm = paramRegex.exec(body)) !== null) {
263
+ const val = pm[2]!.trim()
264
+ try { input[pm[1]!] = JSON.parse(val) } catch { input[pm[1]!] = val }
265
+ }
266
+ calls.push({ id: generateId("toolu_"), name: toolName, input })
267
+ }
268
+ }
269
+
192
270
  if (calls.length === 0) {
193
271
  const bracketRegex = /\[Tool call:\s*(\w+)\s*\nInput:\s*([\s\S]*?)\]/g
194
272
  while ((m = bracketRegex.exec(text)) !== null) {
@@ -211,10 +289,43 @@ function roughTokens(text: string): number {
211
289
  return Math.ceil((text ?? "").length / 4)
212
290
  }
213
291
 
292
+ // ── Conversation label extraction ────────────────────────────────────────────
293
+ // Openclaw embeds "Conversation info (untrusted metadata)" in the last user
294
+ // message containing a JSON block with conversation_label. Extract it to use
295
+ // as a stable conversation ID for session persistence.
296
+
297
/**
 * Find a conversation identifier embedded in the most recent user message that
 * carries a "Conversation info" metadata block (a fenced `json` object).
 *
 * Messages are scanned from last to first; only `role: "user"` messages are
 * considered. For the first message with a parseable block:
 * - `conversation_label` is returned when it is a non-empty string or a number;
 * - otherwise `sender_id` is returned as `dm:<sender_id>`;
 * - if neither is usable, the scan continues with earlier messages.
 * When the JSON does not parse, a regex looks for a quoted `conversation_label`
 * in the message text instead.
 *
 * @param messages Request messages; content may be a string or an array of blocks
 *                 (only `type: "text"` blocks are read).
 * @returns The identifier as a string, or `null` when none is found.
 */
function extractConversationLabel(messages: Array<{ role: string; content: string | Array<any> }>): string | null {
  // The metadata is untrusted JSON: accept only scalar identifiers so the
  // declared `string | null` return type actually holds.
  const toIdentifier = (value: unknown): string | null => {
    if (typeof value === "string") return value.length > 0 ? value : null
    if (typeof value === "number" && Number.isFinite(value)) return String(value)
    return null
  }

  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i]
    if (msg?.role !== "user") continue

    const text = typeof msg.content === "string"
      ? msg.content
      : Array.isArray(msg.content)
        ? msg.content.filter((b: any) => b?.type === "text").map((b: any) => b.text ?? "").join("\n")
        : ""

    // Look for the JSON block after "Conversation info"
    const jsonMatch = text.match(/Conversation info[^`]*```json\s*(\{[\s\S]*?\})\s*```/)
    if (!jsonMatch?.[1]) continue

    try {
      const meta = JSON.parse(jsonMatch[1])
      const label = toIdentifier(meta?.conversation_label)
      if (label !== null) return label
      const sender = toIdentifier(meta?.sender_id)
      if (sender !== null) return `dm:${sender}`
    } catch {
      // Regex fallback if JSON parse fails
      const labelMatch = text.match(/"conversation_label"\s*:\s*"([^"]*)"/)
      if (labelMatch?.[1]) return labelMatch[1]
    }
  }
  return null
}
327
+
214
328
  // ── Query options builder ────────────────────────────────────────────────────
215
- // Always runs with all built-in tools disabled (tools: []) and maxTurns: 1.
216
- // The proxy is a pure API translation layer — tool definitions come from the
217
- // caller and are injected into the system prompt. No MCP servers, no agent loop.
218
329
 
219
330
  function buildQueryOptions(
220
331
  model: "sonnet" | "opus" | "haiku",
@@ -223,6 +334,7 @@ function buildQueryOptions(
223
334
  systemPrompt?: string
224
335
  abortController?: AbortController
225
336
  thinking?: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" }
337
+ resume?: string
226
338
  } = {}
227
339
  ) {
228
340
  return {
@@ -230,14 +342,15 @@ function buildQueryOptions(
230
342
  pathToClaudeCodeExecutable: claudeExecutable,
231
343
  permissionMode: "bypassPermissions" as const,
232
344
  allowDangerouslySkipPermissions: true,
233
- persistSession: false,
345
+ persistSession: true,
234
346
  settingSources: [],
235
- tools: [] as string[],
347
+ tools: ["_proxy_noop_"] as string[],
236
348
  maxTurns: 1,
237
349
  ...(opts.partial ? { includePartialMessages: true } : {}),
238
350
  ...(opts.abortController ? { abortController: opts.abortController } : {}),
239
351
  ...(opts.thinking ? { thinking: opts.thinking } : {}),
240
352
  ...(opts.systemPrompt ? { systemPrompt: opts.systemPrompt } : {}),
353
+ ...(opts.resume ? { resume: opts.resume } : {}),
241
354
  }
242
355
  }
243
356
 
@@ -249,13 +362,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
249
362
 
250
363
  app.use("*", cors())
251
364
 
252
- // Optional API key validation — when CLAUDE_PROXY_API_KEY is set,
253
- // require a matching x-api-key or Authorization: Bearer header.
365
+ // Optional API key validation
254
366
  const requiredApiKey = process.env.CLAUDE_PROXY_API_KEY
255
367
  if (requiredApiKey) {
256
368
  app.use("*", async (c, next) => {
257
- // Skip auth for health check and OPTIONS
258
- if (c.req.path === "/" || c.req.method === "OPTIONS") return next()
369
+ if (c.req.path === "/" || c.req.path.startsWith("/debug") || c.req.method === "OPTIONS") return next()
259
370
  const key = c.req.header("x-api-key")
260
371
  ?? c.req.header("authorization")?.replace(/^Bearer\s+/i, "")
261
372
  if (key !== requiredApiKey) {
@@ -269,30 +380,175 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
269
380
  })
270
381
  }
271
382
 
272
- // Anthropic-compatible headers + request logging
383
+ // Anthropic-compatible headers + HTTP request logging
273
384
  app.use("*", async (c, next) => {
274
385
  const start = Date.now()
275
386
  const requestId = c.req.header("x-request-id") ?? generateId("req_")
276
387
  c.header("x-request-id", requestId)
277
388
  c.header("request-id", requestId)
278
- // Echo back Anthropic-standard headers
279
389
  c.header("anthropic-version", "2023-06-01")
280
390
  const betaHeader = c.req.header("anthropic-beta")
281
391
  if (betaHeader) c.header("anthropic-beta", betaHeader)
282
392
  await next()
283
393
  const ms = Date.now() - start
284
- claudeLog("proxy.http", { method: c.req.method, path: c.req.path, status: c.res.status, ms, requestId })
394
+ // Only log non-debug HTTP requests at info level; debug endpoints at debug level
395
+ if (c.req.path.startsWith("/debug")) {
396
+ logDebug("http.request", { method: c.req.method, path: c.req.path, status: c.res.status, ms, reqId: requestId })
397
+ } else {
398
+ logInfo("http.request", { method: c.req.method, path: c.req.path, status: c.res.status, ms, reqId: requestId })
399
+ }
285
400
  })
286
401
 
402
+ // ── Health / Info ────────────────────────────────────────────────────────
403
+
287
404
  app.get("/", (c) => c.json({
288
405
  status: "ok",
289
406
  service: "claude-sdk-proxy",
290
407
  version: PROXY_VERSION,
291
408
  format: "anthropic",
292
- endpoints: ["/v1/messages", "/v1/models", "/v1/chat/completions"],
293
- queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT }
409
+ endpoints: ["/v1/messages", "/v1/models", "/v1/chat/completions", "/debug/stats", "/debug/traces", "/debug/errors", "/debug/active", "/debug/health", "/sessions", "/sessions/cleanup"],
410
+ queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
411
+ logDir: LOG_DIR,
294
412
  }))
295
413
 
414
+ // ── Debug / Observability endpoints ──────────────────────────────────────
415
+
416
+ app.get("/debug/stats", (c) => {
417
+ const stats = traceStore.getStats()
418
+ const sessionStats = sessionStore.getStats()
419
+ return c.json({
420
+ version: PROXY_VERSION,
421
+ config: {
422
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
423
+ maxDurationMs: finalConfig.maxDurationMs,
424
+ maxOutputChars: finalConfig.maxOutputChars,
425
+ maxConcurrent: MAX_CONCURRENT,
426
+ queueTimeoutMs: QUEUE_TIMEOUT_MS,
427
+ claudeExecutable,
428
+ logDir: LOG_DIR,
429
+ debug: finalConfig.debug,
430
+ },
431
+ queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
432
+ sessions: sessionStats,
433
+ ...stats,
434
+ })
435
+ })
436
+
437
+ // ── Session management endpoints ──────────────────────────────────────
438
+
439
+ app.get("/sessions", (c) => {
440
+ return c.json({
441
+ sessions: sessionStore.list(),
442
+ stats: sessionStore.getStats(),
443
+ })
444
+ })
445
+
446
+ app.get("/sessions/cleanup", (c) => {
447
+ const result = sessionStore.cleanup()
448
+ return c.json(result)
449
+ })
450
+
451
+ app.get("/debug/traces", (c) => {
452
+ const limit = parseInt(c.req.query("limit") ?? "20", 10)
453
+ return c.json(traceStore.getRecentTraces(limit))
454
+ })
455
+
456
+ app.get("/debug/traces/:id", (c) => {
457
+ const id = c.req.param("id")
458
+ const trace = traceStore.getTrace(id)
459
+ if (!trace) return c.json({ error: "Trace not found", reqId: id }, 404)
460
+ return c.json(trace)
461
+ })
462
+
463
+ app.get("/debug/errors", (c) => {
464
+ const limit = parseInt(c.req.query("limit") ?? "10", 10)
465
+ return c.json(traceStore.getRecentErrors(limit))
466
+ })
467
+
468
+ app.get("/debug/logs", (c) => {
469
+ // List available log files
470
+ try {
471
+ const files = readdirSync(LOG_DIR)
472
+ .filter(f => f.startsWith("proxy-") && f.endsWith(".log"))
473
+ .sort()
474
+ .reverse()
475
+ return c.json({ logDir: LOG_DIR, files })
476
+ } catch {
477
+ return c.json({ logDir: LOG_DIR, files: [], error: "Cannot read log directory" })
478
+ }
479
+ })
480
+
481
+ app.get("/debug/logs/:filename", (c) => {
482
+ // Serve a specific log file (last N lines)
483
+ const filename = c.req.param("filename")
484
+ if (!filename.match(/^proxy-\d{4}-\d{2}-\d{2}\.log$/)) {
485
+ return c.json({ error: "Invalid log filename" }, 400)
486
+ }
487
+ const tail = parseInt(c.req.query("tail") ?? "100", 10)
488
+ try {
489
+ const content = readFileSync(join(LOG_DIR, filename), "utf-8")
490
+ const lines = content.trim().split("\n")
491
+ const sliced = lines.slice(-tail)
492
+ const parsed = sliced.map(line => {
493
+ try { return JSON.parse(line) } catch { return { raw: line } }
494
+ })
495
+ return c.json({ file: filename, total: lines.length, returned: sliced.length, lines: parsed })
496
+ } catch {
497
+ return c.json({ error: "Log file not found" }, 404)
498
+ }
499
+ })
500
+
501
+ app.get("/debug/errors/:id", (c) => {
502
+ // Serve a specific error dump file
503
+ const id = c.req.param("id")
504
+ if (!id.match(/^req_/)) return c.json({ error: "Invalid request ID format" }, 400)
505
+ try {
506
+ const content = readFileSync(join(LOG_DIR, "errors", `${id}.json`), "utf-8")
507
+ return c.json(JSON.parse(content))
508
+ } catch {
509
+ return c.json({ error: "Error dump not found", reqId: id }, 404)
510
+ }
511
+ })
512
+
513
+ app.get("/debug/active", (c) => {
514
+ // Detailed view of currently active requests
515
+ const stats = traceStore.getStats()
516
+ return c.json({
517
+ queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
518
+ activeRequests: stats.activeRequests,
519
+ })
520
+ })
521
+
522
+ app.get("/debug/health", (c) => {
523
+ // Process health: memory, uptime, resource usage
524
+ const mem = process.memoryUsage()
525
+ const stats = traceStore.getStats()
526
+ return c.json({
527
+ version: PROXY_VERSION,
528
+ pid: process.pid,
529
+ uptimeMs: stats.uptimeMs,
530
+ uptimeHuman: stats.uptimeHuman,
531
+ memory: {
532
+ rss: `${(mem.rss / 1024 / 1024).toFixed(1)}MB`,
533
+ heapUsed: `${(mem.heapUsed / 1024 / 1024).toFixed(1)}MB`,
534
+ heapTotal: `${(mem.heapTotal / 1024 / 1024).toFixed(1)}MB`,
535
+ external: `${(mem.external / 1024 / 1024).toFixed(1)}MB`,
536
+ rssBytes: mem.rss,
537
+ heapUsedBytes: mem.heapUsed,
538
+ },
539
+ queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
540
+ requests: stats.requests,
541
+ config: {
542
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
543
+ maxConcurrent: MAX_CONCURRENT,
544
+ queueTimeoutMs: QUEUE_TIMEOUT_MS,
545
+ debug: finalConfig.debug,
546
+ },
547
+ })
548
+ })
549
+
550
+ // ── Model endpoints ──────────────────────────────────────────────────────
551
+
296
552
  const MODELS = [
297
553
  { type: "model", id: "claude-opus-4-6", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
298
554
  { type: "model", id: "claude-opus-4-6-20250801", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
@@ -303,7 +559,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
303
559
  { type: "model", id: "claude-haiku-4-5-20251001", display_name: "Claude Haiku 4.5", created_at: "2025-10-01T00:00:00Z" },
304
560
  ]
305
561
 
306
- // Dual-format model data: includes fields for both Anthropic and OpenAI SDKs
307
562
  const MODELS_DUAL = MODELS.map(m => ({
308
563
  ...m,
309
564
  object: "model" as const,
@@ -341,17 +596,27 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
341
596
  app.post("/v1/messages/count_tokens", handleCountTokens)
342
597
  app.post("/messages/count_tokens", handleCountTokens)
343
598
 
599
+ // ── Messages handler ─────────────────────────────────────────────────────
600
+
344
601
  const handleMessages = async (c: Context) => {
345
602
  const reqId = generateId("req_")
603
+ // Will be set after body parse; needed for outer catch
604
+ let trace: ReturnType<typeof traceStore.create> | undefined
605
+ let requestStarted = Date.now()
606
+ let clientDisconnected = false
607
+ let abortReason: "stall" | "max_duration" | "max_output" | null = null
608
+
346
609
  try {
347
610
  let body: any
348
611
  try {
349
612
  body = await c.req.json()
350
- } catch {
613
+ } catch (parseErr) {
614
+ logWarn("request.invalid_json", { reqId })
351
615
  return c.json({ type: "error", error: { type: "invalid_request_error", message: "Request body must be valid JSON" }, request_id: reqId }, 400)
352
616
  }
353
617
 
354
618
  if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
619
+ logWarn("request.missing_messages", { reqId })
355
620
  return c.json({ type: "error", error: { type: "invalid_request_error", message: "messages is required and must be a non-empty array" }, request_id: reqId }, 400)
356
621
  }
357
622
 
@@ -359,17 +624,74 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
359
624
  const stream = body.stream ?? false
360
625
  const hasTools = body.tools?.length > 0
361
626
  const abortController = new AbortController()
362
- const timeout = setTimeout(() => abortController.abort(), finalConfig.requestTimeoutMs)
363
627
 
364
- // Extended thinking: map Anthropic API thinking param to SDK ThinkingConfig
628
+ // Stall-based timeout: only aborts if no SDK events received for stallTimeoutMs.
629
+ // Resets on every SDK event, so active requests never get killed.
630
+ // NOTE: not started until queue is acquired — queue wait doesn't count.
631
+ let stallTimer: ReturnType<typeof setTimeout> | null = null
632
+ const resetStallTimer = () => {
633
+ if (stallTimer) clearTimeout(stallTimer)
634
+ stallTimer = setTimeout(() => {
635
+ abortReason = "stall"
636
+ logWarn("request.stall_timeout", {
637
+ reqId,
638
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
639
+ phase: trace?.phase,
640
+ sdkEventCount: trace?.sdkEventCount,
641
+ outputLen: trace?.outputLen,
642
+ lastEventType: trace?.lastEventType,
643
+ })
644
+ abortController.abort()
645
+ }, finalConfig.stallTimeoutMs)
646
+ }
647
+ const clearStallTimer = () => {
648
+ if (stallTimer) { clearTimeout(stallTimer); stallTimer = null }
649
+ }
650
+
651
+ // Hard max duration: kills request even if actively streaming. Safety valve.
652
+ let hardTimer: ReturnType<typeof setTimeout> | null = null
653
+ const startHardTimer = () => {
654
+ hardTimer = setTimeout(() => {
655
+ abortReason = "max_duration"
656
+ logError("request.max_duration", {
657
+ reqId,
658
+ maxDurationMs: finalConfig.maxDurationMs,
659
+ phase: trace?.phase,
660
+ sdkEventCount: trace?.sdkEventCount,
661
+ outputLen: trace?.outputLen,
662
+ model: trace?.model,
663
+ lastEventType: trace?.lastEventType,
664
+ })
665
+ abortController.abort()
666
+ }, finalConfig.maxDurationMs)
667
+ }
668
+ const clearHardTimer = () => {
669
+ if (hardTimer) { clearTimeout(hardTimer); hardTimer = null }
670
+ }
671
+
672
+ // Output size check: kills request if output exceeds maxOutputChars.
673
+ const checkOutputSize = (outputLen: number) => {
674
+ if (outputLen > finalConfig.maxOutputChars && !abortReason) {
675
+ abortReason = "max_output"
676
+ logError("request.max_output", {
677
+ reqId,
678
+ outputLen,
679
+ maxOutputChars: finalConfig.maxOutputChars,
680
+ phase: trace?.phase,
681
+ sdkEventCount: trace?.sdkEventCount,
682
+ model: trace?.model,
683
+ elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
684
+ })
685
+ abortController.abort()
686
+ }
687
+ }
688
+
365
689
  const thinking: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" } | undefined =
366
690
  body.thinking?.type === "enabled" ? { type: "enabled", budgetTokens: body.thinking.budget_tokens }
367
691
  : body.thinking?.type === "disabled" ? { type: "disabled" }
368
692
  : body.thinking?.type === "adaptive" ? { type: "adaptive" }
369
693
  : undefined
370
694
 
371
- const tempFiles: string[] = []
372
-
373
695
  let systemContext = ""
374
696
  if (body.system) {
375
697
  if (typeof body.system === "string") {
@@ -382,49 +704,161 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
382
704
  }
383
705
  }
384
706
 
385
- // Build the prompt from messages. The SDK's query() takes a single prompt
386
- // string, so multi-turn conversations are serialized with XML-delimited
387
- // turns. Prior turns go into the system prompt as context, the last user
388
- // message becomes the prompt.
389
707
  const messages = body.messages as Array<{ role: string; content: string | Array<any> }>
390
708
 
391
- let prompt: string
709
+ let promptText: string // text version for token counting / logging
392
710
  let systemPrompt: string | undefined
393
711
  const toolsSection = hasTools ? buildClientToolsPrompt(body.tools) : ""
394
712
 
395
- if (messages.length === 1) {
713
+ // ── Session resumption ─────────────────────────────────────────────
714
+ // Derive conversation ID from: headers (explicit) or conversation_label
715
+ // embedded in openclaw message metadata.
716
+ const conversationId = c.req.header("x-conversation-id")
717
+ ?? c.req.header("x-session-id")
718
+ ?? extractConversationLabel(messages)
719
+ ?? null
720
+
721
+ let resumeSessionId: string | undefined
722
+ let isResuming = false
723
+
724
+ if (conversationId && messages.length > 1) {
725
+ const stored = sessionStore.get(conversationId)
726
+ if (stored && stored.model === model) {
727
+ resumeSessionId = stored.sdkSessionId
728
+ isResuming = true
729
+ logInfo("session.resuming", {
730
+ reqId,
731
+ conversationId,
732
+ sdkSessionId: resumeSessionId,
733
+ storedMsgCount: stored.messageCount,
734
+ currentMsgCount: messages.length,
735
+ resumeCount: stored.resumeCount,
736
+ })
737
+ }
738
+ }
739
+
740
+ // Check if last user message contains images — if so, use native SDK multimodal input
741
+ const lastMsg = messages[messages.length - 1]!
742
+ const lastMsgHasImages = contentHasImages(lastMsg.content)
743
+
744
+ // promptInput: either a string (text-only) or AsyncIterable<SDKUserMessage> (multimodal)
745
+ let promptInput: string | AsyncIterable<any>
746
+ // promptText: always the text-only version for token counting and logging
747
+ promptText = serializeContent(lastMsg.content)
748
+
749
+ if (isResuming && resumeSessionId) {
750
+ systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
751
+ if (lastMsgHasImages) {
752
+ promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content), resumeSessionId)
753
+ logInfo("session.resume_with_images", { reqId, conversationId })
754
+ } else {
755
+ promptInput = promptText
756
+ }
757
+ } else if (messages.length === 1) {
396
758
  systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
397
- prompt = serializeContent(messages[0]!.content, tempFiles)
759
+ promptInput = lastMsgHasImages
760
+ ? createSDKUserMessage(buildNativeContent(lastMsg.content))
761
+ : promptText
762
+ if (lastMsgHasImages) logInfo("request.native_images", { reqId })
398
763
  } else {
399
- const lastMsg = messages[messages.length - 1]!
400
764
  const priorMsgs = messages.slice(0, -1)
401
765
 
402
766
  const contextParts = priorMsgs
403
767
  .map((m) => {
404
- const tag = m.role === "assistant" ? "assistant_message" : "user_message"
405
- return `<${tag}>\n${serializeContent(m.content, tempFiles)}\n</${tag}>`
768
+ const role = m.role === "assistant" ? "Assistant" : "User"
769
+ return `[${role}]\n${serializeContent(m.content)}`
406
770
  })
407
771
  .join("\n\n")
408
772
 
409
773
  const baseSystem = systemContext || ""
410
774
  const contextSection = contextParts
411
- ? `\n\n<conversation_history>\n${contextParts}\n</conversation_history>`
775
+ ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---`
412
776
  : ""
413
777
  systemPrompt = (baseSystem + contextSection + toolsSection).trim() || undefined
414
- prompt = serializeContent(lastMsg.content, tempFiles)
778
+
779
+ if (lastMsgHasImages) {
780
+ promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content))
781
+ logInfo("request.native_images", { reqId })
782
+ } else {
783
+ promptInput = promptText
784
+ }
415
785
  }
416
786
 
417
- claudeLog("proxy.request", { reqId, model, stream, msgs: body.messages?.length, hasTools, ...(thinking ? { thinking: thinking.type } : {}), queueActive: requestQueue.activeCount, queueWaiting: requestQueue.waitingCount })
787
+ requestStarted = Date.now()
788
+
789
+ // Capture client info
790
+ const clientIp = c.req.header("x-forwarded-for")
791
+ ?? c.req.header("x-real-ip")
792
+ ?? c.req.header("cf-connecting-ip")
793
+ ?? "unknown"
794
+ const userAgent = c.req.header("user-agent") ?? "unknown"
795
+ const bodyBytes = JSON.stringify(body).length
796
+
797
+ // ── Create trace ──────────────────────────────────────────────────────
798
+ trace = traceStore.create({
799
+ reqId,
800
+ model,
801
+ requestedModel: body.model || "sonnet",
802
+ stream,
803
+ hasTools,
804
+ thinking: thinking?.type,
805
+ promptLen: promptText.length,
806
+ systemLen: systemPrompt?.length ?? 0,
807
+ msgCount: messages.length,
808
+ bodyBytes,
809
+ clientIp,
810
+ userAgent,
811
+ })
812
+
813
+ // ── Queue ─────────────────────────────────────────────────────────────
814
+ const queueActive = requestQueue.activeCount
815
+ const queueWaiting = requestQueue.waitingCount
816
+ const needsQueue = queueActive >= MAX_CONCURRENT
817
+
818
+ traceStore.phase(reqId, "queued", { queueActive, queueWaiting })
819
+
820
+ if (needsQueue) {
821
+ logInfo("queue.waiting", {
822
+ reqId,
823
+ model,
824
+ queueActive,
825
+ queueWaiting,
826
+ queueTimeoutMs: QUEUE_TIMEOUT_MS,
827
+ })
828
+ }
418
829
 
419
- // Acquire a slot in the concurrency queue — all code after this MUST
420
- // release via the try/finally blocks in both streaming and non-streaming paths.
421
830
  await requestQueue.acquire()
422
831
 
832
+ const queueWaitMs = Date.now() - requestStarted
833
+ traceStore.phase(reqId, "acquired", { queueWaitMs })
834
+
835
+ logInfo("queue.acquired", {
836
+ reqId,
837
+ queueWaitMs,
838
+ queueActive: requestQueue.activeCount,
839
+ queueWaiting: requestQueue.waitingCount,
840
+ })
841
+
842
+ // Start timers AFTER queue acquire — queue wait doesn't count
843
+ resetStallTimer()
844
+ startHardTimer()
845
+
423
846
  // ── Non-streaming ──────────────────────────────────────────────────────
424
847
  if (!stream) {
425
848
  let fullText = ""
849
+ let capturedSessionId: string | undefined
850
+ const queryOpts = buildQueryOptions(model, { partial: false, systemPrompt, abortController, thinking, resume: resumeSessionId })
426
851
  try {
427
- for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: false, systemPrompt, abortController, thinking }) })) {
852
+ traceStore.phase(reqId, "sdk_starting")
853
+ let sdkEventCount = 0
854
+ for await (const message of query({ prompt: promptInput, options: queryOpts })) {
855
+ sdkEventCount++
856
+ resetStallTimer()
857
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
858
+ // Capture session_id from init message
859
+ if (message.type === "system" && (message as any).subtype === "init") {
860
+ capturedSessionId = (message as any).session_id
861
+ }
428
862
  if (message.type === "assistant") {
429
863
  let turnText = ""
430
864
  for (const block of message.message.content) {
@@ -433,12 +867,87 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
433
867
  fullText = turnText
434
868
  }
435
869
  }
870
+ traceStore.phase(reqId, "sdk_done")
871
+
872
+ // Store session mapping for future resumption
873
+ if (conversationId && capturedSessionId) {
874
+ if (isResuming) {
875
+ sessionStore.recordResume(conversationId)
876
+ logInfo("session.resumed_ok", { reqId, conversationId, sdkSessionId: capturedSessionId })
877
+ } else {
878
+ sessionStore.set(conversationId, capturedSessionId, model, messages.length)
879
+ logInfo("session.created", { reqId, conversationId, sdkSessionId: capturedSessionId })
880
+ }
881
+ }
882
+ } catch (resumeErr) {
883
+ // If resume failed, retry with full context
884
+ if (isResuming && resumeSessionId) {
885
+ logWarn("session.resume_failed", {
886
+ reqId,
887
+ conversationId,
888
+ sdkSessionId: resumeSessionId,
889
+ error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
890
+ })
891
+ if (conversationId) {
892
+ sessionStore.recordFailure(conversationId)
893
+ sessionStore.invalidate(conversationId)
894
+ }
895
+ // Rebuild with full context (non-resume path)
896
+ const fbLastMsg = messages[messages.length - 1]!
897
+ const priorMsgs = messages.slice(0, -1)
898
+ const contextParts = priorMsgs
899
+ .map((m) => {
900
+ const role = m.role === "assistant" ? "Assistant" : "User"
901
+ return `[${role}]\n${serializeContent(m.content)}`
902
+ })
903
+ .join("\n\n")
904
+ const baseSystem = systemContext || ""
905
+ const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
906
+ const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
907
+ const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
908
+ ? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
909
+ : serializeContent(fbLastMsg.content)
910
+ const fallbackOpts = buildQueryOptions(model, { partial: false, systemPrompt: fallbackSystem, abortController, thinking })
911
+
912
+ logInfo("session.fallback_full_context", { reqId, conversationId })
913
+ let sdkEventCount = 0
914
+ for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
915
+ sdkEventCount++
916
+ resetStallTimer()
917
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
918
+ if (message.type === "system" && (message as any).subtype === "init") {
919
+ capturedSessionId = (message as any).session_id
920
+ }
921
+ if (message.type === "assistant") {
922
+ let turnText = ""
923
+ for (const block of message.message.content) {
924
+ if (block.type === "text") turnText += block.text
925
+ }
926
+ fullText = turnText
927
+ }
928
+ }
929
+ traceStore.phase(reqId, "sdk_done")
930
+ // Store the new session
931
+ if (conversationId && capturedSessionId) {
932
+ sessionStore.set(conversationId, capturedSessionId, model, messages.length)
933
+ logInfo("session.recreated_after_fallback", { reqId, conversationId, sdkSessionId: capturedSessionId })
934
+ }
935
+ } else {
936
+ throw resumeErr
937
+ }
436
938
  } finally {
437
- clearTimeout(timeout)
438
- cleanupTempFiles(tempFiles)
939
+ clearStallTimer(); clearHardTimer()
940
+ // (temp files no longer used — images passed natively)
439
941
  requestQueue.release()
942
+ logDebug("queue.released", {
943
+ reqId,
944
+ queueActive: requestQueue.activeCount,
945
+ queueWaiting: requestQueue.waitingCount,
946
+ })
440
947
  }
441
948
 
949
+ traceStore.phase(reqId, "responding")
950
+
442
951
  if (hasTools) {
443
952
  const { toolCalls, textBefore } = parseToolUse(fullText)
444
953
  const content: any[] = []
@@ -446,23 +955,26 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
446
955
  for (const tc of toolCalls) content.push({ type: "tool_use", id: tc.id, name: tc.name, input: tc.input })
447
956
  if (content.length === 0) content.push({ type: "text", text: fullText || "..." })
448
957
  const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
449
- claudeLog("proxy.response", { reqId, len: fullText.length, toolCalls: toolCalls.length })
958
+
959
+ traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
960
+
450
961
  return c.json({
451
962
  id: generateId("msg_"),
452
963
  type: "message", role: "assistant", content,
453
964
  model: body.model, stop_reason: stopReason, stop_sequence: null,
454
- usage: { input_tokens: roughTokens(prompt), output_tokens: roughTokens(fullText) }
965
+ usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
455
966
  })
456
967
  }
457
968
 
458
969
  if (!fullText || !fullText.trim()) fullText = "..."
459
- claudeLog("proxy.response", { reqId, len: fullText.length })
970
+ traceStore.complete(reqId, { outputLen: fullText.length })
971
+
460
972
  return c.json({
461
973
  id: generateId("msg_"),
462
974
  type: "message", role: "assistant",
463
975
  content: [{ type: "text", text: fullText }],
464
976
  model: body.model, stop_reason: "end_turn", stop_sequence: null,
465
- usage: { input_tokens: roughTokens(prompt), output_tokens: roughTokens(fullText) }
977
+ usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
466
978
  })
467
979
  }
468
980
 
@@ -470,23 +982,62 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
470
982
  const encoder = new TextEncoder()
471
983
  const readable = new ReadableStream({
472
984
  cancel() {
473
- // Client disconnected — abort the SDK query to free resources
985
+ clientDisconnected = true
986
+ logWarn("stream.client_disconnect", {
987
+ reqId,
988
+ phase: trace?.phase,
989
+ sdkEventCount: trace?.sdkEventCount,
990
+ outputLen: trace?.outputLen,
991
+ elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
992
+ model: trace?.model,
993
+ })
474
994
  abortController.abort()
475
995
  },
476
996
  async start(controller) {
477
997
  const messageId = generateId("msg_")
478
998
  let queueReleased = false
479
- const releaseQueue = () => { if (!queueReleased) { queueReleased = true; requestQueue.release() } }
999
+ const releaseQueue = () => {
1000
+ if (!queueReleased) {
1001
+ queueReleased = true
1002
+ requestQueue.release()
1003
+ logDebug("queue.released", {
1004
+ reqId,
1005
+ queueActive: requestQueue.activeCount,
1006
+ queueWaiting: requestQueue.waitingCount,
1007
+ })
1008
+ }
1009
+ }
480
1010
 
1011
+ let sseSendErrors = 0
481
1012
  const sse = (event: string, data: object) => {
482
1013
  try {
483
1014
  controller.enqueue(encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`))
484
- } catch {}
1015
+ } catch (e) {
1016
+ sseSendErrors++
1017
+ if (sseSendErrors <= 3) {
1018
+ logWarn("stream.sse_send_failed", {
1019
+ reqId,
1020
+ event,
1021
+ sseSendErrors,
1022
+ error: e instanceof Error ? e.message : String(e),
1023
+ })
1024
+ }
1025
+ }
485
1026
  }
486
1027
 
487
1028
  try {
488
1029
  const heartbeat = setInterval(() => {
489
- try { controller.enqueue(encoder.encode(`event: ping\ndata: {"type": "ping"}\n\n`)) } catch { clearInterval(heartbeat) }
1030
+ try {
1031
+ controller.enqueue(encoder.encode(`event: ping\ndata: {"type": "ping"}\n\n`))
1032
+ } catch (e) {
1033
+ logWarn("stream.heartbeat_failed", {
1034
+ reqId,
1035
+ error: e instanceof Error ? e.message : String(e),
1036
+ phase: trace?.phase,
1037
+ elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
1038
+ })
1039
+ clearInterval(heartbeat)
1040
+ }
490
1041
  }, 15_000)
491
1042
 
492
1043
  sse("message_start", {
@@ -494,31 +1045,123 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
494
1045
  message: {
495
1046
  id: messageId, type: "message", role: "assistant", content: [],
496
1047
  model: body.model, stop_reason: null, stop_sequence: null,
497
- usage: { input_tokens: roughTokens(prompt), output_tokens: 1 }
1048
+ usage: { input_tokens: roughTokens(promptText), output_tokens: 1 }
498
1049
  }
499
1050
  })
500
1051
 
501
1052
  if (hasTools) {
502
1053
  // ── With tools: buffer output, parse tool_use blocks at end ──
503
1054
  let fullText = ""
1055
+ let sdkEventCount = 0
1056
+ let lastEventAt = Date.now()
1057
+ const stallLog = setInterval(() => {
1058
+ const stallMs = Date.now() - lastEventAt
1059
+ traceStore.stall(reqId, stallMs)
1060
+ }, 15_000)
1061
+ let capturedSessionId: string | undefined
504
1062
  try {
505
- for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking }) })) {
1063
+ traceStore.phase(reqId, "sdk_starting")
1064
+ for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
1065
+ sdkEventCount++
1066
+ lastEventAt = Date.now()
1067
+ resetStallTimer()
1068
+ const subtype = (message as any).event?.type ?? (message as any).message?.type
1069
+ // Capture session_id from init message
1070
+ if (message.type === "system" && (message as any).subtype === "init") {
1071
+ capturedSessionId = (message as any).session_id
1072
+ }
506
1073
  if (message.type === "stream_event") {
507
1074
  const ev = message.event as any
1075
+ // Detect first content event BEFORE sdkEvent records it
1076
+ if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1077
+ traceStore.phase(reqId, "sdk_streaming")
1078
+ }
508
1079
  if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
509
1080
  fullText += ev.delta.text ?? ""
1081
+ traceStore.updateOutput(reqId, fullText.length)
1082
+ checkOutputSize(fullText.length)
510
1083
  }
511
1084
  }
1085
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1086
+ }
1087
+ traceStore.phase(reqId, "sdk_done")
1088
+
1089
+ // Store session mapping
1090
+ if (conversationId && capturedSessionId) {
1091
+ if (isResuming) {
1092
+ sessionStore.recordResume(conversationId)
1093
+ } else {
1094
+ sessionStore.set(conversationId, capturedSessionId, model, messages.length)
1095
+ }
1096
+ }
1097
+ } catch (resumeErr) {
1098
+ // Resume failed in streaming with-tools path — retry with full context
1099
+ if (isResuming && resumeSessionId) {
1100
+ logWarn("session.resume_failed_stream", {
1101
+ reqId, conversationId, sdkSessionId: resumeSessionId,
1102
+ error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
1103
+ })
1104
+ if (conversationId) {
1105
+ sessionStore.recordFailure(conversationId)
1106
+ sessionStore.invalidate(conversationId)
1107
+ }
1108
+ const fbLastMsg = messages[messages.length - 1]!
1109
+ const priorMsgs = messages.slice(0, -1)
1110
+ const contextParts = priorMsgs
1111
+ .map((m) => {
1112
+ const role = m.role === "assistant" ? "Assistant" : "User"
1113
+ return `[${role}]\n${serializeContent(m.content)}`
1114
+ })
1115
+ .join("\n\n")
1116
+ const baseSystem = systemContext || ""
1117
+ const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
1118
+ const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
1119
+ const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
1120
+ ? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
1121
+ : serializeContent(fbLastMsg.content)
1122
+ const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
1123
+
1124
+ logInfo("session.fallback_full_context_stream", { reqId, conversationId })
1125
+ sdkEventCount = 0
1126
+ for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
1127
+ sdkEventCount++
1128
+ lastEventAt = Date.now()
1129
+ resetStallTimer()
1130
+ const subtype = (message as any).event?.type ?? (message as any).message?.type
1131
+ if (message.type === "system" && (message as any).subtype === "init") {
1132
+ capturedSessionId = (message as any).session_id
1133
+ }
1134
+ if (message.type === "stream_event") {
1135
+ const ev = message.event as any
1136
+ if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1137
+ traceStore.phase(reqId, "sdk_streaming")
1138
+ }
1139
+ if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1140
+ fullText += ev.delta.text ?? ""
1141
+ traceStore.updateOutput(reqId, fullText.length)
1142
+ checkOutputSize(fullText.length)
1143
+ }
1144
+ }
1145
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1146
+ }
1147
+ traceStore.phase(reqId, "sdk_done")
1148
+ if (conversationId && capturedSessionId) {
1149
+ sessionStore.set(conversationId, capturedSessionId, model, messages.length)
1150
+ logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId })
1151
+ }
1152
+ } else {
1153
+ throw resumeErr
512
1154
  }
513
1155
  } finally {
1156
+ clearInterval(stallLog)
514
1157
  clearInterval(heartbeat)
515
- clearTimeout(timeout)
516
- cleanupTempFiles(tempFiles)
1158
+ clearStallTimer(); clearHardTimer()
1159
+ // (temp files no longer used — images passed natively)
517
1160
  releaseQueue()
518
1161
  }
519
1162
 
1163
+ traceStore.phase(reqId, "responding")
520
1164
  const { toolCalls, textBefore } = parseToolUse(fullText)
521
- claudeLog("proxy.stream.done", { reqId, len: fullText.length, toolCalls: toolCalls.length })
522
1165
 
523
1166
  let blockIdx = 0
524
1167
  const textContent = toolCalls.length === 0 ? (fullText || "...") : textBefore
@@ -544,6 +1187,8 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
544
1187
  sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
545
1188
  sse("message_stop", { type: "message_stop" })
546
1189
  controller.close()
1190
+
1191
+ traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
547
1192
  return
548
1193
  }
549
1194
 
@@ -552,29 +1197,124 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
552
1197
 
553
1198
  let fullText = ""
554
1199
  let hasStreamed = false
1200
+ let sdkEventCount = 0
1201
+ let lastEventAt = Date.now()
1202
+ let capturedSessionId2: string | undefined
1203
+ const stallLog = setInterval(() => {
1204
+ const stallMs = Date.now() - lastEventAt
1205
+ traceStore.stall(reqId, stallMs)
1206
+ }, 15_000)
555
1207
  try {
556
- for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking }) })) {
1208
+ traceStore.phase(reqId, "sdk_starting")
1209
+ for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
1210
+ sdkEventCount++
1211
+ lastEventAt = Date.now()
1212
+ resetStallTimer()
1213
+ const subtype = (message as any).event?.type ?? (message as any).message?.type
1214
+ // Capture session_id from init message
1215
+ if (message.type === "system" && (message as any).subtype === "init") {
1216
+ capturedSessionId2 = (message as any).session_id
1217
+ }
557
1218
  if (message.type === "stream_event") {
558
1219
  const ev = message.event as any
1220
+ // Detect first content event BEFORE sdkEvent records it
1221
+ if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1222
+ traceStore.phase(reqId, "sdk_streaming")
1223
+ }
559
1224
  if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
560
1225
  const text = ev.delta.text ?? ""
561
1226
  if (text) {
562
1227
  fullText += text
563
1228
  hasStreamed = true
1229
+ traceStore.updateOutput(reqId, fullText.length)
1230
+ checkOutputSize(fullText.length)
564
1231
  sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
565
1232
  }
566
1233
  }
567
1234
  }
1235
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1236
+ }
1237
+ traceStore.phase(reqId, "sdk_done")
1238
+
1239
+ // Store session mapping
1240
+ if (conversationId && capturedSessionId2) {
1241
+ if (isResuming) {
1242
+ sessionStore.recordResume(conversationId)
1243
+ } else {
1244
+ sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
1245
+ }
1246
+ }
1247
+ } catch (resumeErr) {
1248
+ // Resume failed in streaming no-tools path — retry with full context
1249
+ if (isResuming && resumeSessionId) {
1250
+ logWarn("session.resume_failed_stream", {
1251
+ reqId, conversationId, sdkSessionId: resumeSessionId,
1252
+ error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
1253
+ })
1254
+ if (conversationId) {
1255
+ sessionStore.recordFailure(conversationId)
1256
+ sessionStore.invalidate(conversationId)
1257
+ }
1258
+ const fbLastMsg = messages[messages.length - 1]!
1259
+ const priorMsgs = messages.slice(0, -1)
1260
+ const contextParts = priorMsgs
1261
+ .map((m) => {
1262
+ const role = m.role === "assistant" ? "Assistant" : "User"
1263
+ return `[${role}]\n${serializeContent(m.content)}`
1264
+ })
1265
+ .join("\n\n")
1266
+ const baseSystem = systemContext || ""
1267
+ const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
1268
+ const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
1269
+ const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
1270
+ ? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
1271
+ : serializeContent(fbLastMsg.content)
1272
+ const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
1273
+
1274
+ logInfo("session.fallback_full_context_stream", { reqId, conversationId })
1275
+ sdkEventCount = 0
1276
+ for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
1277
+ sdkEventCount++
1278
+ lastEventAt = Date.now()
1279
+ resetStallTimer()
1280
+ const subtype = (message as any).event?.type ?? (message as any).message?.type
1281
+ if (message.type === "system" && (message as any).subtype === "init") {
1282
+ capturedSessionId2 = (message as any).session_id
1283
+ }
1284
+ if (message.type === "stream_event") {
1285
+ const ev = message.event as any
1286
+ if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1287
+ traceStore.phase(reqId, "sdk_streaming")
1288
+ }
1289
+ if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1290
+ const text = ev.delta.text ?? ""
1291
+ if (text) {
1292
+ fullText += text
1293
+ hasStreamed = true
1294
+ traceStore.updateOutput(reqId, fullText.length)
1295
+ checkOutputSize(fullText.length)
1296
+ sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
1297
+ }
1298
+ }
1299
+ }
1300
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1301
+ }
1302
+ traceStore.phase(reqId, "sdk_done")
1303
+ if (conversationId && capturedSessionId2) {
1304
+ sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
1305
+ logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId2 })
1306
+ }
1307
+ } else {
1308
+ throw resumeErr
568
1309
  }
569
1310
  } finally {
1311
+ clearInterval(stallLog)
570
1312
  clearInterval(heartbeat)
571
- clearTimeout(timeout)
572
- cleanupTempFiles(tempFiles)
1313
+ clearStallTimer(); clearHardTimer()
1314
+ // (temp files no longer used — images passed natively)
573
1315
  releaseQueue()
574
1316
  }
575
1317
 
576
- claudeLog("proxy.stream.done", { reqId, len: fullText.length })
577
-
578
1318
  if (!hasStreamed) {
579
1319
  sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
580
1320
  }
@@ -584,18 +1324,58 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
584
1324
  sse("message_stop", { type: "message_stop" })
585
1325
  controller.close()
586
1326
 
1327
+ traceStore.complete(reqId, { outputLen: fullText.length })
1328
+
587
1329
  } catch (error) {
588
- clearTimeout(timeout)
1330
+ clearStallTimer(); clearHardTimer()
589
1331
  releaseQueue()
590
- const isAbort = error instanceof Error && error.name === "AbortError"
591
- const errMsg = isAbort ? "Request timeout" : (error instanceof Error ? error.message : "Unknown error")
592
- const errType = isAbort ? "overloaded_error" : "api_error"
593
- claudeLog("proxy.stream.error", { reqId, error: errMsg })
594
- cleanupTempFiles(tempFiles)
595
- try {
596
- sse("error", { type: "error", error: { type: errType, message: errMsg }, request_id: reqId })
597
- controller.close()
598
- } catch {}
1332
+ const err = error instanceof Error ? error : new Error(String(error))
1333
+ const isAbort = err.name === "AbortError" || err.message?.includes("abort")
1334
+ const isQueueTimeout = err.message.includes("Queue timeout")
1335
+
1336
+ let errMsg: string
1337
+ let errType: string
1338
+ if (clientDisconnected) {
1339
+ errMsg = "Client disconnected during streaming."
1340
+ errType = "api_error"
1341
+ } else if (abortReason === "max_duration") {
1342
+ errMsg = `Request exceeded max duration of ${finalConfig.maxDurationMs / 1000}s. Output: ${trace?.outputLen ?? 0} chars.`
1343
+ errType = "api_error"
1344
+ } else if (abortReason === "max_output") {
1345
+ errMsg = `Request exceeded max output size of ${finalConfig.maxOutputChars} chars.`
1346
+ errType = "api_error"
1347
+ } else if (isAbort) {
1348
+ errMsg = `Request stalled — no SDK activity for ${finalConfig.stallTimeoutMs / 1000}s. Please retry.`
1349
+ errType = "api_error"
1350
+ } else if (isQueueTimeout) {
1351
+ errMsg = "Server busy — all request slots are occupied. Please retry shortly."
1352
+ errType = "overloaded_error"
1353
+ } else {
1354
+ errMsg = err.message
1355
+ errType = "api_error"
1356
+ }
1357
+
1358
+ // Trace the failure with full context
1359
+ traceStore.fail(reqId, err, "error", {
1360
+ clientDisconnect: clientDisconnected,
1361
+ abortReason,
1362
+ aborted: isAbort,
1363
+ queueTimeout: isQueueTimeout,
1364
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
1365
+ maxDurationMs: finalConfig.maxDurationMs,
1366
+ maxOutputChars: finalConfig.maxOutputChars,
1367
+ sseSendErrors,
1368
+ })
1369
+
1370
+ // (temp files no longer used — images passed natively)
1371
+ if (!clientDisconnected) {
1372
+ try {
1373
+ sse("error", { type: "error", error: { type: errType, message: errMsg }, request_id: reqId })
1374
+ controller.close()
1375
+ } catch {}
1376
+ } else {
1377
+ try { controller.close() } catch {}
1378
+ }
599
1379
  }
600
1380
  }
601
1381
  })
@@ -609,15 +1389,53 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
609
1389
  })
610
1390
 
611
1391
  } catch (error) {
612
- const isAbort = error instanceof Error && error.name === "AbortError"
613
- const errMsg = isAbort ? "Request timeout" : (error instanceof Error ? error.message : "Unknown error")
614
- const errType = isAbort ? "overloaded_error" : "api_error"
615
- claudeLog("proxy.error", { reqId, error: errMsg })
616
- if (isAbort) {
1392
+ const err = error instanceof Error ? error : new Error(String(error))
1393
+ const isAbort = err.name === "AbortError" || err.message?.includes("abort")
1394
+ const isQueueTimeout = err.message.includes("Queue timeout")
1395
+
1396
+ let errMsg: string
1397
+ let errType: string
1398
+ if (clientDisconnected) {
1399
+ errMsg = "Client disconnected."
1400
+ errType = "api_error"
1401
+ } else if (abortReason === "max_duration") {
1402
+ errMsg = `Request exceeded max duration of ${finalConfig.maxDurationMs / 1000}s.`
1403
+ errType = "api_error"
1404
+ } else if (abortReason === "max_output") {
1405
+ errMsg = `Request exceeded max output size of ${finalConfig.maxOutputChars} chars.`
1406
+ errType = "api_error"
1407
+ } else if (isAbort) {
1408
+ errMsg = `Request stalled — no SDK activity for ${finalConfig.stallTimeoutMs / 1000}s. Please retry.`
1409
+ errType = "api_error"
1410
+ } else if (isQueueTimeout) {
1411
+ errMsg = "Server busy — all request slots are occupied. Please retry shortly."
1412
+ errType = "overloaded_error"
1413
+ } else {
1414
+ errMsg = err.message
1415
+ errType = "api_error"
1416
+ }
1417
+
1418
+ // Trace the failure
1419
+ if (trace) {
1420
+ traceStore.fail(reqId, err, "error", {
1421
+ clientDisconnect: clientDisconnected,
1422
+ aborted: isAbort,
1423
+ queueTimeout: isQueueTimeout,
1424
+ })
1425
+ } else {
1426
+ logError("request.error.no_trace", { reqId, error: errMsg, stack: err.stack })
1427
+ }
1428
+
1429
+ if (isQueueTimeout) {
617
1430
  return new Response(JSON.stringify({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }), {
618
1431
  status: 529, headers: { "Content-Type": "application/json" }
619
1432
  })
620
1433
  }
1434
+ if (isAbort) {
1435
+ return new Response(JSON.stringify({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }), {
1436
+ status: 504, headers: { "Content-Type": "application/json" }
1437
+ })
1438
+ }
621
1439
  return c.json({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }, 500)
622
1440
  }
623
1441
  }
@@ -635,20 +1453,15 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
635
1453
  app.get("/v1/messages/batches/:id", handleBatches)
636
1454
 
637
1455
  // ── OpenAI-compatible /v1/chat/completions ─────────────────────────────
638
- // Translates OpenAI ChatCompletion format to/from Anthropic Messages API
639
- // so tools expecting OpenAI endpoints (LangChain, LiteLLM, etc.) just work.
640
1456
 
641
1457
  function convertOpenaiContent(content: any): any {
642
- // String content → pass through
643
1458
  if (typeof content === "string") return content
644
1459
  if (!Array.isArray(content)) return String(content ?? "")
645
1460
 
646
- // Array content → convert image_url parts to Anthropic image blocks
647
1461
  return content.map((part: any) => {
648
1462
  if (part.type === "text") return { type: "text", text: part.text ?? "" }
649
1463
  if (part.type === "image_url" && part.image_url?.url) {
650
1464
  const url = part.image_url.url as string
651
- // Data URL: data:image/jpeg;base64,...
652
1465
  const dataMatch = url.match(/^data:(image\/\w+);base64,(.+)$/)
653
1466
  if (dataMatch) {
654
1467
  return {
@@ -660,7 +1473,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
660
1473
  }
661
1474
  }
662
1475
  }
663
- // HTTP URL — pass as URL source
664
1476
  return {
665
1477
  type: "image",
666
1478
  source: { type: "url", url }
@@ -683,7 +1495,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
683
1495
  } else if (msg.role === "user") {
684
1496
  converted.push({ role: "user", content: convertOpenaiContent(msg.content) })
685
1497
  } else if (msg.role === "assistant") {
686
- // Handle assistant messages with tool_calls (OpenAI format)
687
1498
  if (msg.tool_calls?.length) {
688
1499
  const content: any[] = []
689
1500
  if (msg.content) content.push({ type: "text", text: msg.content })
@@ -700,7 +1511,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
700
1511
  converted.push({ role: "assistant", content: msg.content ?? "" })
701
1512
  }
702
1513
  } else if (msg.role === "tool") {
703
- // OpenAI tool result → Anthropic tool_result
704
1514
  converted.push({
705
1515
  role: "user",
706
1516
  content: [{
@@ -782,7 +1592,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
782
1592
  const stream = body.stream ?? false
783
1593
  const requestedModel = body.model ?? "claude-sonnet-4-6"
784
1594
 
785
- // Build Anthropic-format request body
786
1595
  const anthropicBody: any = {
787
1596
  model: requestedModel,
788
1597
  messages,
@@ -795,12 +1604,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
795
1604
  if (body.temperature !== undefined) anthropicBody.temperature = body.temperature
796
1605
  if (body.top_p !== undefined) anthropicBody.top_p = body.top_p
797
1606
  if (body.stop) anthropicBody.stop_sequences = Array.isArray(body.stop) ? body.stop : [body.stop]
798
- // Convert OpenAI tools format to Anthropic tools format
799
1607
  if (body.tools?.length) {
800
1608
  anthropicBody.tools = openaiToAnthropicTools(body.tools)
801
1609
  }
802
1610
 
803
- // Forward to our own /v1/messages handler by making an internal request
804
1611
  const internalHeaders: Record<string, string> = { "Content-Type": "application/json" }
805
1612
  const authHeader = c.req.header("authorization") ?? c.req.header("x-api-key")
806
1613
  if (authHeader) {
@@ -821,7 +1628,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
821
1628
  return c.json(anthropicToOpenaiResponse(anthropicJson, requestedModel))
822
1629
  }
823
1630
 
824
- // Streaming: translate SSE events from Anthropic format to OpenAI format
825
1631
  const includeUsage = body.stream_options?.include_usage === true
826
1632
  const encoder = new TextEncoder()
827
1633
  const readable = new ReadableStream({
@@ -836,7 +1642,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
836
1642
  const created = Math.floor(Date.now() / 1000)
837
1643
  let sentRole = false
838
1644
  let finishReason: string | null = null
839
- // Track active tool calls for streaming
840
1645
  const activeToolCalls: Map<number, { id: string; name: string }> = new Map()
841
1646
  let toolCallIndex = 0
842
1647
  let usageInfo: { input_tokens: number; output_tokens: number } | null = null
@@ -854,7 +1659,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
854
1659
  try {
855
1660
  const event = JSON.parse(line.slice(6))
856
1661
 
857
- // Emit role delta on first event
858
1662
  if (!sentRole && (event.type === "content_block_start" || event.type === "content_block_delta")) {
859
1663
  sentRole = true
860
1664
  controller.enqueue(encoder.encode(`data: ${JSON.stringify({
@@ -864,7 +1668,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
864
1668
  }
865
1669
 
866
1670
  if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
867
- // Start of a tool_use block → emit tool_call function header
868
1671
  const idx = toolCallIndex++
869
1672
  activeToolCalls.set(event.index, { id: event.content_block.id, name: event.content_block.name })
870
1673
  controller.enqueue(encoder.encode(`data: ${JSON.stringify({
@@ -874,7 +1677,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
874
1677
  }, finish_reason: null }]
875
1678
  })}\n\n`))
876
1679
  } else if (event.type === "content_block_delta" && event.delta?.type === "input_json_delta") {
877
- // Tool call argument streaming
878
1680
  const tc = activeToolCalls.get(event.index)
879
1681
  if (tc) {
880
1682
  const idx = Array.from(activeToolCalls.keys()).indexOf(event.index)
@@ -891,7 +1693,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
891
1693
  choices: [{ index: 0, delta: { content: event.delta.text }, finish_reason: null }]
892
1694
  })}\n\n`))
893
1695
  } else if (event.type === "message_delta") {
894
- // Capture finish reason and usage for final chunk
895
1696
  const sr = event.delta?.stop_reason
896
1697
  finishReason = sr === "tool_use" ? "tool_calls" : sr === "max_tokens" ? "length" : "stop"
897
1698
  if (event.usage) {
@@ -903,7 +1704,6 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
903
1704
  }
904
1705
  }
905
1706
  } else if (event.type === "message_start" && event.message?.usage) {
906
- // Capture input token count from message_start
907
1707
  usageInfo = { input_tokens: event.message.usage.input_tokens ?? 0, output_tokens: 0 }
908
1708
  } else if (event.type === "message_stop") {
909
1709
  const finalChunk: any = {
@@ -959,7 +1759,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
959
1759
  })
960
1760
  app.get("/v1/chat/models", handleOpenaiModels)
961
1761
 
962
- // 404 catch-all — return Anthropic-format error for unknown routes
1762
+ // 404 catch-all
963
1763
  app.all("*", (c) => c.json({
964
1764
  type: "error",
965
1765
  error: { type: "not_found_error", message: `${c.req.method} ${c.req.path} not found` }
@@ -978,12 +1778,60 @@ export async function startProxyServer(config: Partial<ProxyConfig> = {}) {
978
1778
  idleTimeout: 0
979
1779
  })
980
1780
 
1781
+ // Startup log with full configuration
1782
+ logInfo("proxy.started", {
1783
+ version: PROXY_VERSION,
1784
+ host: finalConfig.host,
1785
+ port: finalConfig.port,
1786
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
1787
+ maxDurationMs: finalConfig.maxDurationMs,
1788
+ maxOutputChars: finalConfig.maxOutputChars,
1789
+ maxConcurrent: MAX_CONCURRENT,
1790
+ queueTimeoutMs: QUEUE_TIMEOUT_MS,
1791
+ claudeExecutable,
1792
+ logDir: LOG_DIR,
1793
+ debug: finalConfig.debug,
1794
+ pid: process.pid,
1795
+ })
1796
+
981
1797
  console.log(`Claude SDK Proxy v${PROXY_VERSION} running at http://${finalConfig.host}:${finalConfig.port}`)
1798
+ console.log(` Logs: ${LOG_DIR}`)
1799
+ console.log(` Debug: http://${finalConfig.host}:${finalConfig.port}/debug/stats`)
1800
+
1801
+ // Periodic health logging (every 5 minutes)
1802
+ const healthInterval = setInterval(() => {
1803
+ const mem = process.memoryUsage()
1804
+ const stats = traceStore.getStats()
1805
+ logInfo("proxy.health", {
1806
+ pid: process.pid,
1807
+ rssBytes: mem.rss,
1808
+ rssMB: +(mem.rss / 1024 / 1024).toFixed(1),
1809
+ heapUsedMB: +(mem.heapUsed / 1024 / 1024).toFixed(1),
1810
+ heapTotalMB: +(mem.heapTotal / 1024 / 1024).toFixed(1),
1811
+ externalMB: +(mem.external / 1024 / 1024).toFixed(1),
1812
+ uptimeMs: stats.uptimeMs,
1813
+ totalRequests: stats.requests.total,
1814
+ totalErrors: stats.requests.errors,
1815
+ activeRequests: stats.requests.active,
1816
+ queueActive: requestQueue.activeCount,
1817
+ queueWaiting: requestQueue.waitingCount,
1818
+ })
1819
+ }, 300_000) // 5 minutes
982
1820
 
983
1821
  // Graceful shutdown
984
1822
  const shutdown = (signal: string) => {
1823
+ const stats = traceStore.getStats()
1824
+ logInfo("proxy.shutdown", {
1825
+ signal,
1826
+ pid: process.pid,
1827
+ totalRequests: stats.requests.total,
1828
+ totalErrors: stats.requests.errors,
1829
+ activeRequests: stats.requests.active,
1830
+ uptimeMs: stats.uptimeMs,
1831
+ })
1832
+ clearInterval(healthInterval)
985
1833
  console.log(`\nReceived ${signal}, shutting down...`)
986
- server.stop(true) // true = wait for in-flight requests
1834
+ server.stop(true)
987
1835
  process.exit(0)
988
1836
  }
989
1837
  process.on("SIGINT", () => shutdown("SIGINT"))