claude-sdk-proxy 3.1.2 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1841 @@
1
+ import { Hono } from "hono"
2
+ import { cors } from "hono/cors"
3
+ import { query } from "@anthropic-ai/claude-agent-sdk"
4
+ import type { Context } from "hono"
5
+ import type { ProxyConfig } from "./types"
6
+ import { DEFAULT_PROXY_CONFIG } from "./types"
7
+ import { logInfo, logWarn, logError, logDebug, LOG_DIR } from "../logger"
8
+ import { traceStore } from "../trace"
9
+ import { sessionStore } from "../session-store"
10
+ import { execSync } from "child_process"
11
+ import { existsSync, writeFileSync, readFileSync, readdirSync } from "fs"
12
+ import { randomBytes } from "crypto"
13
+ import { fileURLToPath } from "url"
14
+ import { join, dirname } from "path"
15
+
16
+ // Base62 ID generator — matches Anthropic's real ID format (e.g. msg_01XFDUDYJgAACzvnptvVoYEL)
17
+ const BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
18
+ function generateId(prefix: string, length = 24): string {
19
+ const bytes = randomBytes(length)
20
+ let id = prefix
21
+ for (let i = 0; i < length; i++) id += BASE62[bytes[i]! % 62]
22
+ return id
23
+ }
24
+
25
+ const PROXY_VERSION: string = (() => {
26
+ try {
27
+ const pkg = JSON.parse(readFileSync(join(dirname(fileURLToPath(import.meta.url)), "../../package.json"), "utf-8"))
28
+ return pkg.version ?? "unknown"
29
+ } catch { return "unknown" }
30
+ })()
31
+
32
+ function resolveClaudeExecutable(): string {
33
+ try {
34
+ const sdkPath = fileURLToPath(import.meta.resolve("@anthropic-ai/claude-agent-sdk"))
35
+ const sdkCliJs = join(dirname(sdkPath), "cli.js")
36
+ if (existsSync(sdkCliJs)) return sdkCliJs
37
+ } catch {}
38
+ try {
39
+ const claudePath = execSync("which claude", { encoding: "utf-8" }).trim()
40
+ if (claudePath && existsSync(claudePath)) return claudePath
41
+ } catch {}
42
+ throw new Error("Could not find Claude Code executable. Install: npm install -g @anthropic-ai/claude-code")
43
+ }
44
+
45
+ const claudeExecutable = resolveClaudeExecutable()
46
+
47
+ // ── Concurrency control ──────────────────────────────────────────────────────
48
+ // Limits simultaneous Claude SDK sessions to prevent resource exhaustion.
49
+
50
+ const MAX_CONCURRENT = parseInt(process.env.CLAUDE_PROXY_MAX_CONCURRENT ?? "5", 10)
51
+
52
+ const QUEUE_TIMEOUT_MS = parseInt(process.env.CLAUDE_PROXY_QUEUE_TIMEOUT_MS ?? "30000", 10)
53
+
54
+ class RequestQueue {
55
+ private active = 0
56
+ private waiting: Array<{ resolve: () => void; reject: (err: Error) => void }> = []
57
+
58
+ get activeCount() { return this.active }
59
+ get waitingCount() { return this.waiting.length }
60
+
61
+ async acquire(): Promise<void> {
62
+ if (this.active < MAX_CONCURRENT) {
63
+ this.active++
64
+ return
65
+ }
66
+ return new Promise<void>((resolve, reject) => {
67
+ const entry = { resolve: () => { this.active++; resolve() }, reject }
68
+ this.waiting.push(entry)
69
+ const timer = setTimeout(() => {
70
+ const idx = this.waiting.indexOf(entry)
71
+ if (idx !== -1) {
72
+ this.waiting.splice(idx, 1)
73
+ reject(new Error("Queue timeout — all slots busy"))
74
+ }
75
+ }, QUEUE_TIMEOUT_MS)
76
+ const origResolve = entry.resolve
77
+ entry.resolve = () => { clearTimeout(timer); origResolve() }
78
+ })
79
+ }
80
+
81
+ release(): void {
82
+ this.active--
83
+ const next = this.waiting.shift()
84
+ if (next) next.resolve()
85
+ }
86
+ }
87
+
88
+ const requestQueue = new RequestQueue()
89
+
90
+ function mapModelToClaudeModel(model: string): "sonnet" | "opus" | "haiku" {
91
+ if (model.includes("opus")) return "opus"
92
+ if (model.includes("haiku")) return "haiku"
93
+ return "sonnet"
94
+ }
95
+
96
+ // ── Content-block serialization ──────────────────────────────────────────────
97
+
98
+ function serializeBlock(block: any): string {
99
+ switch (block.type) {
100
+ case "text":
101
+ return block.text || ""
102
+ case "image":
103
+ return "[Image attached]"
104
+ case "tool_use":
105
+ return `<tool_use>\n{"name": "${block.name}", "input": ${JSON.stringify(block.input ?? {})}}\n</tool_use>`
106
+ case "tool_result": {
107
+ const content = Array.isArray(block.content)
108
+ ? block.content.filter((b: any) => b.type === "text").map((b: any) => b.text).join("")
109
+ : String(block.content ?? "")
110
+ const truncated = content.length > 4000
111
+ ? content.slice(0, 4000) + `\n...[truncated ${content.length - 4000} chars]`
112
+ : content
113
+ return `[Tool Result (id: ${block.tool_use_id})]\n${truncated}\n[/Tool Result]`
114
+ }
115
+ case "thinking":
116
+ return ""
117
+ default:
118
+ return ""
119
+ }
120
+ }
121
+
122
+ function serializeContent(content: string | Array<any>): string {
123
+ if (typeof content === "string") return content
124
+ if (!Array.isArray(content)) return String(content)
125
+ return content.map(b => serializeBlock(b)).filter(Boolean).join("\n")
126
+ }
127
+
128
+ // ── Image handling via SDKUserMessage ────────────────────────────────────────
129
+ // The SDK query() accepts AsyncIterable<SDKUserMessage> which supports native
130
+ // Anthropic MessageParam content blocks including images. When images are
131
+ // detected, we pass them through natively instead of serializing to text.
132
+
133
+ function contentHasImages(content: string | Array<any>): boolean {
134
+ if (typeof content === "string") return false
135
+ if (!Array.isArray(content)) return false
136
+ return content.some((b: any) => b.type === "image")
137
+ }
138
+
139
+ /** Convert an Anthropic image content block to SDK-compatible format */
140
+ function toAnthropicImageBlock(block: any): any {
141
+ if (block.source) return block // already in Anthropic format
142
+ // openclaw may use { type: "image", data: "...", mimeType: "..." }
143
+ if (block.data && block.mimeType) {
144
+ return {
145
+ type: "image",
146
+ source: {
147
+ type: "base64",
148
+ media_type: block.mimeType,
149
+ data: block.data,
150
+ }
151
+ }
152
+ }
153
+ if (block.data && block.media_type) {
154
+ return {
155
+ type: "image",
156
+ source: {
157
+ type: "base64",
158
+ media_type: block.media_type,
159
+ data: block.data,
160
+ }
161
+ }
162
+ }
163
+ return block
164
+ }
165
+
166
+ /** Build Anthropic MessageParam content array, preserving images natively */
167
+ function buildNativeContent(content: string | Array<any>): Array<any> {
168
+ if (typeof content === "string") return [{ type: "text", text: content }]
169
+ if (!Array.isArray(content)) return [{ type: "text", text: String(content) }]
170
+ return content.map((block: any) => {
171
+ if (block.type === "image") return toAnthropicImageBlock(block)
172
+ if (block.type === "text") return { type: "text", text: block.text ?? "" }
173
+ // For other types, serialize to text
174
+ const serialized = serializeBlock(block)
175
+ return serialized ? { type: "text", text: serialized } : null
176
+ }).filter(Boolean)
177
+ }
178
+
179
+ /** Create an async iterable yielding a single SDKUserMessage with native content */
180
+ function createSDKUserMessage(content: Array<any>, sessionId?: string): AsyncIterable<any> {
181
+ const msg = {
182
+ type: "user" as const,
183
+ message: {
184
+ role: "user" as const,
185
+ content,
186
+ },
187
+ parent_tool_use_id: null,
188
+ session_id: sessionId ?? "",
189
+ }
190
+ return {
191
+ async *[Symbol.asyncIterator]() {
192
+ yield msg
193
+ }
194
+ }
195
+ }
196
+
197
+
198
+ // ── Client tool-use support ──────────────────────────────────────────────────
199
+
200
+ function buildClientToolsPrompt(tools: any[]): string {
201
+ const defs = tools.map((t: any) => {
202
+ const schema = t.input_schema ? `\nInput schema:\n${JSON.stringify(t.input_schema, null, 2)}` : ""
203
+ return `### ${t.name}\n${t.description ?? ""}${schema}`
204
+ }).join("\n\n")
205
+ return `\n\n## Available Tools\n\nTo call a tool, output a <tool_use> block:\n\n` +
206
+ `<tool_use>\n{"name": "TOOL_NAME", "input": {ARGUMENTS}}\n</tool_use>\n\n` +
207
+ `- You may write reasoning text before the block\n` +
208
+ `- Call multiple tools by including multiple <tool_use> blocks\n` +
209
+ `- Each block must be valid JSON with "name" and "input" keys\n\n` +
210
+ defs
211
+ }
212
+
213
+ interface ToolCall { id: string; name: string; input: unknown }
214
+
215
+ function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string } {
216
+ const calls: ToolCall[] = []
217
+ let firstIdx = -1
218
+
219
+ const xmlRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
220
+ let m: RegExpExecArray | null
221
+ while ((m = xmlRegex.exec(text)) !== null) {
222
+ if (firstIdx < 0) firstIdx = m.index
223
+ try {
224
+ const p = JSON.parse(m[1]!.trim())
225
+ calls.push({
226
+ id: generateId("toolu_"),
227
+ name: String(p.name ?? ""),
228
+ input: p.input ?? {}
229
+ })
230
+ } catch { /* skip malformed block */ }
231
+ }
232
+
233
+ if (calls.length === 0) {
234
+ const fcRegex = /<function_calls>([\s\S]*?)<\/function_calls>/g
235
+ while ((m = fcRegex.exec(text)) !== null) {
236
+ if (firstIdx < 0) firstIdx = m.index
237
+ try {
238
+ const parsed = JSON.parse(m[1]!.trim())
239
+ const items = Array.isArray(parsed) ? parsed : [parsed]
240
+ for (const p of items) {
241
+ if (p && typeof p.name === "string") {
242
+ calls.push({
243
+ id: generateId("toolu_"),
244
+ name: p.name,
245
+ input: p.input ?? p.parameters ?? {}
246
+ })
247
+ }
248
+ }
249
+ } catch { /* skip malformed block */ }
250
+ }
251
+ }
252
+
253
+ if (calls.length === 0) {
254
+ const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g
255
+ while ((m = invokeRegex.exec(text)) !== null) {
256
+ if (firstIdx < 0) firstIdx = m.index
257
+ const toolName = m[1]!
258
+ const body = m[2]!
259
+ const input: Record<string, any> = {}
260
+ const paramRegex = /<parameter\s+name="([^"]+)">([\s\S]*?)<\/parameter>/g
261
+ let pm: RegExpExecArray | null
262
+ while ((pm = paramRegex.exec(body)) !== null) {
263
+ const val = pm[2]!.trim()
264
+ try { input[pm[1]!] = JSON.parse(val) } catch { input[pm[1]!] = val }
265
+ }
266
+ calls.push({ id: generateId("toolu_"), name: toolName, input })
267
+ }
268
+ }
269
+
270
+ if (calls.length === 0) {
271
+ const bracketRegex = /\[Tool call:\s*(\w+)\s*\nInput:\s*([\s\S]*?)\]/g
272
+ while ((m = bracketRegex.exec(text)) !== null) {
273
+ if (firstIdx < 0) firstIdx = m.index
274
+ try {
275
+ const input = JSON.parse(m[2]!.trim())
276
+ calls.push({
277
+ id: generateId("toolu_"),
278
+ name: m[1]!.trim(),
279
+ input
280
+ })
281
+ } catch { /* skip malformed block */ }
282
+ }
283
+ }
284
+
285
+ return { toolCalls: calls, textBefore: firstIdx > 0 ? text.slice(0, firstIdx).trim() : "" }
286
+ }
287
+
288
+ function roughTokens(text: string): number {
289
+ return Math.ceil((text ?? "").length / 4)
290
+ }
291
+
292
+ // ── Conversation label extraction ────────────────────────────────────────────
293
+ // Openclaw embeds "Conversation info (untrusted metadata)" in the last user
294
+ // message containing a JSON block with conversation_label. Extract it to use
295
+ // as a stable conversation ID for session persistence.
296
+
297
+ function extractConversationLabel(messages: Array<{ role: string; content: string | Array<any> }>): string | null {
298
+ // Search from the last message backwards for a user message with metadata
299
+ for (let i = messages.length - 1; i >= 0; i--) {
300
+ const msg = messages[i]!
301
+ if (msg.role !== "user") continue
302
+
303
+ const text = typeof msg.content === "string"
304
+ ? msg.content
305
+ : Array.isArray(msg.content)
306
+ ? msg.content.filter((b: any) => b.type === "text").map((b: any) => b.text ?? "").join("\n")
307
+ : ""
308
+
309
+ // Look for the JSON block after "Conversation info"
310
+ const jsonMatch = text.match(/Conversation info[^`]*```json\s*(\{[\s\S]*?\})\s*```/)
311
+ if (!jsonMatch?.[1]) continue
312
+
313
+ try {
314
+ const meta = JSON.parse(jsonMatch[1])
315
+ // conversation_label is present for both PMs and groups
316
+ if (meta.conversation_label) return meta.conversation_label
317
+ // Fallback: use sender_id if no label (shouldn't happen but just in case)
318
+ if (meta.sender_id) return `dm:${meta.sender_id}`
319
+ } catch {
320
+ // Regex fallback if JSON parse fails
321
+ const labelMatch = text.match(/"conversation_label"\s*:\s*"([^"]*)"/)
322
+ if (labelMatch?.[1]) return labelMatch[1]
323
+ }
324
+ }
325
+ return null
326
+ }
327
+
328
+ // ── Query options builder ────────────────────────────────────────────────────
329
+
330
+ function buildQueryOptions(
331
+ model: "sonnet" | "opus" | "haiku",
332
+ opts: {
333
+ partial?: boolean
334
+ systemPrompt?: string
335
+ abortController?: AbortController
336
+ thinking?: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" }
337
+ resume?: string
338
+ } = {}
339
+ ) {
340
+ return {
341
+ model,
342
+ pathToClaudeCodeExecutable: claudeExecutable,
343
+ permissionMode: "bypassPermissions" as const,
344
+ allowDangerouslySkipPermissions: true,
345
+ persistSession: true,
346
+ settingSources: [],
347
+ tools: ["_proxy_noop_"] as string[],
348
+ maxTurns: 1,
349
+ ...(opts.partial ? { includePartialMessages: true } : {}),
350
+ ...(opts.abortController ? { abortController: opts.abortController } : {}),
351
+ ...(opts.thinking ? { thinking: opts.thinking } : {}),
352
+ ...(opts.systemPrompt ? { systemPrompt: opts.systemPrompt } : {}),
353
+ ...(opts.resume ? { resume: opts.resume } : {}),
354
+ }
355
+ }
356
+
357
+ // ── Route handler ────────────────────────────────────────────────────────────
358
+
359
+ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
360
+ const finalConfig = { ...DEFAULT_PROXY_CONFIG, ...config }
361
+ const app = new Hono()
362
+
363
+ app.use("*", cors())
364
+
365
+ // Optional API key validation
366
+ const requiredApiKey = process.env.CLAUDE_PROXY_API_KEY
367
+ if (requiredApiKey) {
368
+ app.use("*", async (c, next) => {
369
+ if (c.req.path === "/" || c.req.path.startsWith("/debug") || c.req.method === "OPTIONS") return next()
370
+ const key = c.req.header("x-api-key")
371
+ ?? c.req.header("authorization")?.replace(/^Bearer\s+/i, "")
372
+ if (key !== requiredApiKey) {
373
+ return c.json({
374
+ type: "error",
375
+ error: { type: "authentication_error", message: "Invalid API key" },
376
+ request_id: c.res.headers.get("request-id") ?? generateId("req_")
377
+ }, 401)
378
+ }
379
+ return next()
380
+ })
381
+ }
382
+
383
+ // Anthropic-compatible headers + HTTP request logging
384
+ app.use("*", async (c, next) => {
385
+ const start = Date.now()
386
+ const requestId = c.req.header("x-request-id") ?? generateId("req_")
387
+ c.header("x-request-id", requestId)
388
+ c.header("request-id", requestId)
389
+ c.header("anthropic-version", "2023-06-01")
390
+ const betaHeader = c.req.header("anthropic-beta")
391
+ if (betaHeader) c.header("anthropic-beta", betaHeader)
392
+ await next()
393
+ const ms = Date.now() - start
394
+ // Only log non-debug HTTP requests at info level; debug endpoints at debug level
395
+ if (c.req.path.startsWith("/debug")) {
396
+ logDebug("http.request", { method: c.req.method, path: c.req.path, status: c.res.status, ms, reqId: requestId })
397
+ } else {
398
+ logInfo("http.request", { method: c.req.method, path: c.req.path, status: c.res.status, ms, reqId: requestId })
399
+ }
400
+ })
401
+
402
+ // ── Health / Info ────────────────────────────────────────────────────────
403
+
404
+ app.get("/", (c) => c.json({
405
+ status: "ok",
406
+ service: "claude-sdk-proxy",
407
+ version: PROXY_VERSION,
408
+ format: "anthropic",
409
+ endpoints: ["/v1/messages", "/v1/models", "/v1/chat/completions", "/debug/stats", "/debug/traces", "/debug/errors", "/debug/active", "/debug/health", "/sessions", "/sessions/cleanup"],
410
+ queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
411
+ logDir: LOG_DIR,
412
+ }))
413
+
414
+ // ── Debug / Observability endpoints ──────────────────────────────────────
415
+
416
+ app.get("/debug/stats", (c) => {
417
+ const stats = traceStore.getStats()
418
+ const sessionStats = sessionStore.getStats()
419
+ return c.json({
420
+ version: PROXY_VERSION,
421
+ config: {
422
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
423
+ maxDurationMs: finalConfig.maxDurationMs,
424
+ maxOutputChars: finalConfig.maxOutputChars,
425
+ maxConcurrent: MAX_CONCURRENT,
426
+ queueTimeoutMs: QUEUE_TIMEOUT_MS,
427
+ claudeExecutable,
428
+ logDir: LOG_DIR,
429
+ debug: finalConfig.debug,
430
+ },
431
+ queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
432
+ sessions: sessionStats,
433
+ ...stats,
434
+ })
435
+ })
436
+
437
+ // ── Session management endpoints ──────────────────────────────────────
438
+
439
+ app.get("/sessions", (c) => {
440
+ return c.json({
441
+ sessions: sessionStore.list(),
442
+ stats: sessionStore.getStats(),
443
+ })
444
+ })
445
+
446
+ app.get("/sessions/cleanup", (c) => {
447
+ const result = sessionStore.cleanup()
448
+ return c.json(result)
449
+ })
450
+
451
+ app.get("/debug/traces", (c) => {
452
+ const limit = parseInt(c.req.query("limit") ?? "20", 10)
453
+ return c.json(traceStore.getRecentTraces(limit))
454
+ })
455
+
456
+ app.get("/debug/traces/:id", (c) => {
457
+ const id = c.req.param("id")
458
+ const trace = traceStore.getTrace(id)
459
+ if (!trace) return c.json({ error: "Trace not found", reqId: id }, 404)
460
+ return c.json(trace)
461
+ })
462
+
463
+ app.get("/debug/errors", (c) => {
464
+ const limit = parseInt(c.req.query("limit") ?? "10", 10)
465
+ return c.json(traceStore.getRecentErrors(limit))
466
+ })
467
+
468
+ app.get("/debug/logs", (c) => {
469
+ // List available log files
470
+ try {
471
+ const files = readdirSync(LOG_DIR)
472
+ .filter(f => f.startsWith("proxy-") && f.endsWith(".log"))
473
+ .sort()
474
+ .reverse()
475
+ return c.json({ logDir: LOG_DIR, files })
476
+ } catch {
477
+ return c.json({ logDir: LOG_DIR, files: [], error: "Cannot read log directory" })
478
+ }
479
+ })
480
+
481
+ app.get("/debug/logs/:filename", (c) => {
482
+ // Serve a specific log file (last N lines)
483
+ const filename = c.req.param("filename")
484
+ if (!filename.match(/^proxy-\d{4}-\d{2}-\d{2}\.log$/)) {
485
+ return c.json({ error: "Invalid log filename" }, 400)
486
+ }
487
+ const tail = parseInt(c.req.query("tail") ?? "100", 10)
488
+ try {
489
+ const content = readFileSync(join(LOG_DIR, filename), "utf-8")
490
+ const lines = content.trim().split("\n")
491
+ const sliced = lines.slice(-tail)
492
+ const parsed = sliced.map(line => {
493
+ try { return JSON.parse(line) } catch { return { raw: line } }
494
+ })
495
+ return c.json({ file: filename, total: lines.length, returned: sliced.length, lines: parsed })
496
+ } catch {
497
+ return c.json({ error: "Log file not found" }, 404)
498
+ }
499
+ })
500
+
501
+ app.get("/debug/errors/:id", (c) => {
502
+ // Serve a specific error dump file
503
+ const id = c.req.param("id")
504
+ if (!id.match(/^req_/)) return c.json({ error: "Invalid request ID format" }, 400)
505
+ try {
506
+ const content = readFileSync(join(LOG_DIR, "errors", `${id}.json`), "utf-8")
507
+ return c.json(JSON.parse(content))
508
+ } catch {
509
+ return c.json({ error: "Error dump not found", reqId: id }, 404)
510
+ }
511
+ })
512
+
513
+ app.get("/debug/active", (c) => {
514
+ // Detailed view of currently active requests
515
+ const stats = traceStore.getStats()
516
+ return c.json({
517
+ queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
518
+ activeRequests: stats.activeRequests,
519
+ })
520
+ })
521
+
522
+ app.get("/debug/health", (c) => {
523
+ // Process health: memory, uptime, resource usage
524
+ const mem = process.memoryUsage()
525
+ const stats = traceStore.getStats()
526
+ return c.json({
527
+ version: PROXY_VERSION,
528
+ pid: process.pid,
529
+ uptimeMs: stats.uptimeMs,
530
+ uptimeHuman: stats.uptimeHuman,
531
+ memory: {
532
+ rss: `${(mem.rss / 1024 / 1024).toFixed(1)}MB`,
533
+ heapUsed: `${(mem.heapUsed / 1024 / 1024).toFixed(1)}MB`,
534
+ heapTotal: `${(mem.heapTotal / 1024 / 1024).toFixed(1)}MB`,
535
+ external: `${(mem.external / 1024 / 1024).toFixed(1)}MB`,
536
+ rssBytes: mem.rss,
537
+ heapUsedBytes: mem.heapUsed,
538
+ },
539
+ queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
540
+ requests: stats.requests,
541
+ config: {
542
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
543
+ maxConcurrent: MAX_CONCURRENT,
544
+ queueTimeoutMs: QUEUE_TIMEOUT_MS,
545
+ debug: finalConfig.debug,
546
+ },
547
+ })
548
+ })
549
+
550
+ // ── Model endpoints ──────────────────────────────────────────────────────
551
+
552
+ const MODELS = [
553
+ { type: "model", id: "claude-opus-4-6", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
554
+ { type: "model", id: "claude-opus-4-6-20250801", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
555
+ { type: "model", id: "claude-sonnet-4-6", display_name: "Claude Sonnet 4.6", created_at: "2025-08-01T00:00:00Z" },
556
+ { type: "model", id: "claude-sonnet-4-6-20250801", display_name: "Claude Sonnet 4.6", created_at: "2025-08-01T00:00:00Z" },
557
+ { type: "model", id: "claude-sonnet-4-5-20250929", display_name: "Claude Sonnet 4.5", created_at: "2025-09-29T00:00:00Z" },
558
+ { type: "model", id: "claude-haiku-4-5", display_name: "Claude Haiku 4.5", created_at: "2025-10-01T00:00:00Z" },
559
+ { type: "model", id: "claude-haiku-4-5-20251001", display_name: "Claude Haiku 4.5", created_at: "2025-10-01T00:00:00Z" },
560
+ ]
561
+
562
+ const MODELS_DUAL = MODELS.map(m => ({
563
+ ...m,
564
+ object: "model" as const,
565
+ created: Math.floor(new Date(m.created_at).getTime() / 1000),
566
+ owned_by: "anthropic" as const
567
+ }))
568
+
569
+ const handleModels = (c: Context) => c.json({ object: "list", data: MODELS_DUAL })
570
+ app.get("/v1/models", handleModels)
571
+ app.get("/models", handleModels)
572
+
573
+ const handleModel = (c: Context) => {
574
+ const id = c.req.param("id")
575
+ const model = MODELS_DUAL.find(m => m.id === id)
576
+ if (!model) return c.json({ type: "error", error: { type: "not_found_error", message: `Model \`${id}\` not found` } }, 404)
577
+ return c.json(model)
578
+ }
579
+ app.get("/v1/models/:id", handleModel)
580
+ app.get("/models/:id", handleModel)
581
+
582
+ const handleCountTokens = async (c: Context) => {
583
+ try {
584
+ const body = await c.req.json()
585
+ const sysText = Array.isArray(body.system)
586
+ ? body.system.filter((b: any) => b.type === "text").map((b: any) => b.text).join("\n")
587
+ : String(body.system ?? "")
588
+ const msgText = (body.messages ?? [])
589
+ .map((m: any) => typeof m.content === "string" ? m.content : JSON.stringify(m.content))
590
+ .join("\n")
591
+ return c.json({ input_tokens: roughTokens(sysText + msgText) })
592
+ } catch {
593
+ return c.json({ input_tokens: 0 })
594
+ }
595
+ }
596
+ app.post("/v1/messages/count_tokens", handleCountTokens)
597
+ app.post("/messages/count_tokens", handleCountTokens)
598
+
599
+ // ── Messages handler ─────────────────────────────────────────────────────
600
+
601
+ const handleMessages = async (c: Context) => {
602
+ const reqId = generateId("req_")
603
+ // Will be set after body parse; needed for outer catch
604
+ let trace: ReturnType<typeof traceStore.create> | undefined
605
+ let requestStarted = Date.now()
606
+ let clientDisconnected = false
607
+ let abortReason: "stall" | "max_duration" | "max_output" | null = null
608
+
609
+ try {
610
+ let body: any
611
+ try {
612
+ body = await c.req.json()
613
+ } catch (parseErr) {
614
+ logWarn("request.invalid_json", { reqId })
615
+ return c.json({ type: "error", error: { type: "invalid_request_error", message: "Request body must be valid JSON" }, request_id: reqId }, 400)
616
+ }
617
+
618
+ if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
619
+ logWarn("request.missing_messages", { reqId })
620
+ return c.json({ type: "error", error: { type: "invalid_request_error", message: "messages is required and must be a non-empty array" }, request_id: reqId }, 400)
621
+ }
622
+
623
+ const model = mapModelToClaudeModel(body.model || "sonnet")
624
+ const stream = body.stream ?? false
625
+ const hasTools = body.tools?.length > 0
626
+ const abortController = new AbortController()
627
+
628
+ // Stall-based timeout: only aborts if no SDK events received for stallTimeoutMs.
629
+ // Resets on every SDK event, so active requests never get killed.
630
+ // NOTE: not started until queue is acquired — queue wait doesn't count.
631
+ let stallTimer: ReturnType<typeof setTimeout> | null = null
632
+ const resetStallTimer = () => {
633
+ if (stallTimer) clearTimeout(stallTimer)
634
+ stallTimer = setTimeout(() => {
635
+ abortReason = "stall"
636
+ logWarn("request.stall_timeout", {
637
+ reqId,
638
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
639
+ phase: trace?.phase,
640
+ sdkEventCount: trace?.sdkEventCount,
641
+ outputLen: trace?.outputLen,
642
+ lastEventType: trace?.lastEventType,
643
+ })
644
+ abortController.abort()
645
+ }, finalConfig.stallTimeoutMs)
646
+ }
647
+ const clearStallTimer = () => {
648
+ if (stallTimer) { clearTimeout(stallTimer); stallTimer = null }
649
+ }
650
+
651
+ // Hard max duration: kills request even if actively streaming. Safety valve.
652
+ let hardTimer: ReturnType<typeof setTimeout> | null = null
653
+ const startHardTimer = () => {
654
+ hardTimer = setTimeout(() => {
655
+ abortReason = "max_duration"
656
+ logError("request.max_duration", {
657
+ reqId,
658
+ maxDurationMs: finalConfig.maxDurationMs,
659
+ phase: trace?.phase,
660
+ sdkEventCount: trace?.sdkEventCount,
661
+ outputLen: trace?.outputLen,
662
+ model: trace?.model,
663
+ lastEventType: trace?.lastEventType,
664
+ })
665
+ abortController.abort()
666
+ }, finalConfig.maxDurationMs)
667
+ }
668
+ const clearHardTimer = () => {
669
+ if (hardTimer) { clearTimeout(hardTimer); hardTimer = null }
670
+ }
671
+
672
+ // Output size check: kills request if output exceeds maxOutputChars.
673
+ const checkOutputSize = (outputLen: number) => {
674
+ if (outputLen > finalConfig.maxOutputChars && !abortReason) {
675
+ abortReason = "max_output"
676
+ logError("request.max_output", {
677
+ reqId,
678
+ outputLen,
679
+ maxOutputChars: finalConfig.maxOutputChars,
680
+ phase: trace?.phase,
681
+ sdkEventCount: trace?.sdkEventCount,
682
+ model: trace?.model,
683
+ elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
684
+ })
685
+ abortController.abort()
686
+ }
687
+ }
688
+
689
+ const thinking: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" } | undefined =
690
+ body.thinking?.type === "enabled" ? { type: "enabled", budgetTokens: body.thinking.budget_tokens }
691
+ : body.thinking?.type === "disabled" ? { type: "disabled" }
692
+ : body.thinking?.type === "adaptive" ? { type: "adaptive" }
693
+ : undefined
694
+
695
+ let systemContext = ""
696
+ if (body.system) {
697
+ if (typeof body.system === "string") {
698
+ systemContext = body.system
699
+ } else if (Array.isArray(body.system)) {
700
+ systemContext = body.system
701
+ .filter((b: any) => b.type === "text" && b.text)
702
+ .map((b: any) => b.text)
703
+ .join("\n")
704
+ }
705
+ }
706
+
707
+ const messages = body.messages as Array<{ role: string; content: string | Array<any> }>
708
+
709
+ let promptText: string // text version for token counting / logging
710
+ let systemPrompt: string | undefined
711
+ const toolsSection = hasTools ? buildClientToolsPrompt(body.tools) : ""
712
+
713
+ // ── Session resumption ─────────────────────────────────────────────
714
+ // Derive conversation ID from: headers (explicit) or conversation_label
715
+ // embedded in openclaw message metadata.
716
+ const conversationId = c.req.header("x-conversation-id")
717
+ ?? c.req.header("x-session-id")
718
+ ?? extractConversationLabel(messages)
719
+ ?? null
720
+
721
+ let resumeSessionId: string | undefined
722
+ let isResuming = false
723
+
724
+ if (conversationId && messages.length > 1) {
725
+ const stored = sessionStore.get(conversationId)
726
+ if (stored && stored.model === model) {
727
+ resumeSessionId = stored.sdkSessionId
728
+ isResuming = true
729
+ logInfo("session.resuming", {
730
+ reqId,
731
+ conversationId,
732
+ sdkSessionId: resumeSessionId,
733
+ storedMsgCount: stored.messageCount,
734
+ currentMsgCount: messages.length,
735
+ resumeCount: stored.resumeCount,
736
+ })
737
+ }
738
+ }
739
+
740
+ // Check if last user message contains images — if so, use native SDK multimodal input
741
+ const lastMsg = messages[messages.length - 1]!
742
+ const lastMsgHasImages = contentHasImages(lastMsg.content)
743
+
744
+ // promptInput: either a string (text-only) or AsyncIterable<SDKUserMessage> (multimodal)
745
+ let promptInput: string | AsyncIterable<any>
746
+ // promptText: always the text-only version for token counting and logging
747
+ promptText = serializeContent(lastMsg.content)
748
+
749
+ if (isResuming && resumeSessionId) {
750
+ systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
751
+ if (lastMsgHasImages) {
752
+ promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content), resumeSessionId)
753
+ logInfo("session.resume_with_images", { reqId, conversationId })
754
+ } else {
755
+ promptInput = promptText
756
+ }
757
+ } else if (messages.length === 1) {
758
+ systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
759
+ promptInput = lastMsgHasImages
760
+ ? createSDKUserMessage(buildNativeContent(lastMsg.content))
761
+ : promptText
762
+ if (lastMsgHasImages) logInfo("request.native_images", { reqId })
763
+ } else {
764
+ const priorMsgs = messages.slice(0, -1)
765
+
766
+ const contextParts = priorMsgs
767
+ .map((m) => {
768
+ const role = m.role === "assistant" ? "Assistant" : "User"
769
+ return `[${role}]\n${serializeContent(m.content)}`
770
+ })
771
+ .join("\n\n")
772
+
773
+ const baseSystem = systemContext || ""
774
+ const contextSection = contextParts
775
+ ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---`
776
+ : ""
777
+ systemPrompt = (baseSystem + contextSection + toolsSection).trim() || undefined
778
+
779
+ if (lastMsgHasImages) {
780
+ promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content))
781
+ logInfo("request.native_images", { reqId })
782
+ } else {
783
+ promptInput = promptText
784
+ }
785
+ }
786
+
787
+ requestStarted = Date.now()
788
+
789
+ // Capture client info
790
+ const clientIp = c.req.header("x-forwarded-for")
791
+ ?? c.req.header("x-real-ip")
792
+ ?? c.req.header("cf-connecting-ip")
793
+ ?? "unknown"
794
+ const userAgent = c.req.header("user-agent") ?? "unknown"
795
+ const bodyBytes = JSON.stringify(body).length
796
+
797
+ // ── Create trace ──────────────────────────────────────────────────────
798
+ trace = traceStore.create({
799
+ reqId,
800
+ model,
801
+ requestedModel: body.model || "sonnet",
802
+ stream,
803
+ hasTools,
804
+ thinking: thinking?.type,
805
+ promptLen: promptText.length,
806
+ systemLen: systemPrompt?.length ?? 0,
807
+ msgCount: messages.length,
808
+ bodyBytes,
809
+ clientIp,
810
+ userAgent,
811
+ })
812
+
813
+ // ── Queue ─────────────────────────────────────────────────────────────
814
+ const queueActive = requestQueue.activeCount
815
+ const queueWaiting = requestQueue.waitingCount
816
+ const needsQueue = queueActive >= MAX_CONCURRENT
817
+
818
+ traceStore.phase(reqId, "queued", { queueActive, queueWaiting })
819
+
820
+ if (needsQueue) {
821
+ logInfo("queue.waiting", {
822
+ reqId,
823
+ model,
824
+ queueActive,
825
+ queueWaiting,
826
+ queueTimeoutMs: QUEUE_TIMEOUT_MS,
827
+ })
828
+ }
829
+
830
+ await requestQueue.acquire()
831
+
832
+ const queueWaitMs = Date.now() - requestStarted
833
+ traceStore.phase(reqId, "acquired", { queueWaitMs })
834
+
835
+ logInfo("queue.acquired", {
836
+ reqId,
837
+ queueWaitMs,
838
+ queueActive: requestQueue.activeCount,
839
+ queueWaiting: requestQueue.waitingCount,
840
+ })
841
+
842
+ // Start timers AFTER queue acquire — queue wait doesn't count
843
+ resetStallTimer()
844
+ startHardTimer()
845
+
846
+ // ── Non-streaming ──────────────────────────────────────────────────────
847
+ if (!stream) {
848
+ let fullText = ""
849
+ let capturedSessionId: string | undefined
850
+ const queryOpts = buildQueryOptions(model, { partial: false, systemPrompt, abortController, thinking, resume: resumeSessionId })
851
+ try {
852
+ traceStore.phase(reqId, "sdk_starting")
853
+ let sdkEventCount = 0
854
+ for await (const message of query({ prompt: promptInput, options: queryOpts })) {
855
+ sdkEventCount++
856
+ resetStallTimer()
857
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
858
+ // Capture session_id from init message
859
+ if (message.type === "system" && (message as any).subtype === "init") {
860
+ capturedSessionId = (message as any).session_id
861
+ }
862
+ if (message.type === "assistant") {
863
+ let turnText = ""
864
+ for (const block of message.message.content) {
865
+ if (block.type === "text") turnText += block.text
866
+ }
867
+ fullText = turnText
868
+ }
869
+ }
870
+ traceStore.phase(reqId, "sdk_done")
871
+
872
+ // Store session mapping for future resumption
873
+ if (conversationId && capturedSessionId) {
874
+ if (isResuming) {
875
+ sessionStore.recordResume(conversationId)
876
+ logInfo("session.resumed_ok", { reqId, conversationId, sdkSessionId: capturedSessionId })
877
+ } else {
878
+ sessionStore.set(conversationId, capturedSessionId, model, messages.length)
879
+ logInfo("session.created", { reqId, conversationId, sdkSessionId: capturedSessionId })
880
+ }
881
+ }
882
+ } catch (resumeErr) {
883
+ // If resume failed, retry with full context
884
+ if (isResuming && resumeSessionId) {
885
+ logWarn("session.resume_failed", {
886
+ reqId,
887
+ conversationId,
888
+ sdkSessionId: resumeSessionId,
889
+ error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
890
+ })
891
+ if (conversationId) {
892
+ sessionStore.recordFailure(conversationId)
893
+ sessionStore.invalidate(conversationId)
894
+ }
895
+ // Rebuild with full context (non-resume path)
896
+ const fbLastMsg = messages[messages.length - 1]!
897
+ const priorMsgs = messages.slice(0, -1)
898
+ const contextParts = priorMsgs
899
+ .map((m) => {
900
+ const role = m.role === "assistant" ? "Assistant" : "User"
901
+ return `[${role}]\n${serializeContent(m.content)}`
902
+ })
903
+ .join("\n\n")
904
+ const baseSystem = systemContext || ""
905
+ const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
906
+ const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
907
+ const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
908
+ ? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
909
+ : serializeContent(fbLastMsg.content)
910
+ const fallbackOpts = buildQueryOptions(model, { partial: false, systemPrompt: fallbackSystem, abortController, thinking })
911
+
912
+ logInfo("session.fallback_full_context", { reqId, conversationId })
913
+ let sdkEventCount = 0
914
+ for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
915
+ sdkEventCount++
916
+ resetStallTimer()
917
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
918
+ if (message.type === "system" && (message as any).subtype === "init") {
919
+ capturedSessionId = (message as any).session_id
920
+ }
921
+ if (message.type === "assistant") {
922
+ let turnText = ""
923
+ for (const block of message.message.content) {
924
+ if (block.type === "text") turnText += block.text
925
+ }
926
+ fullText = turnText
927
+ }
928
+ }
929
+ traceStore.phase(reqId, "sdk_done")
930
+ // Store the new session
931
+ if (conversationId && capturedSessionId) {
932
+ sessionStore.set(conversationId, capturedSessionId, model, messages.length)
933
+ logInfo("session.recreated_after_fallback", { reqId, conversationId, sdkSessionId: capturedSessionId })
934
+ }
935
+ } else {
936
+ throw resumeErr
937
+ }
938
+ } finally {
939
+ clearStallTimer(); clearHardTimer()
940
+ // (temp files no longer used — images passed natively)
941
+ requestQueue.release()
942
+ logDebug("queue.released", {
943
+ reqId,
944
+ queueActive: requestQueue.activeCount,
945
+ queueWaiting: requestQueue.waitingCount,
946
+ })
947
+ }
948
+
949
+ traceStore.phase(reqId, "responding")
950
+
951
+ if (hasTools) {
952
+ const { toolCalls, textBefore } = parseToolUse(fullText)
953
+ const content: any[] = []
954
+ if (textBefore) content.push({ type: "text", text: textBefore })
955
+ for (const tc of toolCalls) content.push({ type: "tool_use", id: tc.id, name: tc.name, input: tc.input })
956
+ if (content.length === 0) content.push({ type: "text", text: fullText || "..." })
957
+ const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
958
+
959
+ traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
960
+
961
+ return c.json({
962
+ id: generateId("msg_"),
963
+ type: "message", role: "assistant", content,
964
+ model: body.model, stop_reason: stopReason, stop_sequence: null,
965
+ usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
966
+ })
967
+ }
968
+
969
+ if (!fullText || !fullText.trim()) fullText = "..."
970
+ traceStore.complete(reqId, { outputLen: fullText.length })
971
+
972
+ return c.json({
973
+ id: generateId("msg_"),
974
+ type: "message", role: "assistant",
975
+ content: [{ type: "text", text: fullText }],
976
+ model: body.model, stop_reason: "end_turn", stop_sequence: null,
977
+ usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
978
+ })
979
+ }
980
+
981
+ // ── Streaming ──────────────────────────────────────────────────────────
982
+ const encoder = new TextEncoder()
983
+ const readable = new ReadableStream({
984
+ cancel() {
985
+ clientDisconnected = true
986
+ logWarn("stream.client_disconnect", {
987
+ reqId,
988
+ phase: trace?.phase,
989
+ sdkEventCount: trace?.sdkEventCount,
990
+ outputLen: trace?.outputLen,
991
+ elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
992
+ model: trace?.model,
993
+ })
994
+ abortController.abort()
995
+ },
996
+ async start(controller) {
997
+ const messageId = generateId("msg_")
998
+ let queueReleased = false
999
+ const releaseQueue = () => {
1000
+ if (!queueReleased) {
1001
+ queueReleased = true
1002
+ requestQueue.release()
1003
+ logDebug("queue.released", {
1004
+ reqId,
1005
+ queueActive: requestQueue.activeCount,
1006
+ queueWaiting: requestQueue.waitingCount,
1007
+ })
1008
+ }
1009
+ }
1010
+
1011
+ let sseSendErrors = 0
1012
+ const sse = (event: string, data: object) => {
1013
+ try {
1014
+ controller.enqueue(encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`))
1015
+ } catch (e) {
1016
+ sseSendErrors++
1017
+ if (sseSendErrors <= 3) {
1018
+ logWarn("stream.sse_send_failed", {
1019
+ reqId,
1020
+ event,
1021
+ sseSendErrors,
1022
+ error: e instanceof Error ? e.message : String(e),
1023
+ })
1024
+ }
1025
+ }
1026
+ }
1027
+
1028
+ try {
1029
+ const heartbeat = setInterval(() => {
1030
+ try {
1031
+ controller.enqueue(encoder.encode(`event: ping\ndata: {"type": "ping"}\n\n`))
1032
+ } catch (e) {
1033
+ logWarn("stream.heartbeat_failed", {
1034
+ reqId,
1035
+ error: e instanceof Error ? e.message : String(e),
1036
+ phase: trace?.phase,
1037
+ elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
1038
+ })
1039
+ clearInterval(heartbeat)
1040
+ }
1041
+ }, 15_000)
1042
+
1043
+ sse("message_start", {
1044
+ type: "message_start",
1045
+ message: {
1046
+ id: messageId, type: "message", role: "assistant", content: [],
1047
+ model: body.model, stop_reason: null, stop_sequence: null,
1048
+ usage: { input_tokens: roughTokens(promptText), output_tokens: 1 }
1049
+ }
1050
+ })
1051
+
1052
+ if (hasTools) {
1053
+ // ── With tools: buffer output, parse tool_use blocks at end ──
1054
+ let fullText = ""
1055
+ let sdkEventCount = 0
1056
+ let lastEventAt = Date.now()
1057
+ const stallLog = setInterval(() => {
1058
+ const stallMs = Date.now() - lastEventAt
1059
+ traceStore.stall(reqId, stallMs)
1060
+ }, 15_000)
1061
+ let capturedSessionId: string | undefined
1062
+ try {
1063
+ traceStore.phase(reqId, "sdk_starting")
1064
+ for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
1065
+ sdkEventCount++
1066
+ lastEventAt = Date.now()
1067
+ resetStallTimer()
1068
+ const subtype = (message as any).event?.type ?? (message as any).message?.type
1069
+ // Capture session_id from init message
1070
+ if (message.type === "system" && (message as any).subtype === "init") {
1071
+ capturedSessionId = (message as any).session_id
1072
+ }
1073
+ if (message.type === "stream_event") {
1074
+ const ev = message.event as any
1075
+ // Detect first content event BEFORE sdkEvent records it
1076
+ if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1077
+ traceStore.phase(reqId, "sdk_streaming")
1078
+ }
1079
+ if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1080
+ fullText += ev.delta.text ?? ""
1081
+ traceStore.updateOutput(reqId, fullText.length)
1082
+ checkOutputSize(fullText.length)
1083
+ }
1084
+ }
1085
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1086
+ }
1087
+ traceStore.phase(reqId, "sdk_done")
1088
+
1089
+ // Store session mapping
1090
+ if (conversationId && capturedSessionId) {
1091
+ if (isResuming) {
1092
+ sessionStore.recordResume(conversationId)
1093
+ } else {
1094
+ sessionStore.set(conversationId, capturedSessionId, model, messages.length)
1095
+ }
1096
+ }
1097
+ } catch (resumeErr) {
1098
+ // Resume failed in streaming with-tools path — retry with full context
1099
+ if (isResuming && resumeSessionId) {
1100
+ logWarn("session.resume_failed_stream", {
1101
+ reqId, conversationId, sdkSessionId: resumeSessionId,
1102
+ error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
1103
+ })
1104
+ if (conversationId) {
1105
+ sessionStore.recordFailure(conversationId)
1106
+ sessionStore.invalidate(conversationId)
1107
+ }
1108
+ const fbLastMsg = messages[messages.length - 1]!
1109
+ const priorMsgs = messages.slice(0, -1)
1110
+ const contextParts = priorMsgs
1111
+ .map((m) => {
1112
+ const role = m.role === "assistant" ? "Assistant" : "User"
1113
+ return `[${role}]\n${serializeContent(m.content)}`
1114
+ })
1115
+ .join("\n\n")
1116
+ const baseSystem = systemContext || ""
1117
+ const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
1118
+ const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
1119
+ const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
1120
+ ? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
1121
+ : serializeContent(fbLastMsg.content)
1122
+ const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
1123
+
1124
+ logInfo("session.fallback_full_context_stream", { reqId, conversationId })
1125
+ sdkEventCount = 0
1126
+ for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
1127
+ sdkEventCount++
1128
+ lastEventAt = Date.now()
1129
+ resetStallTimer()
1130
+ const subtype = (message as any).event?.type ?? (message as any).message?.type
1131
+ if (message.type === "system" && (message as any).subtype === "init") {
1132
+ capturedSessionId = (message as any).session_id
1133
+ }
1134
+ if (message.type === "stream_event") {
1135
+ const ev = message.event as any
1136
+ if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1137
+ traceStore.phase(reqId, "sdk_streaming")
1138
+ }
1139
+ if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1140
+ fullText += ev.delta.text ?? ""
1141
+ traceStore.updateOutput(reqId, fullText.length)
1142
+ checkOutputSize(fullText.length)
1143
+ }
1144
+ }
1145
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1146
+ }
1147
+ traceStore.phase(reqId, "sdk_done")
1148
+ if (conversationId && capturedSessionId) {
1149
+ sessionStore.set(conversationId, capturedSessionId, model, messages.length)
1150
+ logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId })
1151
+ }
1152
+ } else {
1153
+ throw resumeErr
1154
+ }
1155
+ } finally {
1156
+ clearInterval(stallLog)
1157
+ clearInterval(heartbeat)
1158
+ clearStallTimer(); clearHardTimer()
1159
+ // (temp files no longer used — images passed natively)
1160
+ releaseQueue()
1161
+ }
1162
+
1163
+ traceStore.phase(reqId, "responding")
1164
+ const { toolCalls, textBefore } = parseToolUse(fullText)
1165
+
1166
+ let blockIdx = 0
1167
+ const textContent = toolCalls.length === 0 ? (fullText || "...") : textBefore
1168
+ if (textContent) {
1169
+ sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "text", text: "" } })
1170
+ sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "text_delta", text: textContent } })
1171
+ sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
1172
+ blockIdx++
1173
+ } else if (toolCalls.length === 0) {
1174
+ sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1175
+ sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
1176
+ sse("content_block_stop", { type: "content_block_stop", index: 0 })
1177
+ blockIdx = 1
1178
+ }
1179
+ for (const tc of toolCalls) {
1180
+ sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "tool_use", id: tc.id, name: tc.name, input: {} } })
1181
+ sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "input_json_delta", partial_json: JSON.stringify(tc.input) } })
1182
+ sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
1183
+ blockIdx++
1184
+ }
1185
+
1186
+ const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
1187
+ sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
1188
+ sse("message_stop", { type: "message_stop" })
1189
+ controller.close()
1190
+
1191
+ traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
1192
+ return
1193
+ }
1194
+
1195
+ // ── No tools: stream text deltas directly ─────────────────────
1196
+ sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
1197
+
1198
+ let fullText = ""
1199
+ let hasStreamed = false
1200
+ let sdkEventCount = 0
1201
+ let lastEventAt = Date.now()
1202
+ let capturedSessionId2: string | undefined
1203
+ const stallLog = setInterval(() => {
1204
+ const stallMs = Date.now() - lastEventAt
1205
+ traceStore.stall(reqId, stallMs)
1206
+ }, 15_000)
1207
+ try {
1208
+ traceStore.phase(reqId, "sdk_starting")
1209
+ for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
1210
+ sdkEventCount++
1211
+ lastEventAt = Date.now()
1212
+ resetStallTimer()
1213
+ const subtype = (message as any).event?.type ?? (message as any).message?.type
1214
+ // Capture session_id from init message
1215
+ if (message.type === "system" && (message as any).subtype === "init") {
1216
+ capturedSessionId2 = (message as any).session_id
1217
+ }
1218
+ if (message.type === "stream_event") {
1219
+ const ev = message.event as any
1220
+ // Detect first content event BEFORE sdkEvent records it
1221
+ if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1222
+ traceStore.phase(reqId, "sdk_streaming")
1223
+ }
1224
+ if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1225
+ const text = ev.delta.text ?? ""
1226
+ if (text) {
1227
+ fullText += text
1228
+ hasStreamed = true
1229
+ traceStore.updateOutput(reqId, fullText.length)
1230
+ checkOutputSize(fullText.length)
1231
+ sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
1232
+ }
1233
+ }
1234
+ }
1235
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1236
+ }
1237
+ traceStore.phase(reqId, "sdk_done")
1238
+
1239
+ // Store session mapping
1240
+ if (conversationId && capturedSessionId2) {
1241
+ if (isResuming) {
1242
+ sessionStore.recordResume(conversationId)
1243
+ } else {
1244
+ sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
1245
+ }
1246
+ }
1247
+ } catch (resumeErr) {
1248
+ // Resume failed in streaming no-tools path — retry with full context
1249
+ if (isResuming && resumeSessionId) {
1250
+ logWarn("session.resume_failed_stream", {
1251
+ reqId, conversationId, sdkSessionId: resumeSessionId,
1252
+ error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
1253
+ })
1254
+ if (conversationId) {
1255
+ sessionStore.recordFailure(conversationId)
1256
+ sessionStore.invalidate(conversationId)
1257
+ }
1258
+ const fbLastMsg = messages[messages.length - 1]!
1259
+ const priorMsgs = messages.slice(0, -1)
1260
+ const contextParts = priorMsgs
1261
+ .map((m) => {
1262
+ const role = m.role === "assistant" ? "Assistant" : "User"
1263
+ return `[${role}]\n${serializeContent(m.content)}`
1264
+ })
1265
+ .join("\n\n")
1266
+ const baseSystem = systemContext || ""
1267
+ const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
1268
+ const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
1269
+ const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
1270
+ ? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
1271
+ : serializeContent(fbLastMsg.content)
1272
+ const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
1273
+
1274
+ logInfo("session.fallback_full_context_stream", { reqId, conversationId })
1275
+ sdkEventCount = 0
1276
+ for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
1277
+ sdkEventCount++
1278
+ lastEventAt = Date.now()
1279
+ resetStallTimer()
1280
+ const subtype = (message as any).event?.type ?? (message as any).message?.type
1281
+ if (message.type === "system" && (message as any).subtype === "init") {
1282
+ capturedSessionId2 = (message as any).session_id
1283
+ }
1284
+ if (message.type === "stream_event") {
1285
+ const ev = message.event as any
1286
+ if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
1287
+ traceStore.phase(reqId, "sdk_streaming")
1288
+ }
1289
+ if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
1290
+ const text = ev.delta.text ?? ""
1291
+ if (text) {
1292
+ fullText += text
1293
+ hasStreamed = true
1294
+ traceStore.updateOutput(reqId, fullText.length)
1295
+ checkOutputSize(fullText.length)
1296
+ sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
1297
+ }
1298
+ }
1299
+ }
1300
+ traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
1301
+ }
1302
+ traceStore.phase(reqId, "sdk_done")
1303
+ if (conversationId && capturedSessionId2) {
1304
+ sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
1305
+ logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId2 })
1306
+ }
1307
+ } else {
1308
+ throw resumeErr
1309
+ }
1310
+ } finally {
1311
+ clearInterval(stallLog)
1312
+ clearInterval(heartbeat)
1313
+ clearStallTimer(); clearHardTimer()
1314
+ // (temp files no longer used — images passed natively)
1315
+ releaseQueue()
1316
+ }
1317
+
1318
+ if (!hasStreamed) {
1319
+ sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
1320
+ }
1321
+
1322
+ sse("content_block_stop", { type: "content_block_stop", index: 0 })
1323
+ sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
1324
+ sse("message_stop", { type: "message_stop" })
1325
+ controller.close()
1326
+
1327
+ traceStore.complete(reqId, { outputLen: fullText.length })
1328
+
1329
+ } catch (error) {
1330
+ clearStallTimer(); clearHardTimer()
1331
+ releaseQueue()
1332
+ const err = error instanceof Error ? error : new Error(String(error))
1333
+ const isAbort = err.name === "AbortError" || err.message?.includes("abort")
1334
+ const isQueueTimeout = err.message.includes("Queue timeout")
1335
+
1336
+ let errMsg: string
1337
+ let errType: string
1338
+ if (clientDisconnected) {
1339
+ errMsg = "Client disconnected during streaming."
1340
+ errType = "api_error"
1341
+ } else if (abortReason === "max_duration") {
1342
+ errMsg = `Request exceeded max duration of ${finalConfig.maxDurationMs / 1000}s. Output: ${trace?.outputLen ?? 0} chars.`
1343
+ errType = "api_error"
1344
+ } else if (abortReason === "max_output") {
1345
+ errMsg = `Request exceeded max output size of ${finalConfig.maxOutputChars} chars.`
1346
+ errType = "api_error"
1347
+ } else if (isAbort) {
1348
+ errMsg = `Request stalled — no SDK activity for ${finalConfig.stallTimeoutMs / 1000}s. Please retry.`
1349
+ errType = "api_error"
1350
+ } else if (isQueueTimeout) {
1351
+ errMsg = "Server busy — all request slots are occupied. Please retry shortly."
1352
+ errType = "overloaded_error"
1353
+ } else {
1354
+ errMsg = err.message
1355
+ errType = "api_error"
1356
+ }
1357
+
1358
+ // Trace the failure with full context
1359
+ traceStore.fail(reqId, err, "error", {
1360
+ clientDisconnect: clientDisconnected,
1361
+ abortReason,
1362
+ aborted: isAbort,
1363
+ queueTimeout: isQueueTimeout,
1364
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
1365
+ maxDurationMs: finalConfig.maxDurationMs,
1366
+ maxOutputChars: finalConfig.maxOutputChars,
1367
+ sseSendErrors,
1368
+ })
1369
+
1370
+ // (temp files no longer used — images passed natively)
1371
+ if (!clientDisconnected) {
1372
+ try {
1373
+ sse("error", { type: "error", error: { type: errType, message: errMsg }, request_id: reqId })
1374
+ controller.close()
1375
+ } catch {}
1376
+ } else {
1377
+ try { controller.close() } catch {}
1378
+ }
1379
+ }
1380
+ }
1381
+ })
1382
+
1383
+ return new Response(readable, {
1384
+ headers: {
1385
+ "Content-Type": "text/event-stream",
1386
+ "Cache-Control": "no-cache",
1387
+ "Connection": "keep-alive"
1388
+ }
1389
+ })
1390
+
1391
+ } catch (error) {
1392
+ const err = error instanceof Error ? error : new Error(String(error))
1393
+ const isAbort = err.name === "AbortError" || err.message?.includes("abort")
1394
+ const isQueueTimeout = err.message.includes("Queue timeout")
1395
+
1396
+ let errMsg: string
1397
+ let errType: string
1398
+ if (clientDisconnected) {
1399
+ errMsg = "Client disconnected."
1400
+ errType = "api_error"
1401
+ } else if (abortReason === "max_duration") {
1402
+ errMsg = `Request exceeded max duration of ${finalConfig.maxDurationMs / 1000}s.`
1403
+ errType = "api_error"
1404
+ } else if (abortReason === "max_output") {
1405
+ errMsg = `Request exceeded max output size of ${finalConfig.maxOutputChars} chars.`
1406
+ errType = "api_error"
1407
+ } else if (isAbort) {
1408
+ errMsg = `Request stalled — no SDK activity for ${finalConfig.stallTimeoutMs / 1000}s. Please retry.`
1409
+ errType = "api_error"
1410
+ } else if (isQueueTimeout) {
1411
+ errMsg = "Server busy — all request slots are occupied. Please retry shortly."
1412
+ errType = "overloaded_error"
1413
+ } else {
1414
+ errMsg = err.message
1415
+ errType = "api_error"
1416
+ }
1417
+
1418
+ // Trace the failure
1419
+ if (trace) {
1420
+ traceStore.fail(reqId, err, "error", {
1421
+ clientDisconnect: clientDisconnected,
1422
+ aborted: isAbort,
1423
+ queueTimeout: isQueueTimeout,
1424
+ })
1425
+ } else {
1426
+ logError("request.error.no_trace", { reqId, error: errMsg, stack: err.stack })
1427
+ }
1428
+
1429
+ if (isQueueTimeout) {
1430
+ return new Response(JSON.stringify({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }), {
1431
+ status: 529, headers: { "Content-Type": "application/json" }
1432
+ })
1433
+ }
1434
+ if (isAbort) {
1435
+ return new Response(JSON.stringify({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }), {
1436
+ status: 504, headers: { "Content-Type": "application/json" }
1437
+ })
1438
+ }
1439
+ return c.json({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }, 500)
1440
+ }
1441
+ }
1442
+
1443
+ app.post("/v1/messages", handleMessages)
1444
+ app.post("/messages", handleMessages)
1445
+
1446
+ // Stub: batches API not supported
1447
+ const handleBatches = (c: Context) => c.json({
1448
+ type: "error",
1449
+ error: { type: "not_implemented_error", message: "Batches API is not supported by this proxy" }
1450
+ }, 501)
1451
+ app.post("/v1/messages/batches", handleBatches)
1452
+ app.get("/v1/messages/batches", handleBatches)
1453
+ app.get("/v1/messages/batches/:id", handleBatches)
1454
+
1455
+ // ── OpenAI-compatible /v1/chat/completions ─────────────────────────────
1456
+
1457
+ function convertOpenaiContent(content: any): any {
1458
+ if (typeof content === "string") return content
1459
+ if (!Array.isArray(content)) return String(content ?? "")
1460
+
1461
+ return content.map((part: any) => {
1462
+ if (part.type === "text") return { type: "text", text: part.text ?? "" }
1463
+ if (part.type === "image_url" && part.image_url?.url) {
1464
+ const url = part.image_url.url as string
1465
+ const dataMatch = url.match(/^data:(image\/\w+);base64,(.+)$/)
1466
+ if (dataMatch) {
1467
+ return {
1468
+ type: "image",
1469
+ source: {
1470
+ type: "base64",
1471
+ media_type: dataMatch[1]!,
1472
+ data: dataMatch[2]!
1473
+ }
1474
+ }
1475
+ }
1476
+ return {
1477
+ type: "image",
1478
+ source: { type: "url", url }
1479
+ }
1480
+ }
1481
+ return part
1482
+ })
1483
+ }
1484
+
1485
+ function openaiToAnthropicMessages(messages: any[]): { system?: string; messages: any[] } {
1486
+ let system: string | undefined
1487
+ const converted: any[] = []
1488
+
1489
+ for (const msg of messages) {
1490
+ if (msg.role === "system") {
1491
+ const sysText = typeof msg.content === "string" ? msg.content
1492
+ : Array.isArray(msg.content) ? msg.content.filter((p: any) => p.type === "text").map((p: any) => p.text ?? "").join("")
1493
+ : String(msg.content ?? "")
1494
+ system = (system ? system + "\n" : "") + sysText
1495
+ } else if (msg.role === "user") {
1496
+ converted.push({ role: "user", content: convertOpenaiContent(msg.content) })
1497
+ } else if (msg.role === "assistant") {
1498
+ if (msg.tool_calls?.length) {
1499
+ const content: any[] = []
1500
+ if (msg.content) content.push({ type: "text", text: msg.content })
1501
+ for (const tc of msg.tool_calls) {
1502
+ content.push({
1503
+ type: "tool_use",
1504
+ id: tc.id,
1505
+ name: tc.function?.name ?? "",
1506
+ input: tc.function?.arguments ? JSON.parse(tc.function.arguments) : {}
1507
+ })
1508
+ }
1509
+ converted.push({ role: "assistant", content })
1510
+ } else {
1511
+ converted.push({ role: "assistant", content: msg.content ?? "" })
1512
+ }
1513
+ } else if (msg.role === "tool") {
1514
+ converted.push({
1515
+ role: "user",
1516
+ content: [{
1517
+ type: "tool_result",
1518
+ tool_use_id: msg.tool_call_id,
1519
+ content: msg.content ?? ""
1520
+ }]
1521
+ })
1522
+ }
1523
+ }
1524
+ return { system, messages: converted }
1525
+ }
1526
+
1527
+ function openaiToAnthropicTools(tools: any[]): any[] {
1528
+ return tools
1529
+ .filter((t: any) => t.type === "function" && t.function)
1530
+ .map((t: any) => ({
1531
+ name: t.function.name,
1532
+ description: t.function.description ?? "",
1533
+ input_schema: t.function.parameters ?? { type: "object", properties: {} }
1534
+ }))
1535
+ }
1536
+
1537
+ function anthropicToOpenaiResponse(anthropicBody: any, model: string): any {
1538
+ const textBlocks = (anthropicBody.content ?? []).filter((b: any) => b.type === "text")
1539
+ const toolBlocks = (anthropicBody.content ?? []).filter((b: any) => b.type === "tool_use")
1540
+
1541
+ const text = textBlocks.map((b: any) => b.text).join("") || (toolBlocks.length > 0 ? null : "")
1542
+
1543
+ const message: any = { role: "assistant", content: text }
1544
+
1545
+ if (toolBlocks.length > 0) {
1546
+ message.tool_calls = toolBlocks.map((b: any, i: number) => ({
1547
+ id: b.id,
1548
+ type: "function",
1549
+ function: {
1550
+ name: b.name,
1551
+ arguments: JSON.stringify(b.input ?? {})
1552
+ }
1553
+ }))
1554
+ }
1555
+
1556
+ const finishReason = anthropicBody.stop_reason === "tool_use" ? "tool_calls"
1557
+ : anthropicBody.stop_reason === "max_tokens" ? "length"
1558
+ : "stop"
1559
+
1560
+ return {
1561
+ id: generateId("chatcmpl-"),
1562
+ object: "chat.completion",
1563
+ created: Math.floor(Date.now() / 1000),
1564
+ model,
1565
+ choices: [{
1566
+ index: 0,
1567
+ message,
1568
+ finish_reason: finishReason
1569
+ }],
1570
+ usage: {
1571
+ prompt_tokens: anthropicBody.usage?.input_tokens ?? 0,
1572
+ completion_tokens: anthropicBody.usage?.output_tokens ?? 0,
1573
+ total_tokens: (anthropicBody.usage?.input_tokens ?? 0) + (anthropicBody.usage?.output_tokens ?? 0)
1574
+ }
1575
+ }
1576
+ }
1577
+
1578
// OpenAI Chat Completions compatibility endpoint.
// Translates an OpenAI-format request into an Anthropic Messages request,
// re-dispatches it in-process to this app's own /v1/messages route via
// app.fetch, then translates the response — plain JSON or SSE stream —
// back into OpenAI chat.completion / chat.completion.chunk format.
const handleChatCompletions = async (c: Context) => {
  try {
    let body: any
    try {
      body = await c.req.json()
    } catch {
      // Unparseable body: reject in OpenAI error-envelope format.
      return c.json({ error: { message: "Request body must be valid JSON", type: "invalid_request_error" } }, 400)
    }

    if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
      return c.json({ error: { message: "messages is required and must be a non-empty array", type: "invalid_request_error" } }, 400)
    }

    // Split OpenAI messages into an Anthropic system prompt + messages list.
    const { system, messages } = openaiToAnthropicMessages(body.messages)
    const stream = body.stream ?? false
    const requestedModel = body.model ?? "claude-sonnet-4-6"

    // Build the Anthropic request, copying only parameters the caller sent.
    const anthropicBody: any = {
      model: requestedModel,
      messages,
      stream,
    }
    if (system) anthropicBody.system = system
    // max_completion_tokens is OpenAI's newer alias for max_tokens.
    if (body.max_tokens || body.max_completion_tokens) {
      anthropicBody.max_tokens = body.max_tokens ?? body.max_completion_tokens
    }
    if (body.temperature !== undefined) anthropicBody.temperature = body.temperature
    if (body.top_p !== undefined) anthropicBody.top_p = body.top_p
    // OpenAI "stop" may be a string or an array; Anthropic takes an array.
    if (body.stop) anthropicBody.stop_sequences = Array.isArray(body.stop) ? body.stop : [body.stop]
    if (body.tools?.length) {
      anthropicBody.tools = openaiToAnthropicTools(body.tools)
    }

    // Forward the caller's credential under whichever header it arrived in
    // (Authorization takes precedence when both are present).
    const internalHeaders: Record<string, string> = { "Content-Type": "application/json" }
    const authHeader = c.req.header("authorization") ?? c.req.header("x-api-key")
    if (authHeader) {
      if (c.req.header("authorization")) internalHeaders["authorization"] = authHeader
      else internalHeaders["x-api-key"] = authHeader
    }
    // In-process dispatch through the app's own router; the hostname in the
    // URL is irrelevant — only the path is used for routing.
    const internalRes = await app.fetch(new Request(`http://localhost/v1/messages`, {
      method: "POST",
      headers: internalHeaders,
      body: JSON.stringify(anthropicBody)
    }))

    if (!stream) {
      // Non-streaming: translate the single Anthropic JSON response.
      const anthropicJson = await internalRes.json() as any
      if (anthropicJson.type === "error") {
        // Pass Anthropic's error body through, preserving the status code.
        return c.json({ error: anthropicJson.error }, internalRes.status as any)
      }
      return c.json(anthropicToOpenaiResponse(anthropicJson, requestedModel))
    }

    // Streaming: re-emit the Anthropic SSE event stream as OpenAI
    // chat.completion.chunk events.
    const includeUsage = body.stream_options?.include_usage === true
    const encoder = new TextEncoder()
    const readable = new ReadableStream({
      async start(controller) {
        try {
          const reader = internalRes.body?.getReader()
          if (!reader) { controller.close(); return }

          const decoder = new TextDecoder()
          let buffer = ""                 // partial SSE line carried between reads
          const chatId = generateId("chatcmpl-")
          const created = Math.floor(Date.now() / 1000)
          let sentRole = false            // first delta must announce role: "assistant"
          let finishReason: string | null = null
          // Anthropic content-block index -> tool-call identity, in insertion order.
          const activeToolCalls: Map<number, { id: string; name: string }> = new Map()
          let toolCallIndex = 0           // next OpenAI tool_calls array index
          let usageInfo: { input_tokens: number; output_tokens: number } | null = null

          while (true) {
            const { done, value } = await reader.read()
            if (done) break
            buffer += decoder.decode(value, { stream: true })

            // Process only complete lines; keep the trailing partial in buffer.
            const lines = buffer.split("\n")
            buffer = lines.pop() ?? ""

            for (const line of lines) {
              if (!line.startsWith("data: ")) continue
              try {
                const event = JSON.parse(line.slice(6))

                // Emit the role-announcement chunk once, before any content.
                if (!sentRole && (event.type === "content_block_start" || event.type === "content_block_delta")) {
                  sentRole = true
                  controller.enqueue(encoder.encode(`data: ${JSON.stringify({
                    id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                    choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }]
                  })}\n\n`))
                }

                if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
                  // New tool call: emit its id/name with empty arguments;
                  // the argument JSON arrives incrementally via input_json_delta.
                  const idx = toolCallIndex++
                  activeToolCalls.set(event.index, { id: event.content_block.id, name: event.content_block.name })
                  controller.enqueue(encoder.encode(`data: ${JSON.stringify({
                    id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                    choices: [{ index: 0, delta: {
                      tool_calls: [{ index: idx, id: event.content_block.id, type: "function", function: { name: event.content_block.name, arguments: "" } }]
                    }, finish_reason: null }]
                  })}\n\n`))
                } else if (event.type === "content_block_delta" && event.delta?.type === "input_json_delta") {
                  const tc = activeToolCalls.get(event.index)
                  if (tc) {
                    // Map insertion order of entries (never deleted) matches the
                    // idx handed out at content_block_start, so indexOf recovers it.
                    const idx = Array.from(activeToolCalls.keys()).indexOf(event.index)
                    controller.enqueue(encoder.encode(`data: ${JSON.stringify({
                      id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                      choices: [{ index: 0, delta: {
                        tool_calls: [{ index: idx, function: { arguments: event.delta.partial_json } }]
                      }, finish_reason: null }]
                    })}\n\n`))
                  }
                } else if (event.type === "content_block_delta" && event.delta?.type === "text_delta") {
                  // Plain text content delta.
                  controller.enqueue(encoder.encode(`data: ${JSON.stringify({
                    id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                    choices: [{ index: 0, delta: { content: event.delta.text }, finish_reason: null }]
                  })}\n\n`))
                } else if (event.type === "message_delta") {
                  // Map Anthropic stop_reason to the OpenAI finish_reason vocabulary.
                  const sr = event.delta?.stop_reason
                  finishReason = sr === "tool_use" ? "tool_calls" : sr === "max_tokens" ? "length" : "stop"
                  if (event.usage) {
                    // message_delta usage may be partial; keep prior values for
                    // any field it omits.
                    const prevInput: number = usageInfo?.input_tokens ?? 0
                    const prevOutput: number = usageInfo?.output_tokens ?? 0
                    usageInfo = {
                      input_tokens: event.usage.input_tokens ?? prevInput,
                      output_tokens: event.usage.output_tokens ?? prevOutput
                    }
                  }
                } else if (event.type === "message_start" && event.message?.usage) {
                  // Input tokens are known up front; output accumulates later.
                  usageInfo = { input_tokens: event.message.usage.input_tokens ?? 0, output_tokens: 0 }
                } else if (event.type === "message_stop") {
                  // Final chunk carries finish_reason (and usage when the client
                  // opted in via stream_options.include_usage), then [DONE].
                  const finalChunk: any = {
                    id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                    choices: [{ index: 0, delta: {}, finish_reason: finishReason ?? "stop" }]
                  }
                  if (includeUsage && usageInfo) {
                    finalChunk.usage = {
                      prompt_tokens: usageInfo.input_tokens,
                      completion_tokens: usageInfo.output_tokens,
                      total_tokens: usageInfo.input_tokens + usageInfo.output_tokens
                    }
                  }
                  controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`))
                  controller.enqueue(encoder.encode("data: [DONE]\n\n"))
                }
              } catch {}
              // ^ malformed SSE data line: skip it rather than kill the stream
            }
          }
          controller.close()
        } catch {
          // Upstream read failure: end the client stream without surfacing
          // an error mid-SSE (best-effort termination).
          controller.close()
        }
      }
    })

    return new Response(readable, {
      headers: {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive"
      }
    })
  } catch (error) {
    return c.json({
      error: { message: error instanceof Error ? error.message : "Unknown error", type: "server_error" }
    }, 500)
  }
}
1746
+
1747
// Expose the OpenAI-compatible Chat Completions handler both with and
// without the /v1 prefix, for clients that configure a bare base URL.
for (const route of ["/v1/chat/completions", "/chat/completions"]) {
  app.post(route, handleChatCompletions)
}
1749
+
1750
+ // OpenAI-format model listing
1751
+ const handleOpenaiModels = (c: Context) => c.json({
1752
+ object: "list",
1753
+ data: MODELS.map(m => ({
1754
+ id: m.id,
1755
+ object: "model",
1756
+ created: Math.floor(new Date(m.created_at).getTime() / 1000),
1757
+ owned_by: "anthropic"
1758
+ }))
1759
+ })
1760
+ app.get("/v1/chat/models", handleOpenaiModels)
1761
+
1762
+ // 404 catch-all
1763
+ app.all("*", (c) => c.json({
1764
+ type: "error",
1765
+ error: { type: "not_found_error", message: `${c.req.method} ${c.req.path} not found` }
1766
+ }, 404))
1767
+
1768
  // Hand back both the configured Hono app and the resolved config so the
  // caller can serve the app and read the effective settings.
  return { app, config: finalConfig }
}
1770
+
1771
+ export async function startProxyServer(config: Partial<ProxyConfig> = {}) {
1772
+ const { app, config: finalConfig } = createProxyServer(config)
1773
+
1774
+ const server = Bun.serve({
1775
+ port: finalConfig.port,
1776
+ hostname: finalConfig.host,
1777
+ fetch: app.fetch,
1778
+ idleTimeout: 0
1779
+ })
1780
+
1781
+ // Startup log with full configuration
1782
+ logInfo("proxy.started", {
1783
+ version: PROXY_VERSION,
1784
+ host: finalConfig.host,
1785
+ port: finalConfig.port,
1786
+ stallTimeoutMs: finalConfig.stallTimeoutMs,
1787
+ maxDurationMs: finalConfig.maxDurationMs,
1788
+ maxOutputChars: finalConfig.maxOutputChars,
1789
+ maxConcurrent: MAX_CONCURRENT,
1790
+ queueTimeoutMs: QUEUE_TIMEOUT_MS,
1791
+ claudeExecutable,
1792
+ logDir: LOG_DIR,
1793
+ debug: finalConfig.debug,
1794
+ pid: process.pid,
1795
+ })
1796
+
1797
+ console.log(`Claude SDK Proxy v${PROXY_VERSION} running at http://${finalConfig.host}:${finalConfig.port}`)
1798
+ console.log(` Logs: ${LOG_DIR}`)
1799
+ console.log(` Debug: http://${finalConfig.host}:${finalConfig.port}/debug/stats`)
1800
+
1801
+ // Periodic health logging (every 5 minutes)
1802
+ const healthInterval = setInterval(() => {
1803
+ const mem = process.memoryUsage()
1804
+ const stats = traceStore.getStats()
1805
+ logInfo("proxy.health", {
1806
+ pid: process.pid,
1807
+ rssBytes: mem.rss,
1808
+ rssMB: +(mem.rss / 1024 / 1024).toFixed(1),
1809
+ heapUsedMB: +(mem.heapUsed / 1024 / 1024).toFixed(1),
1810
+ heapTotalMB: +(mem.heapTotal / 1024 / 1024).toFixed(1),
1811
+ externalMB: +(mem.external / 1024 / 1024).toFixed(1),
1812
+ uptimeMs: stats.uptimeMs,
1813
+ totalRequests: stats.requests.total,
1814
+ totalErrors: stats.requests.errors,
1815
+ activeRequests: stats.requests.active,
1816
+ queueActive: requestQueue.activeCount,
1817
+ queueWaiting: requestQueue.waitingCount,
1818
+ })
1819
+ }, 300_000) // 5 minutes
1820
+
1821
+ // Graceful shutdown
1822
+ const shutdown = (signal: string) => {
1823
+ const stats = traceStore.getStats()
1824
+ logInfo("proxy.shutdown", {
1825
+ signal,
1826
+ pid: process.pid,
1827
+ totalRequests: stats.requests.total,
1828
+ totalErrors: stats.requests.errors,
1829
+ activeRequests: stats.requests.active,
1830
+ uptimeMs: stats.uptimeMs,
1831
+ })
1832
+ clearInterval(healthInterval)
1833
+ console.log(`\nReceived ${signal}, shutting down...`)
1834
+ server.stop(true)
1835
+ process.exit(0)
1836
+ }
1837
+ process.on("SIGINT", () => shutdown("SIGINT"))
1838
+ process.on("SIGTERM", () => shutdown("SIGTERM"))
1839
+
1840
+ return server
1841
+ }