claude-sdk-proxy 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1024 @@
1
+ import { Hono } from "hono"
2
+ import { cors } from "hono/cors"
3
+ import { query } from "@anthropic-ai/claude-agent-sdk"
4
+ import type { Context } from "hono"
5
+ import type { ProxyConfig } from "./types"
6
+ import { DEFAULT_PROXY_CONFIG } from "./types"
7
+ import { claudeLog } from "../logger"
8
+ import { execSync } from "child_process"
9
+ import { existsSync, writeFileSync, unlinkSync, readFileSync } from "fs"
10
+ import { tmpdir } from "os"
11
+ import { randomBytes } from "crypto"
12
+ import { fileURLToPath } from "url"
13
+ import { join, dirname } from "path"
14
+ import { createMcpServer, type McpServerState } from "../mcpTools"
15
+
16
// Proxy version string, resolved once at module load by reading this
// package's own package.json (two directories above the compiled file).
// Any read/parse failure degrades to "unknown" so startup never fails
// just because the metadata file is missing.
const PROXY_VERSION: string = (() => {
  try {
    const pkg = JSON.parse(readFileSync(join(dirname(fileURLToPath(import.meta.url)), "../../package.json"), "utf-8"))
    return pkg.version ?? "unknown"
  } catch { return "unknown" }
})()
22
+
23
// Only block tools that add noise — everything else (Read, Write, Edit, Bash,
// Glob, Grep, WebFetch, WebSearch) uses Claude Code's robust built-in implementations.
const BLOCKED_BUILTIN_TOOLS = ["TodoWrite", "NotebookEdit"]

// Key under which the in-process MCP server is registered in the SDK's
// `mcpServers` option (see buildQueryOptions).
const MCP_SERVER_NAME = "opencode"
28
+
29
// Locate the Claude Code CLI on disk. Preference order:
//   1. cli.js bundled next to the installed @anthropic-ai/claude-agent-sdk entry point
//   2. a `claude` binary found via `which` on PATH
// Throws with an install hint when neither is found.
// NOTE(review): `which` is POSIX-only — presumably Windows installs are
// expected to hit the SDK-bundled cli.js path; confirm if Windows via PATH
// lookup needs `where` support.
function resolveClaudeExecutable(): string {
  try {
    const sdkPath = fileURLToPath(import.meta.resolve("@anthropic-ai/claude-agent-sdk"))
    const sdkCliJs = join(dirname(sdkPath), "cli.js")
    if (existsSync(sdkCliJs)) return sdkCliJs
  } catch {}
  try {
    const claudePath = execSync("which claude", { encoding: "utf-8" }).trim()
    if (claudePath && existsSync(claudePath)) return claudePath
  } catch {}
  throw new Error("Could not find Claude Code executable. Install: npm install -g @anthropic-ai/claude-code")
}

// Resolved eagerly at module load — a missing executable aborts startup
// rather than failing on the first request.
const claudeExecutable = resolveClaudeExecutable()
43
+
44
+ // ── Concurrency control ──────────────────────────────────────────────────────
45
+ // Limits simultaneous Claude SDK sessions to prevent resource exhaustion.
46
+
47
+ const MAX_CONCURRENT = parseInt(process.env.CLAUDE_PROXY_MAX_CONCURRENT ?? "5", 10)
48
+
49
+ class RequestQueue {
50
+ private active = 0
51
+ private waiting: Array<() => void> = []
52
+
53
+ get activeCount() { return this.active }
54
+ get waitingCount() { return this.waiting.length }
55
+
56
+ async acquire(): Promise<void> {
57
+ if (this.active < MAX_CONCURRENT) {
58
+ this.active++
59
+ return
60
+ }
61
+ return new Promise<void>((resolve) => {
62
+ this.waiting.push(() => { this.active++; resolve() })
63
+ })
64
+ }
65
+
66
+ release(): void {
67
+ this.active--
68
+ const next = this.waiting.shift()
69
+ if (next) next()
70
+ }
71
+ }
72
+
73
+ const requestQueue = new RequestQueue()
74
+
75
+ function mapModelToClaudeModel(model: string): "sonnet" | "opus" | "haiku" {
76
+ if (model.includes("opus")) return "opus"
77
+ if (model.includes("haiku")) return "haiku"
78
+ return "sonnet"
79
+ }
80
+
81
+ // ── Content-block serialization ──────────────────────────────────────────────
82
+
83
+ function saveImageToTemp(block: any, tempFiles: string[]): string | null {
84
+ try {
85
+ let data: string | undefined
86
+ let mediaType = "image/jpeg"
87
+
88
+ if (typeof block.data === "string") {
89
+ data = block.data
90
+ mediaType = block.media_type || mediaType
91
+ } else if (block.source) {
92
+ if (block.source.type === "base64" && block.source.data) {
93
+ data = block.source.data
94
+ mediaType = block.source.media_type || mediaType
95
+ } else if (block.source.url) {
96
+ return block.source.url
97
+ }
98
+ }
99
+
100
+ if (!data) return null
101
+
102
+ const ext = mediaType.split("/")[1]?.replace("jpeg", "jpg") || "jpg"
103
+ const tmpPath = join(tmpdir(), `proxy-img-${randomBytes(8).toString("hex")}.${ext}`)
104
+ writeFileSync(tmpPath, Buffer.from(data, "base64"))
105
+ tempFiles.push(tmpPath)
106
+ return tmpPath
107
+ } catch {
108
+ return null
109
+ }
110
+ }
111
+
112
+ function serializeBlock(block: any, tempFiles: string[]): string {
113
+ switch (block.type) {
114
+ case "text":
115
+ return block.text || ""
116
+ case "image": {
117
+ const imgPath = saveImageToTemp(block, tempFiles)
118
+ return imgPath ? `[Image: ${imgPath}]` : "[Image: (unable to save)]"
119
+ }
120
+ case "tool_use":
121
+ // Use <tool_use> XML format so the model continues using parseable blocks
122
+ return `<tool_use>\n{"name": "${block.name}", "input": ${JSON.stringify(block.input ?? {})}}\n</tool_use>`
123
+ case "tool_result": {
124
+ const content = Array.isArray(block.content)
125
+ ? block.content.filter((b: any) => b.type === "text").map((b: any) => b.text).join("")
126
+ : String(block.content ?? "")
127
+ const truncated = content.length > 4000
128
+ ? content.slice(0, 4000) + `\n...[truncated ${content.length - 4000} chars]`
129
+ : content
130
+ return `<tool_result tool_use_id="${block.tool_use_id}">\n${truncated}\n</tool_result>`
131
+ }
132
+ case "thinking":
133
+ return ""
134
+ default:
135
+ return ""
136
+ }
137
+ }
138
+
139
+ function serializeContent(content: string | Array<any>, tempFiles: string[]): string {
140
+ if (typeof content === "string") return content
141
+ if (!Array.isArray(content)) return String(content)
142
+ return content.map(b => serializeBlock(b, tempFiles)).filter(Boolean).join("\n")
143
+ }
144
+
145
+ function cleanupTempFiles(tempFiles: string[]) {
146
+ for (const f of tempFiles) {
147
+ try { unlinkSync(f) } catch {}
148
+ }
149
+ }
150
+
151
+ // ── Client tool-use support ──────────────────────────────────────────────────
152
+ // When the caller provides tool definitions (e.g. Claude Code, LangChain, etc.)
153
+ // we switch to single-turn mode: inject tool defs into the system prompt, run
154
+ // one LLM turn, parse <tool_use> blocks from the output, and return them as
155
+ // proper Anthropic tool_use content blocks.
156
+ //
157
+ // We stay in agent mode (multi-turn, built-in + MCP tools) when:
158
+ // - No tools in the request, OR
159
+ // - The request has markers indicating the agent manages its own tool loop
160
+
161
+ function isClientToolMode(body: any): boolean {
162
+ if (!body.tools?.length) return false
163
+ if (body.messages?.some((m: any) =>
164
+ Array.isArray(m.content) && m.content.some((b: any) => b.type === "tool_result")
165
+ )) return true
166
+ const sysText = Array.isArray(body.system)
167
+ ? body.system.filter((b: any) => b.type === "text").map((b: any) => b.text).join(" ")
168
+ : String(body.system ?? "")
169
+ if (sysText.includes("conversation_label") || sysText.includes("chat id:")) return false
170
+ return true
171
+ }
172
+
173
+ function buildClientToolsPrompt(tools: any[]): string {
174
+ const defs = tools.map((t: any) => {
175
+ const schema = t.input_schema ? `\nInput schema:\n${JSON.stringify(t.input_schema, null, 2)}` : ""
176
+ return `### ${t.name}\n${t.description ?? ""}${schema}`
177
+ }).join("\n\n")
178
+ return `\n\n## Available Tools\n\nTo call a tool, output a <tool_use> block:\n\n` +
179
+ `<tool_use>\n{"name": "TOOL_NAME", "input": {ARGUMENTS}}\n</tool_use>\n\n` +
180
+ `- You may write reasoning text before the block\n` +
181
+ `- Call multiple tools by including multiple <tool_use> blocks\n` +
182
+ `- Each block must be valid JSON with "name" and "input" keys\n\n` +
183
+ defs
184
+ }
185
+
186
+ interface ToolCall { id: string; name: string; input: unknown }
187
+
188
+ function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string } {
189
+ const calls: ToolCall[] = []
190
+ let firstIdx = -1
191
+
192
+ // Parse <tool_use> XML blocks (primary format)
193
+ const xmlRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
194
+ let m: RegExpExecArray | null
195
+ while ((m = xmlRegex.exec(text)) !== null) {
196
+ if (firstIdx < 0) firstIdx = m.index
197
+ try {
198
+ const p = JSON.parse(m[1]!.trim())
199
+ calls.push({
200
+ id: `toolu_${randomBytes(16).toString("hex")}`,
201
+ name: String(p.name ?? ""),
202
+ input: p.input ?? {}
203
+ })
204
+ } catch { /* skip malformed block */ }
205
+ }
206
+
207
+ // Fallback: parse [Tool call: name\nInput: {...}] format
208
+ if (calls.length === 0) {
209
+ const bracketRegex = /\[Tool call:\s*(\w+)\s*\nInput:\s*([\s\S]*?)\]/g
210
+ while ((m = bracketRegex.exec(text)) !== null) {
211
+ if (firstIdx < 0) firstIdx = m.index
212
+ try {
213
+ const input = JSON.parse(m[2]!.trim())
214
+ calls.push({
215
+ id: `toolu_${randomBytes(16).toString("hex")}`,
216
+ name: m[1]!.trim(),
217
+ input
218
+ })
219
+ } catch { /* skip malformed block */ }
220
+ }
221
+ }
222
+
223
+ return { toolCalls: calls, textBefore: firstIdx > 0 ? text.slice(0, firstIdx).trim() : "" }
224
+ }
225
+
226
+ function roughTokens(text: string): number {
227
+ return Math.ceil((text ?? "").length / 4)
228
+ }
229
+
230
// ── Query options builder ────────────────────────────────────────────────────

/**
 * Assemble the options object passed to the SDK's query().
 *
 * Produces one of two shapes:
 * - client tool mode: single turn, no built-in or MCP tools (the caller's
 *   tool definitions are assumed to be baked into opts.systemPrompt);
 * - agent mode: up to 50 turns with the in-process MCP server attached
 *   under MCP_SERVER_NAME.
 *
 * Falsy opts (partial, abortController, maxThinkingTokens, systemPrompt)
 * are omitted from the object entirely rather than sent as undefined.
 */
function buildQueryOptions(
  model: "sonnet" | "opus" | "haiku",
  opts: {
    partial?: boolean          // request stream_event partial messages
    clientToolMode?: boolean   // single-turn, caller-managed tool loop
    systemPrompt?: string
    mcpState?: McpServerState  // shared state handed to the MCP server
    abortController?: AbortController
    maxThinkingTokens?: number // extended-thinking budget, when enabled
  } = {}
) {
  const base = {
    model,
    pathToClaudeCodeExecutable: claudeExecutable,
    permissionMode: "bypassPermissions" as const,
    allowDangerouslySkipPermissions: true,
    persistSession: false,
    settingSources: [],
    ...(opts.partial ? { includePartialMessages: true } : {}),
    ...(opts.abortController ? { abortController: opts.abortController } : {}),
    ...(opts.maxThinkingTokens ? { maxThinkingTokens: opts.maxThinkingTokens } : {}),
    ...(opts.systemPrompt ? { systemPrompt: opts.systemPrompt } : {}),
    disallowedTools: [...BLOCKED_BUILTIN_TOOLS],
  }

  if (opts.clientToolMode) {
    // Disable ALL built-in tools — the caller manages its own tool loop.
    // Tool definitions are already baked into the systemPrompt.
    return {
      ...base,
      maxTurns: 1,
      tools: [] as string[],
    }
  }

  return {
    ...base,
    maxTurns: 50,
    mcpServers: { [MCP_SERVER_NAME]: createMcpServer(opts.mcpState) }
  }
}
273
+
274
+ // ── Route handler ────────────────────────────────────────────────────────────
275
+
276
/**
 * Build the Hono app exposing an Anthropic-compatible HTTP surface on top of
 * the Claude Code SDK. Any config field the caller omits falls back to
 * DEFAULT_PROXY_CONFIG.
 */
export function createProxyServer(config: Partial<ProxyConfig> = {}) {
  const finalConfig = { ...DEFAULT_PROXY_CONFIG, ...config }
  const app = new Hono()

  app.use("*", cors())

  // Optional API key validation — when CLAUDE_PROXY_API_KEY is set,
  // require a matching x-api-key or Authorization: Bearer header.
  const requiredApiKey = process.env.CLAUDE_PROXY_API_KEY
  if (requiredApiKey) {
    app.use("*", async (c, next) => {
      // Skip auth for health check and OPTIONS
      if (c.req.path === "/" || c.req.method === "OPTIONS") return next()
      const key = c.req.header("x-api-key")
        ?? c.req.header("authorization")?.replace(/^Bearer\s+/i, "")
      if (key !== requiredApiKey) {
        return c.json({
          type: "error",
          error: { type: "authentication_error", message: "Invalid API key" }
        }, 401)
      }
      return next()
    })
  }

  // Anthropic-compatible headers + request logging
  app.use("*", async (c, next) => {
    const start = Date.now()
    // Reuse the caller's request id when supplied so traces line up end-to-end.
    const requestId = c.req.header("x-request-id") ?? `req_${randomBytes(12).toString("hex")}`
    c.header("x-request-id", requestId)
    c.header("request-id", requestId)
    // Echo back Anthropic-standard headers
    c.header("anthropic-version", "2024-10-22")
    const betaHeader = c.req.header("anthropic-beta")
    if (betaHeader) c.header("anthropic-beta", betaHeader)
    await next()
    const ms = Date.now() - start
    claudeLog("proxy.http", { method: c.req.method, path: c.req.path, status: c.res.status, ms, requestId })
  })
315
+
316
  // Health check: reports version, supported endpoints, and live queue occupancy.
  app.get("/", (c) => c.json({
    status: "ok",
    service: "claude-max-proxy",
    version: PROXY_VERSION,
    format: "anthropic",
    endpoints: ["/v1/messages", "/v1/models", "/v1/chat/completions"],
    queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT }
  }))

  // Static model catalog advertised to clients (both alias and dated ids).
  const MODELS = [
    { type: "model", id: "claude-opus-4-6", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
    { type: "model", id: "claude-opus-4-6-20250801", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
    { type: "model", id: "claude-sonnet-4-6", display_name: "Claude Sonnet 4.6", created_at: "2025-08-01T00:00:00Z" },
    { type: "model", id: "claude-sonnet-4-6-20250801", display_name: "Claude Sonnet 4.6", created_at: "2025-08-01T00:00:00Z" },
    { type: "model", id: "claude-sonnet-4-5-20250929", display_name: "Claude Sonnet 4.5", created_at: "2025-09-29T00:00:00Z" },
    { type: "model", id: "claude-haiku-4-5", display_name: "Claude Haiku 4.5", created_at: "2025-10-01T00:00:00Z" },
    { type: "model", id: "claude-haiku-4-5-20251001", display_name: "Claude Haiku 4.5", created_at: "2025-10-01T00:00:00Z" },
  ]

  // Dual-format model data: includes fields for both Anthropic and OpenAI SDKs
  const MODELS_DUAL = MODELS.map(m => ({
    ...m,
    object: "model" as const,
    created: Math.floor(new Date(m.created_at).getTime() / 1000),
    owned_by: "anthropic" as const
  }))

  // Model list, served on both prefixed and unprefixed paths.
  const handleModels = (c: Context) => c.json({ object: "list", data: MODELS_DUAL })
  app.get("/v1/models", handleModels)
  app.get("/models", handleModels)

  // Single-model lookup; unknown ids get an Anthropic-shaped 404.
  const handleModel = (c: Context) => {
    const id = c.req.param("id")
    const model = MODELS_DUAL.find(m => m.id === id)
    if (!model) return c.json({ type: "error", error: { type: "not_found_error", message: `Model \`${id}\` not found` } }, 404)
    return c.json(model)
  }
  app.get("/v1/models/:id", handleModel)
  app.get("/models/:id", handleModel)
355
+
356
+ const handleCountTokens = async (c: Context) => {
357
+ try {
358
+ const body = await c.req.json()
359
+ const sysText = Array.isArray(body.system)
360
+ ? body.system.filter((b: any) => b.type === "text").map((b: any) => b.text).join("\n")
361
+ : String(body.system ?? "")
362
+ const msgText = (body.messages ?? [])
363
+ .map((m: any) => typeof m.content === "string" ? m.content : JSON.stringify(m.content))
364
+ .join("\n")
365
+ return c.json({ input_tokens: roughTokens(sysText + msgText) })
366
+ } catch {
367
+ return c.json({ input_tokens: 0 })
368
+ }
369
+ }
370
+ app.post("/v1/messages/count_tokens", handleCountTokens)
371
+ app.post("/messages/count_tokens", handleCountTokens)
372
+
373
  /**
   * POST /v1/messages — Anthropic Messages API emulation on top of the Claude
   * Code SDK. Supports non-streaming JSON and streaming SSE responses, plus a
   * single-turn "client tool mode" when the caller supplies tool definitions
   * and drives its own tool loop. Concurrency is gated by requestQueue; every
   * path releases the slot, clears the timeout, and removes temp image files.
   */
  const handleMessages = async (c: Context) => {
    const reqId = randomBytes(4).toString("hex")
    try {
      let body: any
      try {
        body = await c.req.json()
      } catch {
        return c.json({ type: "error", error: { type: "invalid_request_error", message: "Request body must be valid JSON" } }, 400)
      }

      if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
        return c.json({ type: "error", error: { type: "invalid_request_error", message: "messages is required and must be a non-empty array" } }, 400)
      }

      const model = mapModelToClaudeModel(body.model || "sonnet")
      const stream = body.stream ?? false
      const clientToolMode = isClientToolMode(body)
      const mcpState: McpServerState = { messageSent: false }
      // Hard deadline for the whole SDK session; firing aborts the query.
      const abortController = new AbortController()
      const timeout = setTimeout(() => abortController.abort(), finalConfig.requestTimeoutMs)

      // Extended thinking: extract budget_tokens from thinking parameter
      const maxThinkingTokens = body.thinking?.type === "enabled" ? body.thinking.budget_tokens : undefined

      claudeLog("proxy.request", { reqId, model, stream, msgs: body.messages?.length, clientToolMode, ...(maxThinkingTokens ? { maxThinkingTokens } : {}), queueActive: requestQueue.activeCount, queueWaiting: requestQueue.waitingCount })

      // Acquire a slot in the concurrency queue
      await requestQueue.acquire()

      // Paths of images written to disk for this request; cleaned up in
      // every finally/error path below.
      const tempFiles: string[] = []

      // Flatten the system field (string or text-block array) to one string.
      let systemContext = ""
      if (body.system) {
        if (typeof body.system === "string") {
          systemContext = body.system
        } else if (Array.isArray(body.system)) {
          systemContext = body.system
            .filter((b: any) => b.type === "text" && b.text)
            .map((b: any) => b.text)
            .join("\n")
        }
      }

      // Build the prompt from messages. The SDK's query() takes a single prompt
      // string. To avoid the model continuing a "Human:/Assistant:" format in its
      // response, we use neutral delimiters and only the last user message as the
      // primary prompt when there's minimal context.
      const messages = body.messages as Array<{ role: string; content: string | Array<any> }>

      let prompt: string
      let systemPrompt: string | undefined

      if (clientToolMode) {
        // Client tool mode: serialize all messages as context, inject tools
        const conversationParts = messages
          .map((m) => {
            const label = m.role === "assistant" ? "[assistant]" : "[user]"
            return `${label}\n${serializeContent(m.content, tempFiles)}`
          })
          .join("\n\n")
        const toolsSection = buildClientToolsPrompt(body.tools)
        systemPrompt = systemContext
          ? `${systemContext}${toolsSection}`
          : toolsSection
        prompt = conversationParts
      } else if (messages.length === 1) {
        // Single message: pass directly as prompt (most common case)
        systemPrompt = systemContext || undefined
        prompt = serializeContent(messages[0]!.content, tempFiles)
      } else {
        // Multi-turn: build conversation context with neutral delimiters.
        // Put prior turns in system prompt as context, last user message as prompt.
        const lastMsg = messages[messages.length - 1]!
        const priorMsgs = messages.slice(0, -1)

        const contextParts = priorMsgs
          .map((m) => {
            const label = m.role === "assistant" ? "[assistant]" : "[user]"
            return `${label}\n${serializeContent(m.content, tempFiles)}`
          })
          .join("\n\n")

        const baseSystem = systemContext || ""
        const contextSection = contextParts
          ? `\n\n<conversation_history>\n${contextParts}\n</conversation_history>`
          : ""
        systemPrompt = (baseSystem + contextSection).trim() || undefined
        prompt = serializeContent(lastMsg.content, tempFiles)
      }

      // ── Non-streaming ──────────────────────────────────────────────────────
      if (!stream) {
        let fullText = ""
        let lastCleanText = ""
        try {
          for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: false, clientToolMode, systemPrompt, mcpState, abortController, maxThinkingTokens }) })) {
            if (message.type === "assistant") {
              let turnText = ""
              let hasToolUse = false
              for (const block of message.message.content) {
                if (block.type === "text") turnText += block.text
                if (block.type === "tool_use") hasToolUse = true
              }
              // Remember the most recent turn that was pure text (no tool_use);
              // agent mode prefers it over a trailing tool-calling turn.
              if (!hasToolUse && turnText) {
                lastCleanText = turnText
              }
              fullText = turnText
            }
          }
        } finally {
          clearTimeout(timeout)
          cleanupTempFiles(tempFiles)
          requestQueue.release()
        }
        // In agent mode, prefer the last turn that had no tool_use
        if (!clientToolMode && lastCleanText) fullText = lastCleanText

        if (clientToolMode) {
          const { toolCalls, textBefore } = parseToolUse(fullText)
          const content: any[] = []
          if (textBefore) content.push({ type: "text", text: textBefore })
          for (const tc of toolCalls) content.push({ type: "tool_use", id: tc.id, name: tc.name, input: tc.input })
          if (content.length === 0) content.push({ type: "text", text: fullText || "..." })
          const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
          claudeLog("proxy.response", { reqId, len: fullText.length, toolCalls: toolCalls.length })
          return c.json({
            id: `msg_${Date.now()}`,
            type: "message", role: "assistant", content,
            model: body.model, stop_reason: stopReason, stop_sequence: null,
            usage: { input_tokens: roughTokens(prompt), output_tokens: roughTokens(fullText) }
          })
        }

        // If the MCP message tool delivered anything, suppress the proxy's
        // own text response so the client doesn't double-deliver.
        if (mcpState.messageSent) fullText = "NO_REPLY"
        if (!fullText || !fullText.trim()) fullText = "..."
        claudeLog("proxy.response", { reqId, len: fullText.length, messageSent: mcpState.messageSent })
        return c.json({
          id: `msg_${Date.now()}`,
          type: "message", role: "assistant",
          content: [{ type: "text", text: fullText }],
          model: body.model, stop_reason: "end_turn", stop_sequence: null,
          usage: { input_tokens: roughTokens(prompt), output_tokens: roughTokens(fullText) }
        })
      }

      // ── Streaming ──────────────────────────────────────────────────────────
      const encoder = new TextEncoder()
      const readable = new ReadableStream({
        async start(controller) {
          const messageId = `msg_${Date.now()}`
          // Guard so the queue slot is released exactly once across the
          // finally blocks and the catch handler below.
          let queueReleased = false
          const releaseQueue = () => { if (!queueReleased) { queueReleased = true; requestQueue.release() } }

          // Swallows enqueue errors (client may have disconnected).
          const sse = (event: string, data: object) => {
            try {
              controller.enqueue(encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`))
            } catch {}
          }

          try {
            // SSE comment-line keepalive; self-cancels once enqueue fails.
            // NOTE(review): the catch handler below cannot clear this interval
            // (it is scoped inside the try) — it relies on the inner finally
            // blocks having run first; confirm no path skips them.
            const heartbeat = setInterval(() => {
              try { controller.enqueue(encoder.encode(": ping\n\n")) } catch { clearInterval(heartbeat) }
            }, 15_000)

            sse("message_start", {
              type: "message_start",
              message: {
                id: messageId, type: "message", role: "assistant", content: [],
                model: body.model, stop_reason: null, stop_sequence: null,
                usage: { input_tokens: roughTokens(prompt), output_tokens: 0 }
              }
            })

            // ── Client tool mode: buffer → emit blocks at end ─────────────
            if (clientToolMode) {
              let fullText = ""
              try {
                for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: true, clientToolMode: true, systemPrompt, abortController, maxThinkingTokens }) })) {
                  if (message.type === "stream_event") {
                    const ev = message.event as any
                    if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
                      fullText += ev.delta.text ?? ""
                    }
                  }
                }
              } finally {
                clearInterval(heartbeat)
                clearTimeout(timeout)
                cleanupTempFiles(tempFiles)
                releaseQueue()
              }

              const { toolCalls, textBefore } = parseToolUse(fullText)
              claudeLog("proxy.stream.done", { reqId, len: fullText.length, toolCalls: toolCalls.length })

              // Replay the buffered result as Anthropic SSE content blocks:
              // optional leading text block, then one block per tool call.
              let blockIdx = 0
              const textContent = toolCalls.length === 0 ? (fullText || "...") : textBefore
              if (textContent) {
                sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "text", text: "" } })
                sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "text_delta", text: textContent } })
                sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
                blockIdx++
              } else if (toolCalls.length === 0) {
                sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
                sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
                sse("content_block_stop", { type: "content_block_stop", index: 0 })
                blockIdx = 1
              }
              for (const tc of toolCalls) {
                sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "tool_use", id: tc.id, name: tc.name, input: "" } })
                sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "input_json_delta", partial_json: JSON.stringify(tc.input) } })
                sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
                blockIdx++
              }

              const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
              sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
              sse("message_stop", { type: "message_stop" })
              controller.close()
              return
            }

            // ── Agent mode: real-time streaming ─────────────────────────
            // Forward text deltas to the client as they arrive from the SDK.
            // For single-turn (most chat requests), this gives true token-by-
            // token streaming. For multi-turn (agent tool use), the client
            // sees all turns' text streamed in real-time.
            sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })

            let fullText = ""
            let hasStreamed = false
            try {
              for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: true, systemPrompt, mcpState, abortController, maxThinkingTokens }) })) {
                if (message.type === "stream_event") {
                  const ev = message.event as any
                  if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
                    const text = ev.delta.text ?? ""
                    if (text) {
                      fullText += text
                      hasStreamed = true
                      sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
                    }
                  }
                }
              }
            } finally {
              clearInterval(heartbeat)
              clearTimeout(timeout)
              cleanupTempFiles(tempFiles)
              releaseQueue()
            }

            claudeLog("proxy.stream.done", { reqId, len: fullText.length, messageSent: mcpState.messageSent })

            if (mcpState.messageSent) {
              // The MCP message tool already delivered content; mark the
              // stream so the client suppresses its own copy.
              sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "\nNO_REPLY" } })
            } else if (!hasStreamed) {
              sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
            }

            sse("content_block_stop", { type: "content_block_stop", index: 0 })
            sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
            sse("message_stop", { type: "message_stop" })
            controller.close()

          } catch (error) {
            clearTimeout(timeout)
            releaseQueue()
            const isAbort = error instanceof Error && error.name === "AbortError"
            const errMsg = isAbort ? "Request timeout" : (error instanceof Error ? error.message : "Unknown error")
            const errType = isAbort ? "timeout_error" : "api_error"
            claudeLog("proxy.stream.error", { reqId, error: errMsg })
            cleanupTempFiles(tempFiles)
            try {
              sse("error", { type: "error", error: { type: errType, message: errMsg } })
              controller.close()
            } catch {}
          }
        }
      })

      return new Response(readable, {
        headers: {
          "Content-Type": "text/event-stream",
          "Cache-Control": "no-cache",
          "Connection": "keep-alive"
        }
      })

    } catch (error) {
      // Errors before the stream starts (or in non-streaming mode) become
      // Anthropic-shaped JSON errors: 408 on timeout, 500 otherwise.
      const isAbort = error instanceof Error && error.name === "AbortError"
      const errMsg = isAbort ? "Request timeout" : (error instanceof Error ? error.message : "Unknown error")
      const errType = isAbort ? "timeout_error" : "api_error"
      const status = isAbort ? 408 : 500
      claudeLog("proxy.error", { reqId, error: errMsg })
      return c.json({ type: "error", error: { type: errType, message: errMsg } }, status)
    }
  }

  app.post("/v1/messages", handleMessages)
  app.post("/messages", handleMessages)
676
+
677
  // Stub: batches API not supported
  // Registered for create, list, and get so clients fail fast with a clear
  // 501 instead of a generic 404.
  const handleBatches = (c: Context) => c.json({
    type: "error",
    error: { type: "not_implemented_error", message: "Batches API is not supported by this proxy" }
  }, 501)
  app.post("/v1/messages/batches", handleBatches)
  app.get("/v1/messages/batches", handleBatches)
  app.get("/v1/messages/batches/:id", handleBatches)
685
+
686
+ // ── OpenAI-compatible /v1/chat/completions ─────────────────────────────
687
+ // Translates OpenAI ChatCompletion format to/from Anthropic Messages API
688
+ // so tools expecting OpenAI endpoints (LangChain, LiteLLM, etc.) just work.
689
+
690
+ function convertOpenaiContent(content: any): any {
691
+ // String content → pass through
692
+ if (typeof content === "string") return content
693
+ if (!Array.isArray(content)) return String(content ?? "")
694
+
695
+ // Array content → convert image_url parts to Anthropic image blocks
696
+ return content.map((part: any) => {
697
+ if (part.type === "text") return { type: "text", text: part.text ?? "" }
698
+ if (part.type === "image_url" && part.image_url?.url) {
699
+ const url = part.image_url.url as string
700
+ // Data URL: data:image/jpeg;base64,...
701
+ const dataMatch = url.match(/^data:(image\/\w+);base64,(.+)$/)
702
+ if (dataMatch) {
703
+ return {
704
+ type: "image",
705
+ source: {
706
+ type: "base64",
707
+ media_type: dataMatch[1]!,
708
+ data: dataMatch[2]!
709
+ }
710
+ }
711
+ }
712
+ // HTTP URL — pass as URL source
713
+ return {
714
+ type: "image",
715
+ source: { type: "url", url }
716
+ }
717
+ }
718
+ return part
719
+ })
720
+ }
721
+
722
+ function openaiToAnthropicMessages(messages: any[]): { system?: string; messages: any[] } {
723
+ let system: string | undefined
724
+ const converted: any[] = []
725
+
726
+ for (const msg of messages) {
727
+ if (msg.role === "system") {
728
+ system = (system ? system + "\n" : "") + (typeof msg.content === "string" ? msg.content : "")
729
+ } else if (msg.role === "user") {
730
+ converted.push({ role: "user", content: convertOpenaiContent(msg.content) })
731
+ } else if (msg.role === "assistant") {
732
+ // Handle assistant messages with tool_calls (OpenAI format)
733
+ if (msg.tool_calls?.length) {
734
+ const content: any[] = []
735
+ if (msg.content) content.push({ type: "text", text: msg.content })
736
+ for (const tc of msg.tool_calls) {
737
+ content.push({
738
+ type: "tool_use",
739
+ id: tc.id,
740
+ name: tc.function?.name ?? "",
741
+ input: tc.function?.arguments ? JSON.parse(tc.function.arguments) : {}
742
+ })
743
+ }
744
+ converted.push({ role: "assistant", content })
745
+ } else {
746
+ converted.push({ role: "assistant", content: msg.content ?? "" })
747
+ }
748
+ } else if (msg.role === "tool") {
749
+ // OpenAI tool result → Anthropic tool_result
750
+ converted.push({
751
+ role: "user",
752
+ content: [{
753
+ type: "tool_result",
754
+ tool_use_id: msg.tool_call_id,
755
+ content: msg.content ?? ""
756
+ }]
757
+ })
758
+ }
759
+ }
760
+ return { system, messages: converted }
761
+ }
762
+
763
+ function openaiToAnthropicTools(tools: any[]): any[] {
764
+ return tools
765
+ .filter((t: any) => t.type === "function" && t.function)
766
+ .map((t: any) => ({
767
+ name: t.function.name,
768
+ description: t.function.description ?? "",
769
+ input_schema: t.function.parameters ?? { type: "object", properties: {} }
770
+ }))
771
+ }
772
+
773
+ function anthropicToOpenaiResponse(anthropicBody: any, model: string): any {
774
+ const textBlocks = (anthropicBody.content ?? []).filter((b: any) => b.type === "text")
775
+ const toolBlocks = (anthropicBody.content ?? []).filter((b: any) => b.type === "tool_use")
776
+
777
+ const text = textBlocks.map((b: any) => b.text).join("") || (toolBlocks.length > 0 ? null : "")
778
+
779
+ const message: any = { role: "assistant", content: text }
780
+
781
+ if (toolBlocks.length > 0) {
782
+ message.tool_calls = toolBlocks.map((b: any, i: number) => ({
783
+ id: b.id,
784
+ type: "function",
785
+ function: {
786
+ name: b.name,
787
+ arguments: JSON.stringify(b.input ?? {})
788
+ }
789
+ }))
790
+ }
791
+
792
+ const finishReason = anthropicBody.stop_reason === "tool_use" ? "tool_calls"
793
+ : anthropicBody.stop_reason === "max_tokens" ? "length"
794
+ : "stop"
795
+
796
+ return {
797
+ id: `chatcmpl-${Date.now()}`,
798
+ object: "chat.completion",
799
+ created: Math.floor(Date.now() / 1000),
800
+ model,
801
+ choices: [{
802
+ index: 0,
803
+ message,
804
+ finish_reason: finishReason
805
+ }],
806
+ usage: {
807
+ prompt_tokens: anthropicBody.usage?.input_tokens ?? 0,
808
+ completion_tokens: anthropicBody.usage?.output_tokens ?? 0,
809
+ total_tokens: (anthropicBody.usage?.input_tokens ?? 0) + (anthropicBody.usage?.output_tokens ?? 0)
810
+ }
811
+ }
812
+ }
813
+
814
+ const handleChatCompletions = async (c: Context) => {
815
+ try {
816
+ let body: any
817
+ try {
818
+ body = await c.req.json()
819
+ } catch {
820
+ return c.json({ error: { message: "Request body must be valid JSON", type: "invalid_request_error" } }, 400)
821
+ }
822
+
823
+ if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
824
+ return c.json({ error: { message: "messages is required and must be a non-empty array", type: "invalid_request_error" } }, 400)
825
+ }
826
+
827
+ const { system, messages } = openaiToAnthropicMessages(body.messages)
828
+ const stream = body.stream ?? false
829
+ const requestedModel = body.model ?? "claude-sonnet-4-6"
830
+
831
+ // Build Anthropic-format request body
832
+ const anthropicBody: any = {
833
+ model: requestedModel,
834
+ messages,
835
+ stream,
836
+ }
837
+ if (system) anthropicBody.system = system
838
+ if (body.max_tokens) anthropicBody.max_tokens = body.max_tokens
839
+ if (body.temperature !== undefined) anthropicBody.temperature = body.temperature
840
+ // Convert OpenAI tools format to Anthropic tools format
841
+ if (body.tools?.length) {
842
+ anthropicBody.tools = openaiToAnthropicTools(body.tools)
843
+ }
844
+
845
+ // Forward to our own /v1/messages handler by making an internal request
846
+ const internalHeaders: Record<string, string> = { "Content-Type": "application/json" }
847
+ const authHeader = c.req.header("authorization") ?? c.req.header("x-api-key")
848
+ if (authHeader) {
849
+ if (c.req.header("authorization")) internalHeaders["authorization"] = authHeader
850
+ else internalHeaders["x-api-key"] = authHeader
851
+ }
852
+ const internalRes = await app.fetch(new Request(`http://localhost/v1/messages`, {
853
+ method: "POST",
854
+ headers: internalHeaders,
855
+ body: JSON.stringify(anthropicBody)
856
+ }))
857
+
858
+ if (!stream) {
859
+ const anthropicJson = await internalRes.json() as any
860
+ if (anthropicJson.type === "error") {
861
+ return c.json({ error: anthropicJson.error }, internalRes.status as any)
862
+ }
863
+ return c.json(anthropicToOpenaiResponse(anthropicJson, requestedModel))
864
+ }
865
+
866
+ // Streaming: translate SSE events from Anthropic format to OpenAI format
867
+ const includeUsage = body.stream_options?.include_usage === true
868
+ const encoder = new TextEncoder()
869
+ const readable = new ReadableStream({
870
+ async start(controller) {
871
+ try {
872
+ const reader = internalRes.body?.getReader()
873
+ if (!reader) { controller.close(); return }
874
+
875
+ const decoder = new TextDecoder()
876
+ let buffer = ""
877
+ const chatId = `chatcmpl-${Date.now()}`
878
+ const created = Math.floor(Date.now() / 1000)
879
+ let sentRole = false
880
+ let finishReason: string | null = null
881
+ // Track active tool calls for streaming
882
+ const activeToolCalls: Map<number, { id: string; name: string; argBuffer: string }> = new Map()
883
+ let toolCallIndex = 0
884
+ let usageInfo: { input_tokens: number; output_tokens: number } | null = null
885
+
886
+ while (true) {
887
+ const { done, value } = await reader.read()
888
+ if (done) break
889
+ buffer += decoder.decode(value, { stream: true })
890
+
891
+ const lines = buffer.split("\n")
892
+ buffer = lines.pop() ?? ""
893
+
894
+ for (const line of lines) {
895
+ if (!line.startsWith("data: ")) continue
896
+ try {
897
+ const event = JSON.parse(line.slice(6))
898
+
899
+ // Emit role delta on first event
900
+ if (!sentRole && (event.type === "content_block_start" || event.type === "content_block_delta")) {
901
+ sentRole = true
902
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify({
903
+ id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
904
+ choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }]
905
+ })}\n\n`))
906
+ }
907
+
908
+ if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
909
+ // Start of a tool_use block → emit tool_call function header
910
+ const idx = toolCallIndex++
911
+ activeToolCalls.set(event.index, { id: event.content_block.id, name: event.content_block.name, argBuffer: "" })
912
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify({
913
+ id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
914
+ choices: [{ index: 0, delta: {
915
+ tool_calls: [{ index: idx, id: event.content_block.id, type: "function", function: { name: event.content_block.name, arguments: "" } }]
916
+ }, finish_reason: null }]
917
+ })}\n\n`))
918
+ } else if (event.type === "content_block_delta" && event.delta?.type === "input_json_delta") {
919
+ // Tool call argument streaming
920
+ const tc = activeToolCalls.get(event.index)
921
+ if (tc) {
922
+ const idx = Array.from(activeToolCalls.keys()).indexOf(event.index)
923
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify({
924
+ id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
925
+ choices: [{ index: 0, delta: {
926
+ tool_calls: [{ index: idx, function: { arguments: event.delta.partial_json } }]
927
+ }, finish_reason: null }]
928
+ })}\n\n`))
929
+ }
930
+ } else if (event.type === "content_block_delta" && event.delta?.type === "text_delta") {
931
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify({
932
+ id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
933
+ choices: [{ index: 0, delta: { content: event.delta.text }, finish_reason: null }]
934
+ })}\n\n`))
935
+ } else if (event.type === "message_delta") {
936
+ // Capture finish reason and usage for final chunk
937
+ const sr = event.delta?.stop_reason
938
+ finishReason = sr === "tool_use" ? "tool_calls" : sr === "max_tokens" ? "length" : "stop"
939
+ if (event.usage) {
940
+ usageInfo = { input_tokens: event.usage.input_tokens ?? 0, output_tokens: event.usage.output_tokens ?? 0 }
941
+ }
942
+ } else if (event.type === "message_start" && event.message?.usage) {
943
+ // Capture input token count from message_start
944
+ usageInfo = { input_tokens: event.message.usage.input_tokens ?? 0, output_tokens: 0 }
945
+ } else if (event.type === "message_stop") {
946
+ const finalChunk: any = {
947
+ id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
948
+ choices: [{ index: 0, delta: {}, finish_reason: finishReason ?? "stop" }]
949
+ }
950
+ if (includeUsage && usageInfo) {
951
+ finalChunk.usage = {
952
+ prompt_tokens: usageInfo.input_tokens,
953
+ completion_tokens: usageInfo.output_tokens,
954
+ total_tokens: usageInfo.input_tokens + usageInfo.output_tokens
955
+ }
956
+ }
957
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`))
958
+ controller.enqueue(encoder.encode("data: [DONE]\n\n"))
959
+ }
960
+ } catch {}
961
+ }
962
+ }
963
+ controller.close()
964
+ } catch {
965
+ controller.close()
966
+ }
967
+ }
968
+ })
969
+
970
+ return new Response(readable, {
971
+ headers: {
972
+ "Content-Type": "text/event-stream",
973
+ "Cache-Control": "no-cache",
974
+ "Connection": "keep-alive"
975
+ }
976
+ })
977
+ } catch (error) {
978
+ return c.json({
979
+ error: { message: error instanceof Error ? error.message : "Unknown error", type: "server_error" }
980
+ }, 500)
981
+ }
982
+ }
983
+
984
// Register the OpenAI-compatible endpoint under both the standard /v1 prefix
// and the bare path, since OpenAI clients differ in whether the configured
// base URL already includes "/v1".
app.post("/v1/chat/completions", handleChatCompletions)
app.post("/chat/completions", handleChatCompletions)
986
+
987
+ // OpenAI-format model listing
988
+ const handleOpenaiModels = (c: Context) => c.json({
989
+ object: "list",
990
+ data: MODELS.map(m => ({
991
+ id: m.id,
992
+ object: "model",
993
+ created: Math.floor(new Date(m.created_at).getTime() / 1000),
994
+ owned_by: "anthropic"
995
+ }))
996
+ })
997
+ app.get("/v1/chat/models", handleOpenaiModels)
998
+
999
+ return { app, config: finalConfig }
1000
+ }
1001
+
1002
/**
 * Creates the proxy app via createProxyServer and serves it with Bun on the
 * configured host/port. Registers SIGINT/SIGTERM handlers that stop the
 * server and exit the process.
 *
 * @param config Partial proxy configuration; unspecified fields fall back to
 *   the defaults applied inside createProxyServer.
 * @returns The running Bun server instance.
 */
export async function startProxyServer(config: Partial<ProxyConfig> = {}) {
  const { app, config: finalConfig } = createProxyServer(config)

  const server = Bun.serve({
    port: finalConfig.port,
    hostname: finalConfig.host,
    fetch: app.fetch,
    idleTimeout: 0 // 0 disables Bun's idle timeout, keeping long-lived SSE streams open
  })

  console.log(`Claude SDK Proxy v${PROXY_VERSION} running at http://${finalConfig.host}:${finalConfig.port}`)

  // Graceful shutdown
  const shutdown = (signal: string) => {
    console.log(`\nReceived ${signal}, shutting down...`)
    // NOTE(review): per Bun's docs, stop(true) force-closes active
    // connections; stop() with no argument is the variant that waits for
    // in-flight requests. Combined with the immediate process.exit below this
    // shutdown is abrupt either way — confirm the intended behavior.
    server.stop(true)
    process.exit(0)
  }
  process.on("SIGINT", () => shutdown("SIGINT"))
  process.on("SIGTERM", () => shutdown("SIGTERM"))

  return server
}