claude-sdk-proxy 2.2.1 → 2.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -185,7 +185,7 @@ claude-sdk-proxy [options]
185
185
  | `CLAUDE_PROXY_DEBUG` | unset | Enable debug logging (`1` to enable) |
186
186
  | `CLAUDE_PROXY_API_KEY` | unset | When set, require this key via `x-api-key` or `Authorization: Bearer` header |
187
187
  | `CLAUDE_PROXY_MAX_CONCURRENT` | `5` | Max simultaneous Claude SDK sessions |
188
- | `CLAUDE_PROXY_TIMEOUT_MS` | `300000` | Per-request timeout in milliseconds |
188
+ | `CLAUDE_PROXY_TIMEOUT_MS` | `1800000` | Per-request timeout in milliseconds (default 30 minutes) |
189
189
 
190
190
  ## Testing
191
191
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-sdk-proxy",
3
- "version": "2.2.1",
3
+ "version": "2.3.0",
4
4
  "description": "Anthropic Messages API proxy backed by Claude Agent SDK — use Claude Max with any API client",
5
5
  "type": "module",
6
6
  "main": "./src/proxy/server.ts",
@@ -13,6 +13,15 @@ import { fileURLToPath } from "url"
13
13
  import { join, dirname } from "path"
14
14
  import { createMcpServer, type McpServerState } from "../mcpTools"
15
15
 
16
+ // Base62 ID generator — matches Anthropic's real ID format (e.g. msg_01XFDUDYJgAACzvnptvVoYEL)
17
+ const BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
18
+ function generateId(prefix: string, length = 24): string {
19
+ const bytes = randomBytes(length)
20
+ let id = prefix
21
+ for (let i = 0; i < length; i++) id += BASE62[bytes[i]! % 62]
22
+ return id
23
+ }
24
+
16
25
  const PROXY_VERSION: string = (() => {
17
26
  try {
18
27
  const pkg = JSON.parse(readFileSync(join(dirname(fileURLToPath(import.meta.url)), "../../package.json"), "utf-8"))
@@ -197,7 +206,7 @@ function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string
197
206
  try {
198
207
  const p = JSON.parse(m[1]!.trim())
199
208
  calls.push({
200
- id: `toolu_${randomBytes(16).toString("hex")}`,
209
+ id: generateId("toolu_"),
201
210
  name: String(p.name ?? ""),
202
211
  input: p.input ?? {}
203
212
  })
@@ -212,7 +221,7 @@ function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string
212
221
  try {
213
222
  const input = JSON.parse(m[2]!.trim())
214
223
  calls.push({
215
- id: `toolu_${randomBytes(16).toString("hex")}`,
224
+ id: generateId("toolu_"),
216
225
  name: m[1]!.trim(),
217
226
  input
218
227
  })
@@ -237,7 +246,7 @@ function buildQueryOptions(
237
246
  systemPrompt?: string
238
247
  mcpState?: McpServerState
239
248
  abortController?: AbortController
240
- maxThinkingTokens?: number
249
+ thinking?: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" }
241
250
  } = {}
242
251
  ) {
243
252
  const base = {
@@ -249,7 +258,7 @@ function buildQueryOptions(
249
258
  settingSources: [],
250
259
  ...(opts.partial ? { includePartialMessages: true } : {}),
251
260
  ...(opts.abortController ? { abortController: opts.abortController } : {}),
252
- ...(opts.maxThinkingTokens ? { maxThinkingTokens: opts.maxThinkingTokens } : {}),
261
+ ...(opts.thinking ? { thinking: opts.thinking } : {}),
253
262
  ...(opts.systemPrompt ? { systemPrompt: opts.systemPrompt } : {}),
254
263
  disallowedTools: [...BLOCKED_BUILTIN_TOOLS],
255
264
  }
@@ -266,7 +275,7 @@ function buildQueryOptions(
266
275
 
267
276
  return {
268
277
  ...base,
269
- maxTurns: 50,
278
+ maxTurns: 200,
270
279
  mcpServers: { [MCP_SERVER_NAME]: createMcpServer(opts.mcpState) }
271
280
  }
272
281
  }
@@ -291,7 +300,8 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
291
300
  if (key !== requiredApiKey) {
292
301
  return c.json({
293
302
  type: "error",
294
- error: { type: "authentication_error", message: "Invalid API key" }
303
+ error: { type: "authentication_error", message: "Invalid API key" },
304
+ request_id: c.res.headers.get("request-id") ?? generateId("req_")
295
305
  }, 401)
296
306
  }
297
307
  return next()
@@ -301,11 +311,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
301
311
  // Anthropic-compatible headers + request logging
302
312
  app.use("*", async (c, next) => {
303
313
  const start = Date.now()
304
- const requestId = c.req.header("x-request-id") ?? `req_${randomBytes(12).toString("hex")}`
314
+ const requestId = c.req.header("x-request-id") ?? generateId("req_")
305
315
  c.header("x-request-id", requestId)
306
316
  c.header("request-id", requestId)
307
317
  // Echo back Anthropic-standard headers
308
- c.header("anthropic-version", "2024-10-22")
318
+ c.header("anthropic-version", "2023-06-01")
309
319
  const betaHeader = c.req.header("anthropic-beta")
310
320
  if (betaHeader) c.header("anthropic-beta", betaHeader)
311
321
  await next()
@@ -315,7 +325,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
315
325
 
316
326
  app.get("/", (c) => c.json({
317
327
  status: "ok",
318
- service: "claude-max-proxy",
328
+ service: "claude-sdk-proxy",
319
329
  version: PROXY_VERSION,
320
330
  format: "anthropic",
321
331
  endpoints: ["/v1/messages", "/v1/models", "/v1/chat/completions"],
@@ -371,17 +381,17 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
371
381
  app.post("/messages/count_tokens", handleCountTokens)
372
382
 
373
383
  const handleMessages = async (c: Context) => {
374
- const reqId = randomBytes(4).toString("hex")
384
+ const reqId = generateId("req_")
375
385
  try {
376
386
  let body: any
377
387
  try {
378
388
  body = await c.req.json()
379
389
  } catch {
380
- return c.json({ type: "error", error: { type: "invalid_request_error", message: "Request body must be valid JSON" } }, 400)
390
+ return c.json({ type: "error", error: { type: "invalid_request_error", message: "Request body must be valid JSON" }, request_id: reqId }, 400)
381
391
  }
382
392
 
383
393
  if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
384
- return c.json({ type: "error", error: { type: "invalid_request_error", message: "messages is required and must be a non-empty array" } }, 400)
394
+ return c.json({ type: "error", error: { type: "invalid_request_error", message: "messages is required and must be a non-empty array" }, request_id: reqId }, 400)
385
395
  }
386
396
 
387
397
  const model = mapModelToClaudeModel(body.model || "sonnet")
@@ -391,13 +401,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
391
401
  const abortController = new AbortController()
392
402
  const timeout = setTimeout(() => abortController.abort(), finalConfig.requestTimeoutMs)
393
403
 
394
- // Extended thinking: extract budget_tokens from thinking parameter
395
- const maxThinkingTokens = body.thinking?.type === "enabled" ? body.thinking.budget_tokens : undefined
396
-
397
- claudeLog("proxy.request", { reqId, model, stream, msgs: body.messages?.length, clientToolMode, ...(maxThinkingTokens ? { maxThinkingTokens } : {}), queueActive: requestQueue.activeCount, queueWaiting: requestQueue.waitingCount })
398
-
399
- // Acquire a slot in the concurrency queue
400
- await requestQueue.acquire()
404
+ // Extended thinking: map Anthropic API thinking param to SDK ThinkingConfig
405
+ const thinking: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" } | undefined =
406
+ body.thinking?.type === "enabled" ? { type: "enabled", budgetTokens: body.thinking.budget_tokens }
407
+ : body.thinking?.type === "disabled" ? { type: "disabled" }
408
+ : body.thinking?.type === "adaptive" ? { type: "adaptive" }
409
+ : undefined
401
410
 
402
411
  const tempFiles: string[] = []
403
412
 
@@ -460,12 +469,18 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
460
469
  prompt = serializeContent(lastMsg.content, tempFiles)
461
470
  }
462
471
 
472
+ claudeLog("proxy.request", { reqId, model, stream, msgs: body.messages?.length, clientToolMode, ...(thinking ? { thinking: thinking.type } : {}), queueActive: requestQueue.activeCount, queueWaiting: requestQueue.waitingCount })
473
+
474
+ // Acquire a slot in the concurrency queue — all code after this MUST
475
+ // release via the try/finally blocks in both streaming and non-streaming paths.
476
+ await requestQueue.acquire()
477
+
463
478
  // ── Non-streaming ──────────────────────────────────────────────────────
464
479
  if (!stream) {
465
480
  let fullText = ""
466
481
  let lastCleanText = ""
467
482
  try {
468
- for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: false, clientToolMode, systemPrompt, mcpState, abortController, maxThinkingTokens }) })) {
483
+ for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: false, clientToolMode, systemPrompt, mcpState, abortController, thinking }) })) {
469
484
  if (message.type === "assistant") {
470
485
  let turnText = ""
471
486
  let hasToolUse = false
@@ -496,7 +511,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
496
511
  const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
497
512
  claudeLog("proxy.response", { reqId, len: fullText.length, toolCalls: toolCalls.length })
498
513
  return c.json({
499
- id: `msg_${Date.now()}`,
514
+ id: generateId("msg_"),
500
515
  type: "message", role: "assistant", content,
501
516
  model: body.model, stop_reason: stopReason, stop_sequence: null,
502
517
  usage: { input_tokens: roughTokens(prompt), output_tokens: roughTokens(fullText) }
@@ -509,7 +524,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
509
524
  if (!fullText || !fullText.trim()) fullText = "..."
510
525
  claudeLog("proxy.response", { reqId, len: fullText.length, messageSent: mcpState.messageSent })
511
526
  return c.json({
512
- id: `msg_${Date.now()}`,
527
+ id: generateId("msg_"),
513
528
  type: "message", role: "assistant",
514
529
  content: [{ type: "text", text: fullText }],
515
530
  model: body.model, stop_reason: "end_turn", stop_sequence: null,
@@ -520,8 +535,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
520
535
  // ── Streaming ──────────────────────────────────────────────────────────
521
536
  const encoder = new TextEncoder()
522
537
  const readable = new ReadableStream({
538
+ cancel() {
539
+ // Client disconnected — abort the SDK query to free resources
540
+ abortController.abort()
541
+ },
523
542
  async start(controller) {
524
- const messageId = `msg_${Date.now()}`
543
+ const messageId = generateId("msg_")
525
544
  let queueReleased = false
526
545
  const releaseQueue = () => { if (!queueReleased) { queueReleased = true; requestQueue.release() } }
527
546
 
@@ -533,7 +552,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
533
552
 
534
553
  try {
535
554
  const heartbeat = setInterval(() => {
536
- try { controller.enqueue(encoder.encode(": ping\n\n")) } catch { clearInterval(heartbeat) }
555
+ try { controller.enqueue(encoder.encode(`event: ping\ndata: {"type": "ping"}\n\n`)) } catch { clearInterval(heartbeat) }
537
556
  }, 15_000)
538
557
 
539
558
  sse("message_start", {
@@ -541,7 +560,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
541
560
  message: {
542
561
  id: messageId, type: "message", role: "assistant", content: [],
543
562
  model: body.model, stop_reason: null, stop_sequence: null,
544
- usage: { input_tokens: roughTokens(prompt), output_tokens: 0 }
563
+ usage: { input_tokens: roughTokens(prompt), output_tokens: 1 }
545
564
  }
546
565
  })
547
566
 
@@ -549,7 +568,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
549
568
  if (clientToolMode) {
550
569
  let fullText = ""
551
570
  try {
552
- for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: true, clientToolMode: true, systemPrompt, abortController, maxThinkingTokens }) })) {
571
+ for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: true, clientToolMode: true, systemPrompt, abortController, thinking }) })) {
553
572
  if (message.type === "stream_event") {
554
573
  const ev = message.event as any
555
574
  if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
@@ -581,7 +600,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
581
600
  blockIdx = 1
582
601
  }
583
602
  for (const tc of toolCalls) {
584
- sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "tool_use", id: tc.id, name: tc.name, input: "" } })
603
+ sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "tool_use", id: tc.id, name: tc.name, input: {} } })
585
604
  sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "input_json_delta", partial_json: JSON.stringify(tc.input) } })
586
605
  sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
587
606
  blockIdx++
@@ -604,7 +623,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
604
623
  let fullText = ""
605
624
  let hasStreamed = false
606
625
  try {
607
- for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: true, systemPrompt, mcpState, abortController, maxThinkingTokens }) })) {
626
+ for await (const message of query({ prompt, options: buildQueryOptions(model, { partial: true, systemPrompt, mcpState, abortController, thinking }) })) {
608
627
  if (message.type === "stream_event") {
609
628
  const ev = message.event as any
610
629
  if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
@@ -642,11 +661,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
642
661
  releaseQueue()
643
662
  const isAbort = error instanceof Error && error.name === "AbortError"
644
663
  const errMsg = isAbort ? "Request timeout" : (error instanceof Error ? error.message : "Unknown error")
645
- const errType = isAbort ? "timeout_error" : "api_error"
664
+ const errType = isAbort ? "overloaded_error" : "api_error"
646
665
  claudeLog("proxy.stream.error", { reqId, error: errMsg })
647
666
  cleanupTempFiles(tempFiles)
648
667
  try {
649
- sse("error", { type: "error", error: { type: errType, message: errMsg } })
668
+ sse("error", { type: "error", error: { type: errType, message: errMsg }, request_id: reqId })
650
669
  controller.close()
651
670
  } catch {}
652
671
  }
@@ -664,10 +683,14 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
664
683
  } catch (error) {
665
684
  const isAbort = error instanceof Error && error.name === "AbortError"
666
685
  const errMsg = isAbort ? "Request timeout" : (error instanceof Error ? error.message : "Unknown error")
667
- const errType = isAbort ? "timeout_error" : "api_error"
668
- const status = isAbort ? 408 : 500
686
+ const errType = isAbort ? "overloaded_error" : "api_error"
669
687
  claudeLog("proxy.error", { reqId, error: errMsg })
670
- return c.json({ type: "error", error: { type: errType, message: errMsg } }, status)
688
+ if (isAbort) {
689
+ return new Response(JSON.stringify({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }), {
690
+ status: 529, headers: { "Content-Type": "application/json" }
691
+ })
692
+ }
693
+ return c.json({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }, 500)
671
694
  }
672
695
  }
673
696
 
@@ -725,7 +748,10 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
725
748
 
726
749
  for (const msg of messages) {
727
750
  if (msg.role === "system") {
728
- system = (system ? system + "\n" : "") + (typeof msg.content === "string" ? msg.content : "")
751
+ const sysText = typeof msg.content === "string" ? msg.content
752
+ : Array.isArray(msg.content) ? msg.content.filter((p: any) => p.type === "text").map((p: any) => p.text ?? "").join("")
753
+ : String(msg.content ?? "")
754
+ system = (system ? system + "\n" : "") + sysText
729
755
  } else if (msg.role === "user") {
730
756
  converted.push({ role: "user", content: convertOpenaiContent(msg.content) })
731
757
  } else if (msg.role === "assistant") {
@@ -794,7 +820,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
794
820
  : "stop"
795
821
 
796
822
  return {
797
- id: `chatcmpl-${Date.now()}`,
823
+ id: generateId("chatcmpl-"),
798
824
  object: "chat.completion",
799
825
  created: Math.floor(Date.now() / 1000),
800
826
  model,
@@ -835,8 +861,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
835
861
  stream,
836
862
  }
837
863
  if (system) anthropicBody.system = system
838
- if (body.max_tokens) anthropicBody.max_tokens = body.max_tokens
864
+ if (body.max_tokens || body.max_completion_tokens) {
865
+ anthropicBody.max_tokens = body.max_tokens ?? body.max_completion_tokens
866
+ }
839
867
  if (body.temperature !== undefined) anthropicBody.temperature = body.temperature
868
+ if (body.top_p !== undefined) anthropicBody.top_p = body.top_p
869
+ if (body.stop) anthropicBody.stop_sequences = Array.isArray(body.stop) ? body.stop : [body.stop]
840
870
  // Convert OpenAI tools format to Anthropic tools format
841
871
  if (body.tools?.length) {
842
872
  anthropicBody.tools = openaiToAnthropicTools(body.tools)
@@ -874,12 +904,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
874
904
 
875
905
  const decoder = new TextDecoder()
876
906
  let buffer = ""
877
- const chatId = `chatcmpl-${Date.now()}`
907
+ const chatId = generateId("chatcmpl-")
878
908
  const created = Math.floor(Date.now() / 1000)
879
909
  let sentRole = false
880
910
  let finishReason: string | null = null
881
911
  // Track active tool calls for streaming
882
- const activeToolCalls: Map<number, { id: string; name: string; argBuffer: string }> = new Map()
912
+ const activeToolCalls: Map<number, { id: string; name: string }> = new Map()
883
913
  let toolCallIndex = 0
884
914
  let usageInfo: { input_tokens: number; output_tokens: number } | null = null
885
915
 
@@ -908,7 +938,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
908
938
  if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
909
939
  // Start of a tool_use block → emit tool_call function header
910
940
  const idx = toolCallIndex++
911
- activeToolCalls.set(event.index, { id: event.content_block.id, name: event.content_block.name, argBuffer: "" })
941
+ activeToolCalls.set(event.index, { id: event.content_block.id, name: event.content_block.name })
912
942
  controller.enqueue(encoder.encode(`data: ${JSON.stringify({
913
943
  id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
914
944
  choices: [{ index: 0, delta: {
@@ -937,7 +967,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
937
967
  const sr = event.delta?.stop_reason
938
968
  finishReason = sr === "tool_use" ? "tool_calls" : sr === "max_tokens" ? "length" : "stop"
939
969
  if (event.usage) {
940
- usageInfo = { input_tokens: event.usage.input_tokens ?? 0, output_tokens: event.usage.output_tokens ?? 0 }
970
+ const prevInput: number = usageInfo?.input_tokens ?? 0
971
+ const prevOutput: number = usageInfo?.output_tokens ?? 0
972
+ usageInfo = {
973
+ input_tokens: event.usage.input_tokens ?? prevInput,
974
+ output_tokens: event.usage.output_tokens ?? prevOutput
975
+ }
941
976
  }
942
977
  } else if (event.type === "message_start" && event.message?.usage) {
943
978
  // Capture input token count from message_start
@@ -996,6 +1031,12 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
996
1031
  })
997
1032
  app.get("/v1/chat/models", handleOpenaiModels)
998
1033
 
1034
+ // 404 catch-all — return Anthropic-format error for unknown routes
1035
+ app.all("*", (c) => c.json({
1036
+ type: "error",
1037
+ error: { type: "not_found_error", message: `${c.req.method} ${c.req.path} not found` }
1038
+ }, 404))
1039
+
999
1040
  return { app, config: finalConfig }
1000
1041
  }
1001
1042
 
@@ -9,5 +9,5 @@ export const DEFAULT_PROXY_CONFIG: ProxyConfig = {
9
9
  port: 3456,
10
10
  host: "127.0.0.1",
11
11
  debug: process.env.CLAUDE_PROXY_DEBUG === "1" || process.env.OPENCODE_CLAUDE_PROVIDER_DEBUG === "1",
12
- requestTimeoutMs: parseInt(process.env.CLAUDE_PROXY_TIMEOUT_MS ?? "300000", 10),
12
+ requestTimeoutMs: parseInt(process.env.CLAUDE_PROXY_TIMEOUT_MS ?? "1800000", 10),
13
13
  }