npm - opencode-claude-max-proxy - Versions diffs - 1.0.2 → 1.8.0 - Mend

opencode-claude-max-proxy 1.0.2 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +272 -113
package/bin/claude-proxy-supervisor.sh +62 -0
package/bin/claude-proxy.ts +26 -1
package/package.json +9 -6
package/src/logger.ts +68 -6
package/src/proxy/agentDefs.ts +102 -0
package/src/proxy/agentMatch.ts +93 -0
package/src/proxy/passthroughTools.ts +108 -0
package/src/proxy/server.ts +947 -118
package/src/proxy/types.ts +3 -1

package/src/proxy/server.ts CHANGED Viewed

@@ -5,6 +5,250 @@ import type { Context } from "hono"
 import type { ProxyConfig } from "./types"
 import { DEFAULT_PROXY_CONFIG } from "./types"
 import { claudeLog } from "../logger"
+import { execSync } from "child_process"
+import { existsSync } from "fs"
+import { fileURLToPath } from "url"
+import { join, dirname } from "path"
+import { opencodeMcpServer } from "../mcpTools"
+import { randomUUID, createHash } from "crypto"
+import { withClaudeLogContext } from "../logger"
+import { fuzzyMatchAgentName } from "./agentMatch"
+import { buildAgentDefinitions } from "./agentDefs"
+import { createPassthroughMcpServer, stripMcpPrefix, PASSTHROUGH_MCP_NAME, PASSTHROUGH_MCP_PREFIX } from "./passthroughTools"
+// --- Session Tracking ---
+// Maps OpenCode session ID (or fingerprint) → Claude SDK session ID
+interface SessionState {
+  claudeSessionId: string
+  lastAccess: number
+  messageCount: number
+}
+const sessionCache = new Map<string, SessionState>()
+const fingerprintCache = new Map<string, SessionState>()
+/** Clear all session caches (used in tests) */
+export function clearSessionCache() {
+  sessionCache.clear()
+  fingerprintCache.clear()
+}
+// Clean stale sessions every hour — sessions survive a full workday
+const SESSION_TTL_MS = 24 * 60 * 60 * 1000 // 24 hours
+setInterval(() => {
+  const now = Date.now()
+  for (const [key, val] of sessionCache) {
+    if (now - val.lastAccess > SESSION_TTL_MS) sessionCache.delete(key)
+  }
+  for (const [key, val] of fingerprintCache) {
+    if (now - val.lastAccess > SESSION_TTL_MS) fingerprintCache.delete(key)
+  }
+}, 60 * 60 * 1000)
+/** Hash the first user message to fingerprint a conversation */
+function getConversationFingerprint(messages: Array<{ role: string; content: any }>): string {
+  const firstUser = messages?.find((m) => m.role === "user")
+  if (!firstUser) return ""
+  const text = typeof firstUser.content === "string"
+    ? firstUser.content
+    : Array.isArray(firstUser.content)
+      ? firstUser.content.filter((b: any) => b.type === "text").map((b: any) => b.text).join("")
+      : ""
+  if (!text) return ""
+  return createHash("sha256").update(text.slice(0, 2000)).digest("hex").slice(0, 16)
+}
+/** Look up a cached session by header or fingerprint */
+function lookupSession(
+  opencodeSessionId: string | undefined,
+  messages: Array<{ role: string; content: any }>
+): SessionState | undefined {
+  // Primary: use x-opencode-session header
+  if (opencodeSessionId) {
+    return sessionCache.get(opencodeSessionId)
+  }
+  // Fallback: fingerprint (only when no header is present)
+  const fp = getConversationFingerprint(messages)
+  if (fp) return fingerprintCache.get(fp)
+  return undefined
+}
+/** Store a session mapping */
+function storeSession(
+  opencodeSessionId: string | undefined,
+  messages: Array<{ role: string; content: any }>,
+  claudeSessionId: string
+) {
+  if (!claudeSessionId) return
+  const state: SessionState = { claudeSessionId, lastAccess: Date.now(), messageCount: messages?.length || 0 }
+  if (opencodeSessionId) sessionCache.set(opencodeSessionId, state)
+  const fp = getConversationFingerprint(messages)
+  if (fp) fingerprintCache.set(fp, state)
+}
+/** Extract only the last user message (for resume — SDK already has history) */
+function getLastUserMessage(messages: Array<{ role: string; content: any }>): Array<{ role: string; content: any }> {
+  for (let i = messages.length - 1; i >= 0; i--) {
+    if (messages[i]?.role === "user") return [messages[i]!]
+  }
+  return messages.slice(-1)
+}
+// --- Error Classification ---
+// Detect specific SDK errors and return helpful messages to the client
+function classifyError(errMsg: string): { status: number; type: string; message: string } {
+  const lower = errMsg.toLowerCase()
+  // Authentication failures
+  if (lower.includes("401") || lower.includes("authentication") || lower.includes("invalid auth") || lower.includes("credentials")) {
+    return {
+      status: 401,
+      type: "authentication_error",
+      message: "Claude authentication expired or invalid. Run 'claude login' in your terminal to re-authenticate, then restart the proxy."
+    }
+  }
+  // Rate limiting
+  if (lower.includes("429") || lower.includes("rate limit") || lower.includes("too many requests")) {
+    return {
+      status: 429,
+      type: "rate_limit_error",
+      message: "Claude Max rate limit reached. Wait a moment and try again."
+    }
+  }
+  // Billing / subscription
+  if (lower.includes("402") || lower.includes("billing") || lower.includes("subscription") || lower.includes("payment")) {
+    return {
+      status: 402,
+      type: "billing_error",
+      message: "Claude Max subscription issue. Check your subscription status at https://claude.ai/settings/subscription"
+    }
+  }
+  // SDK process crash
+  if (lower.includes("exited with code") || lower.includes("process exited")) {
+    const codeMatch = errMsg.match(/exited with code (\d+)/)
+    const code = codeMatch ? codeMatch[1] : "unknown"
+    // Code 1 with no other info is usually auth
+    if (code === "1" && !lower.includes("tool") && !lower.includes("mcp")) {
+      return {
+        status: 401,
+        type: "authentication_error",
+        message: "Claude Code process crashed (exit code 1). This usually means authentication expired. Run 'claude login' in your terminal to re-authenticate, then restart the proxy."
+      }
+    }
+    return {
+      status: 502,
+      type: "api_error",
+      message: `Claude Code process exited unexpectedly (code ${code}). Check proxy logs for details. If this persists, try 'claude login' to refresh authentication.`
+    }
+  }
+  // Timeout
+  if (lower.includes("timeout") || lower.includes("timed out")) {
+    return {
+      status: 504,
+      type: "timeout_error",
+      message: "Request timed out. The operation may have been too complex. Try a simpler request."
+    }
+  }
+  // Server errors from Anthropic
+  if (lower.includes("500") || lower.includes("server error") || lower.includes("internal error")) {
+    return {
+      status: 502,
+      type: "api_error",
+      message: "Claude API returned a server error. This is usually temporary — try again in a moment."
+    }
+  }
+  // Overloaded
+  if (lower.includes("503") || lower.includes("overloaded")) {
+    return {
+      status: 503,
+      type: "overloaded_error",
+      message: "Claude is temporarily overloaded. Try again in a few seconds."
+    }
+  }
+  // Default
+  return {
+    status: 500,
+    type: "api_error",
+    message: errMsg || "Unknown error"
+  }
+}
+// Block SDK built-in tools so Claude only uses MCP tools (which have correct param names)
+const BLOCKED_BUILTIN_TOOLS = [
+  "Read", "Write", "Edit", "MultiEdit",
+  "Bash", "Glob", "Grep", "NotebookEdit",
+  "WebFetch", "WebSearch", "TodoWrite"
+]
+// Claude Code SDK tools that have NO equivalent in OpenCode.
+// Only block these — everything else either has an OpenCode equivalent
+// or is handled by OpenCode's own tool system.
+//
+// Tools where OpenCode has an equivalent but with a DIFFERENT name/schema
+// are blocked so Claude uses OpenCode's version instead of the SDK's.
+// See schema-incompatible section below.
+//
+// These truly have NO OpenCode equivalent (BLOCKED):
+const CLAUDE_CODE_ONLY_TOOLS = [
+  "ToolSearch",        // Claude Code deferred tool loading (internal mechanism)
+  "CronCreate",        // Claude Code cron jobs
+  "CronDelete",        // Claude Code cron jobs
+  "CronList",          // Claude Code cron jobs
+  "EnterPlanMode",     // Claude Code mode switching (OpenCode uses plan agent instead)
+  "ExitPlanMode",      // Claude Code mode switching
+  "EnterWorktree",     // Claude Code git worktree management
+  "ExitWorktree",      // Claude Code git worktree management
+  "NotebookEdit",      // Jupyter notebook editing
+  // Schema-incompatible: SDK tool name differs from OpenCode's.
+  // If Claude calls the SDK version, OpenCode won't recognize it.
+  // Block the SDK's so Claude only sees OpenCode's definitions.
+  "TodoWrite",         // OpenCode: todowrite (requires 'priority' field)
+  "AskUserQuestion",   // OpenCode: question
+  "Skill",             // OpenCode: skill / skill_mcp / slashcommand
+  "Agent",             // OpenCode: delegate_task / task
+  "TaskOutput",        // OpenCode: background_output
+  "TaskStop",          // OpenCode: background_cancel
+  "WebSearch",         // OpenCode: websearch_web_search_exa
+]
+const MCP_SERVER_NAME = "opencode"
+const ALLOWED_MCP_TOOLS = [
+  `mcp__${MCP_SERVER_NAME}__read`,
+  `mcp__${MCP_SERVER_NAME}__write`,
+  `mcp__${MCP_SERVER_NAME}__edit`,
+  `mcp__${MCP_SERVER_NAME}__bash`,
+  `mcp__${MCP_SERVER_NAME}__glob`,
+  `mcp__${MCP_SERVER_NAME}__grep`
+]
+function resolveClaudeExecutable(): string {
+  // 1. Try the SDK's bundled cli.js (same dir as this module's SDK)
+  try {
+    const sdkPath = fileURLToPath(import.meta.resolve("@anthropic-ai/claude-agent-sdk"))
+    const sdkCliJs = join(dirname(sdkPath), "cli.js")
+    if (existsSync(sdkCliJs)) return sdkCliJs
+  } catch {}
+  // 2. Try the system-installed claude binary
+  try {
+    const claudePath = execSync("which claude", { encoding: "utf-8" }).trim()
+    if (claudePath && existsSync(claudePath)) return claudePath
+  } catch {}
+  throw new Error("Could not find Claude Code executable. Install via: npm install -g @anthropic-ai/claude-code")
+}
+const claudeExecutable = resolveClaudeExecutable()
 function mapModelToClaudeModel(model: string): "sonnet" | "opus" | "haiku" {
   if (model.includes("opus")) return "opus"
@@ -12,6 +256,11 @@ function mapModelToClaudeModel(model: string): "sonnet" | "opus" | "haiku" {
   return "sonnet"
 }
+function isClosedControllerError(error: unknown): boolean {
+  if (!(error instanceof Error)) return false
+  return error.message.includes("Controller is already closed")
+}
 export function createProxyServer(config: Partial<ProxyConfig> = {}) {
   const finalConfig = { ...DEFAULT_PROXY_CONFIG, ...config }
   const app = new Hono()
@@ -28,25 +277,131 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
     })
   })
-  const handleMessages = async (c: Context) => {
-    try {
-      const body = await c.req.json()
-      const model = mapModelToClaudeModel(body.model || "sonnet")
-      const stream = body.stream ?? true
+  // --- Concurrency Control ---
+  // Each request spawns an SDK subprocess (cli.js, ~11MB). Spawning multiple
+  // simultaneously can crash the process. Serialize SDK queries with a queue.
+  const MAX_CONCURRENT_SESSIONS = parseInt(process.env.CLAUDE_PROXY_MAX_CONCURRENT || "10", 10)
+  let activeSessions = 0
+  const sessionQueue: Array<{ resolve: () => void }> = []
+  async function acquireSession(): Promise<void> {
+    if (activeSessions < MAX_CONCURRENT_SESSIONS) {
+      activeSessions++
+      return
+    }
+    // Wait for a slot
+    return new Promise<void>((resolve) => {
+      sessionQueue.push({ resolve })
+    })
+  }
+  function releaseSession(): void {
+    activeSessions--
+    const next = sessionQueue.shift()
+    if (next) {
+      activeSessions++
+      next.resolve()
+    }
+  }
+  const handleMessages = async (
+    c: Context,
+    requestMeta: { requestId: string; endpoint: string; queueEnteredAt: number; queueStartedAt: number }
+  ) => {
+    const requestStartAt = Date.now()
+    return withClaudeLogContext({ requestId: requestMeta.requestId, endpoint: requestMeta.endpoint }, async () => {
+      try {
+        const body = await c.req.json()
+        const model = mapModelToClaudeModel(body.model || "sonnet")
+        const stream = body.stream ?? true
+        const workingDirectory = process.env.CLAUDE_PROXY_WORKDIR || process.cwd()
-      claudeLog("proxy.anthropic.request", { model, stream, messageCount: body.messages?.length })
+        // Strip env vars that cause SDK subprocess to load unwanted plugins/features
+        const { CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS, ...cleanEnv } = process.env
+        // Session resume: look up cached Claude SDK session
+        const opencodeSessionId = c.req.header("x-opencode-session")
+        const cachedSession = lookupSession(opencodeSessionId, body.messages || [])
+        const resumeSessionId = cachedSession?.claudeSessionId
+        const isResume = Boolean(resumeSessionId)
+        // Debug: log request details
+        const msgSummary = body.messages?.map((m: any) => {
+          const contentTypes = Array.isArray(m.content)
+            ? m.content.map((b: any) => b.type).join(",")
+            : "string"
+          return `${m.role}[${contentTypes}]`
+        }).join(" → ")
+        console.error(`[PROXY] ${requestMeta.requestId} model=${model} stream=${stream} tools=${body.tools?.length ?? 0} resume=${isResume} active=${activeSessions}/${MAX_CONCURRENT_SESSIONS} msgs=${msgSummary}`)
+        claudeLog("request.received", {
+          model,
+          stream,
+          queueWaitMs: requestMeta.queueStartedAt - requestMeta.queueEnteredAt,
+          messageCount: Array.isArray(body.messages) ? body.messages.length : 0,
+          hasSystemPrompt: Boolean(body.system)
+        })
+      // Build system context from the request's system prompt
+      let systemContext = ""
+      if (body.system) {
+        if (typeof body.system === "string") {
+          systemContext = body.system
+        } else if (Array.isArray(body.system)) {
+          systemContext = body.system
+            .filter((b: any) => b.type === "text" && b.text)
+            .map((b: any) => b.text)
+            .join("\n")
+        }
+      }
+      // Extract available agent types from the Task tool definition.
+      // Used for: 1) SDK agent definitions, 2) fuzzy matching in PreToolUse hook, 3) prompt hints
+      let validAgentNames: string[] = []
+      let sdkAgents: Record<string, any> = {}
+      if (Array.isArray(body.tools)) {
+        const taskTool = body.tools.find((t: any) => t.name === "task" || t.name === "Task")
+        if (taskTool?.description) {
+          // Build SDK agent definitions from the Task tool description.
+          // This makes the SDK's native Task handler recognize agent names
+          // from OpenCode (with or without oh-my-opencode).
+          sdkAgents = buildAgentDefinitions(taskTool.description, [...ALLOWED_MCP_TOOLS])
+          validAgentNames = Object.keys(sdkAgents)
+          if (process.env.CLAUDE_PROXY_DEBUG) {
+            claudeLog("debug.agents", { names: validAgentNames, count: validAgentNames.length })
+          }
+          if (validAgentNames.length > 0) {
+            systemContext += `\n\nIMPORTANT: When using the task/Task tool, the subagent_type parameter must be one of these exact values (case-sensitive, lowercase): ${validAgentNames.join(", ")}. Do NOT capitalize or modify these names.`
+          }
+        }
+      }
-      const prompt = body.messages
-        ?.map((m: { role: string; content: string | Array<{ type: string; text?: string }> }) => {
+      // When resuming, only send the last user message (SDK already has history)
+      const messagesToConvert = isResume
+        ? getLastUserMessage(body.messages || [])
+        : body.messages
+      // Convert messages to a text prompt, preserving all content types
+      const conversationParts = messagesToConvert
+        ?.map((m: { role: string; content: string | Array<{ type: string; text?: string; content?: string; tool_use_id?: string; name?: string; input?: unknown; id?: string }> }) => {
           const role = m.role === "assistant" ? "Assistant" : "Human"
           let content: string
           if (typeof m.content === "string") {
             content = m.content
           } else if (Array.isArray(m.content)) {
             content = m.content
-              .filter((block) => block.type === "text" && block.text)
-              .map((block) => block.text)
-              .join("")
+              .map((block: any) => {
+                if (block.type === "text" && block.text) return block.text
+                if (block.type === "tool_use") return `[Tool Use: ${block.name}(${JSON.stringify(block.input)})]`
+                if (block.type === "tool_result") return `[Tool Result for ${block.tool_use_id}: ${typeof block.content === "string" ? block.content : JSON.stringify(block.content)}]`
+                return ""
+              })
+              .filter(Boolean)
+              .join("\n")
           } else {
             content = String(m.content)
           }
@@ -54,136 +409,595 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
         })
         .join("\n\n") || ""
-      if (!stream) {
-        let fullContent = ""
-        const response = query({
-          prompt,
-          options: { maxTurns: 1, model }
-        })
+      // --- Passthrough mode ---
+      // When enabled, ALL tool execution is forwarded to OpenCode instead of
+      // being handled internally. This enables multi-model agent delegation
+      // (e.g., oracle on GPT-5.2, explore on Gemini via oh-my-opencode).
+      const passthrough = Boolean(process.env.CLAUDE_PROXY_PASSTHROUGH)
+      const capturedToolUses: Array<{ id: string; name: string; input: any }> = []
+      // In passthrough mode, register OpenCode's tools as MCP tools so Claude
+      // can actually call them (not just see them as text descriptions).
+      let passthroughMcp: ReturnType<typeof createPassthroughMcpServer> | undefined
+      if (passthrough && Array.isArray(body.tools) && body.tools.length > 0) {
+        passthroughMcp = createPassthroughMcpServer(body.tools)
+      }
+      // In passthrough mode: block ALL tools, capture them for forwarding
+      // In normal mode: only fix agent names on Task tool
+      const sdkHooks = passthrough
+        ? {
+            PreToolUse: [{
+              matcher: "",  // Match ALL tools
+              hooks: [async (input: any) => {
+                capturedToolUses.push({
+                  id: input.tool_use_id,
+                  name: stripMcpPrefix(input.tool_name),
+                  input: input.tool_input,
+                })
+                return {
+                  decision: "block" as const,
+                  reason: "Forwarding to client for execution",
+                }
+              }],
+            }],
+          }
+        : validAgentNames.length > 0
+          ? {
+              PreToolUse: [{
+                matcher: "Task",
+                hooks: [async (input: any) => ({
+                  hookSpecificOutput: {
+                    hookEventName: "PreToolUse" as const,
+                    updatedInput: {
+                      ...input.tool_input,
+                      subagent_type: fuzzyMatchAgentName(
+                        String(input.tool_input?.subagent_type || ""),
+                        validAgentNames
+                      ),
+                    },
+                  },
+                })],
+              }],
+            }
+          : undefined
+      // Combine system context with conversation
+      const prompt = systemContext
+        ? `${systemContext}\n\n${conversationParts}`
+        : conversationParts
+        if (!stream) {
+          const contentBlocks: Array<Record<string, unknown>> = []
+          let assistantMessages = 0
+          const upstreamStartAt = Date.now()
+          let firstChunkAt: number | undefined
+          let currentSessionId: string | undefined
+          claudeLog("upstream.start", { mode: "non_stream", model })
+          try {
+            const response = query({
+              prompt,
+              options: {
+                maxTurns: passthrough ? 1 : 100,
+                cwd: workingDirectory,
+                model,
+                pathToClaudeCodeExecutable: claudeExecutable,
+                permissionMode: "bypassPermissions",
+                allowDangerouslySkipPermissions: true,
+                // In passthrough mode: block ALL SDK built-in tools, use OpenCode's via MCP
+                // In normal mode: block built-ins, use our own MCP replacements
+                ...(passthrough
+                  ? {
+                      disallowedTools: [...BLOCKED_BUILTIN_TOOLS, ...CLAUDE_CODE_ONLY_TOOLS],
+                      ...(passthroughMcp ? {
+                        allowedTools: passthroughMcp.toolNames,
+                        mcpServers: { [PASSTHROUGH_MCP_NAME]: passthroughMcp.server },
+                      } : {}),
+                    }
+                  : {
+                      disallowedTools: [...BLOCKED_BUILTIN_TOOLS],
+                      allowedTools: [...ALLOWED_MCP_TOOLS],
+                      mcpServers: { [MCP_SERVER_NAME]: opencodeMcpServer },
+                    }),
+                plugins: [],
+                env: { ...cleanEnv, ENABLE_TOOL_SEARCH: "false" },
+                ...(Object.keys(sdkAgents).length > 0 ? { agents: sdkAgents } : {}),
+                ...(resumeSessionId ? { resume: resumeSessionId } : {}),
+                ...(sdkHooks ? { hooks: sdkHooks } : {}),
+              }
+            })
+            for await (const message of response) {
+              // Capture session ID from SDK messages
+              if ((message as any).session_id) {
+                currentSessionId = (message as any).session_id
+              }
+              if (message.type === "assistant") {
+                assistantMessages += 1
+                if (!firstChunkAt) {
+                  firstChunkAt = Date.now()
+                  claudeLog("upstream.first_chunk", {
+                    mode: "non_stream",
+                    model,
+                    ttfbMs: firstChunkAt - upstreamStartAt
+                  })
+                }
+                // Preserve ALL content blocks (text, tool_use, thinking, etc.)
+                for (const block of message.message.content) {
+                  const b = block as Record<string, unknown>
+                  // In passthrough mode, strip MCP prefix from tool names
+                  if (passthrough && b.type === "tool_use" && typeof b.name === "string") {
+                    b.name = stripMcpPrefix(b.name as string)
+                  }
+                  contentBlocks.push(b)
+                }
+              }
+            }
+            claudeLog("upstream.completed", {
+              mode: "non_stream",
+              model,
+              assistantMessages,
+              durationMs: Date.now() - upstreamStartAt
+            })
+          } catch (error) {
+            claudeLog("upstream.failed", {
+              mode: "non_stream",
+              model,
+              durationMs: Date.now() - upstreamStartAt,
+              error: error instanceof Error ? error.message : String(error)
+            })
+            throw error
+          }
-        for await (const message of response) {
-          if (message.type === "assistant") {
-            for (const block of message.message.content) {
-              if (block.type === "text") {
-                fullContent += block.text
+          // In passthrough mode, add captured tool_use blocks from the hook
+          // (the SDK may not include them in content after blocking)
+          if (passthrough && capturedToolUses.length > 0) {
+            for (const tu of capturedToolUses) {
+              // Only add if not already in contentBlocks
+              if (!contentBlocks.some((b) => b.type === "tool_use" && (b as any).id === tu.id)) {
+                contentBlocks.push({
+                  type: "tool_use",
+                  id: tu.id,
+                  name: tu.name,
+                  input: tu.input,
+                })
               }
             }
           }
+          // Determine stop_reason based on content: tool_use if any tool blocks, else end_turn
+          const hasToolUse = contentBlocks.some((b) => b.type === "tool_use")
+          const stopReason = hasToolUse ? "tool_use" : "end_turn"
+          // If no content at all, add a fallback text block
+          if (contentBlocks.length === 0) {
+            contentBlocks.push({
+              type: "text",
+              text: "I can help with that. Could you provide more details about what you'd like me to do?"
+            })
+            claudeLog("response.fallback_used", { mode: "non_stream", reason: "no_content_blocks" })
+          }
+          claudeLog("response.completed", {
+            mode: "non_stream",
+            model,
+            durationMs: Date.now() - requestStartAt,
+            contentBlocks: contentBlocks.length,
+            hasToolUse
+          })
+          // Store session for future resume
+          if (currentSessionId) {
+            storeSession(opencodeSessionId, body.messages || [], currentSessionId)
+          }
+          const responseSessionId = currentSessionId || resumeSessionId || `session_${Date.now()}`
+          return new Response(JSON.stringify({
+            id: `msg_${Date.now()}`,
+            type: "message",
+            role: "assistant",
+            content: contentBlocks,
+            model: body.model,
+            stop_reason: stopReason,
+            usage: { input_tokens: 0, output_tokens: 0 }
+          }), {
+            headers: {
+              "Content-Type": "application/json",
+              "X-Claude-Session-ID": responseSessionId,
+            }
+          })
         }
-        return c.json({
-          id: `msg_${Date.now()}`,
-          type: "message",
-          role: "assistant",
-          content: [{ type: "text", text: fullContent }],
-          model: body.model,
-          stop_reason: "end_turn",
-          usage: { input_tokens: 0, output_tokens: 0 }
-        })
-      }
+        const encoder = new TextEncoder()
+        const readable = new ReadableStream({
+          async start(controller) {
+            const upstreamStartAt = Date.now()
+            let firstChunkAt: number | undefined
+            let heartbeatCount = 0
+            let streamEventsSeen = 0
+            let eventsForwarded = 0
+            let textEventsForwarded = 0
+            let bytesSent = 0
+            let streamClosed = false
-      const encoder = new TextEncoder()
-      const readable = new ReadableStream({
-        async start(controller) {
-          try {
-            controller.enqueue(encoder.encode(`event: message_start\ndata: ${JSON.stringify({
-              type: "message_start",
-              message: {
-                id: `msg_${Date.now()}`,
-                type: "message",
-                role: "assistant",
-                content: [],
-                model: body.model,
-                stop_reason: null,
-                usage: { input_tokens: 0, output_tokens: 0 }
+            claudeLog("upstream.start", { mode: "stream", model })
+            const safeEnqueue = (payload: Uint8Array, source: string): boolean => {
+              if (streamClosed) return false
+              try {
+                controller.enqueue(payload)
+                bytesSent += payload.byteLength
+                return true
+              } catch (error) {
+                if (isClosedControllerError(error)) {
+                  streamClosed = true
+                  claudeLog("stream.client_closed", { source, streamEventsSeen, eventsForwarded })
+                  return false
+                }
+                claudeLog("stream.enqueue_failed", {
+                  source,
+                  error: error instanceof Error ? error.message : String(error)
+                })
+                throw error
               }
-            })}\n\n`))
+            }
-            controller.enqueue(encoder.encode(`event: content_block_start\ndata: ${JSON.stringify({
-              type: "content_block_start",
-              index: 0,
-              content_block: { type: "text", text: "" }
-            })}\n\n`))
+            try {
+              let currentSessionId: string | undefined
+              const response = query({
+                prompt,
+                options: {
+                  maxTurns: passthrough ? 1 : 100,
+                  cwd: workingDirectory,
+                  model,
+                  pathToClaudeCodeExecutable: claudeExecutable,
+                  includePartialMessages: true,
+                  permissionMode: "bypassPermissions",
+                  allowDangerouslySkipPermissions: true,
+                  ...(passthrough
+                    ? {
+                        disallowedTools: [...BLOCKED_BUILTIN_TOOLS, ...CLAUDE_CODE_ONLY_TOOLS],
+                        ...(passthroughMcp ? {
+                          allowedTools: passthroughMcp.toolNames,
+                          mcpServers: { [PASSTHROUGH_MCP_NAME]: passthroughMcp.server },
+                        } : {}),
+                      }
+                    : {
+                        disallowedTools: [...BLOCKED_BUILTIN_TOOLS],
+                        allowedTools: [...ALLOWED_MCP_TOOLS],
+                        mcpServers: { [MCP_SERVER_NAME]: opencodeMcpServer },
+                      }),
+                  plugins: [],
+                  env: { ...cleanEnv, ENABLE_TOOL_SEARCH: "false" },
+                  ...(Object.keys(sdkAgents).length > 0 ? { agents: sdkAgents } : {}),
+                  ...(resumeSessionId ? { resume: resumeSessionId } : {}),
+                  ...(sdkHooks ? { hooks: sdkHooks } : {}),
+                }
+              })
-            const response = query({
-              prompt,
-              options: { maxTurns: 1, model }
-            })
+              const heartbeat = setInterval(() => {
+                heartbeatCount += 1
+                try {
+                  const payload = encoder.encode(`: ping\n\n`)
+                  if (!safeEnqueue(payload, "heartbeat")) {
+                    clearInterval(heartbeat)
+                    return
+                  }
+                  if (heartbeatCount % 5 === 0) {
+                    claudeLog("stream.heartbeat", { count: heartbeatCount })
+                  }
+                } catch (error) {
+                  claudeLog("stream.heartbeat_failed", {
+                    count: heartbeatCount,
+                    error: error instanceof Error ? error.message : String(error)
+                  })
+                  clearInterval(heartbeat)
+                }
+              }, 15_000)
+              const skipBlockIndices = new Set<number>()
+              let messageStartEmitted = false // Track if we've sent a message_start to the client
-            const heartbeat = setInterval(() => {
               try {
-                controller.enqueue(encoder.encode(`: ping\n\n`))
-              } catch {
+                for await (const message of response) {
+                  if (streamClosed) {
+                    break
+                  }
+                  // Capture session ID from any SDK message
+                  if ((message as any).session_id) {
+                    currentSessionId = (message as any).session_id
+                  }
+                  if (message.type === "stream_event") {
+                    streamEventsSeen += 1
+                    if (!firstChunkAt) {
+                      firstChunkAt = Date.now()
+                      claudeLog("upstream.first_chunk", {
+                        mode: "stream",
+                        model,
+                        ttfbMs: firstChunkAt - upstreamStartAt
+                      })
+                    }
+                    const event = message.event
+                    const eventType = (event as any).type
+                    const eventIndex = (event as any).index as number | undefined
+                    // Track MCP tool blocks (mcp__opencode__*) — these are internal tools
+                    // that the SDK executes. Don't forward them to OpenCode.
+                    if (eventType === "message_start") {
+                      skipBlockIndices.clear()
+                      // Only emit the first message_start — subsequent ones are internal SDK turns
+                      if (messageStartEmitted) {
+                        continue
+                      }
+                      messageStartEmitted = true
+                    }
+                    // Skip intermediate message_stop events (SDK will start another turn)
+                    // Only emit message_stop when the final message ends
+                    if (eventType === "message_stop") {
+                      // Peek: if there are more events coming, skip this message_stop
+                      // We handle this by only emitting message_stop at the very end (after the loop)
+                      continue
+                    }
+                    if (eventType === "content_block_start") {
+                      const block = (event as any).content_block
+                      if (block?.type === "tool_use" && typeof block.name === "string") {
+                        if (passthrough && block.name.startsWith(PASSTHROUGH_MCP_PREFIX)) {
+                          // Passthrough mode: strip prefix and forward to OpenCode
+                          block.name = stripMcpPrefix(block.name)
+                        } else if (block.name.startsWith("mcp__")) {
+                          // Internal mode: skip all MCP tool blocks (internal execution)
+                          if (eventIndex !== undefined) skipBlockIndices.add(eventIndex)
+                          continue
+                        }
+                      }
+                    }
+                    // Skip deltas and stops for MCP tool blocks
+                    if (eventIndex !== undefined && skipBlockIndices.has(eventIndex)) {
+                      continue
+                    }
+                    // Skip intermediate message_delta with stop_reason: tool_use
+                    // (SDK is about to execute MCP tools and continue)
+                    if (eventType === "message_delta") {
+                      const stopReason = (event as any).delta?.stop_reason
+                      if (stopReason === "tool_use" && skipBlockIndices.size > 0) {
+                        // All tool_use blocks in this turn were MCP — skip this delta
+                        continue
+                      }
+                    }
+                    // Forward all other events (text, non-MCP tool_use like Task, message events)
+                    const payload = encoder.encode(`event: ${eventType}\ndata: ${JSON.stringify(event)}\n\n`)
+                    if (!safeEnqueue(payload, `stream_event:${eventType}`)) {
+                      break
+                    }
+                    eventsForwarded += 1
+                    if (eventType === "content_block_delta") {
+                      const delta = (event as any).delta
+                      if (delta?.type === "text_delta") {
+                        textEventsForwarded += 1
+                      }
+                    }
+                  }
+                }
+              } finally {
                 clearInterval(heartbeat)
               }
-            }, 15_000)
-            try {
-              for await (const message of response) {
-                if (message.type === "assistant") {
-                  for (const block of message.message.content) {
-                    if (block.type === "text") {
-                      controller.enqueue(encoder.encode(`event: content_block_delta\ndata: ${JSON.stringify({
+              claudeLog("upstream.completed", {
+                mode: "stream",
+                model,
+                durationMs: Date.now() - upstreamStartAt,
+                streamEventsSeen,
+                eventsForwarded,
+                textEventsForwarded
+              })
+              // Store session for future resume
+              if (currentSessionId) {
+                storeSession(opencodeSessionId, body.messages || [], currentSessionId)
+              }
+              if (!streamClosed) {
+                // In passthrough mode, emit captured tool_use blocks as stream events
+                if (passthrough && capturedToolUses.length > 0 && messageStartEmitted) {
+                  for (let i = 0; i < capturedToolUses.length; i++) {
+                    const tu = capturedToolUses[i]!
+                    const blockIndex = eventsForwarded + i
+                    // content_block_start
+                    safeEnqueue(encoder.encode(
+                      `event: content_block_start\ndata: ${JSON.stringify({
+                        type: "content_block_start",
+                        index: blockIndex,
+                        content_block: { type: "tool_use", id: tu.id, name: tu.name, input: {} }
+                      })}\n\n`
+                    ), "passthrough_tool_block_start")
+                    // input_json_delta with the full input
+                    safeEnqueue(encoder.encode(
+                      `event: content_block_delta\ndata: ${JSON.stringify({
                         type: "content_block_delta",
-                        index: 0,
-                        delta: { type: "text_delta", text: block.text }
-                      })}\n\n`))
-                    }
+                        index: blockIndex,
+                        delta: { type: "input_json_delta", partial_json: JSON.stringify(tu.input) }
+                      })}\n\n`
+                    ), "passthrough_tool_input")
+                    // content_block_stop
+                    safeEnqueue(encoder.encode(
+                      `event: content_block_stop\ndata: ${JSON.stringify({
+                        type: "content_block_stop",
+                        index: blockIndex
+                      })}\n\n`
+                    ), "passthrough_tool_block_stop")
                   }
+                  // Emit message_delta with stop_reason: "tool_use"
+                  safeEnqueue(encoder.encode(
+                    `event: message_delta\ndata: ${JSON.stringify({
+                      type: "message_delta",
+                      delta: { stop_reason: "tool_use", stop_sequence: null },
+                      usage: { output_tokens: 0 }
+                    })}\n\n`
+                  ), "passthrough_message_delta")
+                }
+                // Emit the final message_stop (we skipped all intermediate ones)
+                if (messageStartEmitted) {
+                  safeEnqueue(encoder.encode(`event: message_stop\ndata: {"type":"message_stop"}\n\n`), "final_message_stop")
+                }
+                try { controller.close() } catch {}
+                streamClosed = true
+                claudeLog("stream.ended", {
+                  model,
+                  streamEventsSeen,
+                  eventsForwarded,
+                  textEventsForwarded,
+                  bytesSent,
+                  durationMs: Date.now() - requestStartAt
+                })
+                claudeLog("response.completed", {
+                  mode: "stream",
+                  model,
+                  durationMs: Date.now() - requestStartAt,
+                  streamEventsSeen,
+                  eventsForwarded,
+                  textEventsForwarded
+                })
+                if (textEventsForwarded === 0) {
+                  claudeLog("response.empty_stream", {
+                    model,
+                    streamEventsSeen,
+                    eventsForwarded,
+                    reason: "no_text_deltas_forwarded"
+                  })
                 }
               }
-            } finally {
-              clearInterval(heartbeat)
+            } catch (error) {
+              if (isClosedControllerError(error)) {
+                streamClosed = true
+                claudeLog("stream.client_closed", {
+                  source: "stream_catch",
+                  streamEventsSeen,
+                  eventsForwarded,
+                  textEventsForwarded,
+                  durationMs: Date.now() - requestStartAt
+                })
+                return
+              }
+              claudeLog("upstream.failed", {
+                mode: "stream",
+                model,
+                durationMs: Date.now() - upstreamStartAt,
+                streamEventsSeen,
+                textEventsForwarded,
+                error: error instanceof Error ? error.message : String(error)
+              })
+              const streamErr = classifyError(error instanceof Error ? error.message : String(error))
+              claudeLog("proxy.anthropic.error", { error: error instanceof Error ? error.message : String(error), classified: streamErr.type })
+              safeEnqueue(encoder.encode(`event: error\ndata: ${JSON.stringify({
+                type: "error",
+                error: { type: streamErr.type, message: streamErr.message }
+              })}\n\n`), "error_event")
+              if (!streamClosed) {
+                try { controller.close() } catch {}
+                streamClosed = true
+              }
             }
+          }
+        })
-            controller.enqueue(encoder.encode(`event: content_block_stop\ndata: ${JSON.stringify({
-              type: "content_block_stop",
-              index: 0
-            })}\n\n`))
+        const streamSessionId = resumeSessionId || `session_${Date.now()}`
+        return new Response(readable, {
+          headers: {
+            "Content-Type": "text/event-stream",
+            "Cache-Control": "no-cache",
+            Connection: "keep-alive",
+            "X-Claude-Session-ID": streamSessionId
+          }
+        })
+      } catch (error) {
+        const errMsg = error instanceof Error ? error.message : String(error)
+        claudeLog("error.unhandled", {
+          durationMs: Date.now() - requestStartAt,
+          error: errMsg
+        })
-            controller.enqueue(encoder.encode(`event: message_delta\ndata: ${JSON.stringify({
-              type: "message_delta",
-              delta: { stop_reason: "end_turn" },
-              usage: { output_tokens: 0 }
-            })}\n\n`))
+        // Detect specific error types and return helpful messages
+        const classified = classifyError(errMsg)
-            controller.enqueue(encoder.encode(`event: message_stop\ndata: ${JSON.stringify({
-              type: "message_stop"
-            })}\n\n`))
+        claudeLog("proxy.error", { error: errMsg, classified: classified.type })
+        return new Response(
+          JSON.stringify({ type: "error", error: { type: classified.type, message: classified.message } }),
+          { status: classified.status, headers: { "Content-Type": "application/json" } }
+        )
+      }
+    })
+  }
-            controller.close()
-          } catch (error) {
-            claudeLog("proxy.anthropic.error", { error: error instanceof Error ? error.message : String(error) })
-            controller.enqueue(encoder.encode(`event: error\ndata: ${JSON.stringify({
-              type: "error",
-              error: { type: "api_error", message: error instanceof Error ? error.message : "Unknown error" }
-            })}\n\n`))
-            controller.close()
-          }
-        }
-      })
+  app.post("/v1/messages", async (c) => {
+    const requestId = c.req.header("x-request-id") || randomUUID()
+    const startedAt = Date.now()
+    claudeLog("request.enter", { requestId, endpoint: "/v1/messages" })
+    return handleMessages(c, { requestId, endpoint: "/v1/messages", queueEnteredAt: startedAt, queueStartedAt: startedAt })
+  })
-      return new Response(readable, {
-        headers: {
-          "Content-Type": "text/event-stream",
-          "Cache-Control": "no-cache",
-          Connection: "keep-alive"
-        }
+  app.post("/messages", async (c) => {
+    const requestId = c.req.header("x-request-id") || randomUUID()
+    const startedAt = Date.now()
+    claudeLog("request.enter", { requestId, endpoint: "/messages" })
+    return handleMessages(c, { requestId, endpoint: "/messages", queueEnteredAt: startedAt, queueStartedAt: startedAt })
+  })
+  // Health check endpoint — verifies auth status
+  app.get("/health", (c) => {
+    try {
+      const authJson = execSync("claude auth status", { encoding: "utf-8", timeout: 5000 })
+      const auth = JSON.parse(authJson)
+      if (!auth.loggedIn) {
+        return c.json({
+          status: "unhealthy",
+          error: "Not logged in. Run: claude login",
+          auth: { loggedIn: false }
+        }, 503)
+      }
+      return c.json({
+        status: "healthy",
+        auth: {
+          loggedIn: true,
+          email: auth.email,
+          subscriptionType: auth.subscriptionType,
+        },
+        mode: process.env.CLAUDE_PROXY_PASSTHROUGH ? "passthrough" : "internal",
       })
-    } catch (error) {
-      claudeLog("proxy.error", { error: error instanceof Error ? error.message : String(error) })
+    } catch {
       return c.json({
-        type: "error",
-        error: {
-          type: "api_error",
-          message: error instanceof Error ? error.message : "Unknown error"
-        }
-      }, 500)
+        status: "degraded",
+        error: "Could not verify auth status",
+        mode: process.env.CLAUDE_PROXY_PASSTHROUGH ? "passthrough" : "internal",
+      })
     }
-  }
-  app.post("/v1/messages", handleMessages)
-  app.post("/messages", handleMessages)
+  })
   return { app, config: finalConfig }
 }
@@ -191,11 +1005,26 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}) {
 export async function startProxyServer(config: Partial<ProxyConfig> = {}) {
   const { app, config: finalConfig } = createProxyServer(config)
-  const server = Bun.serve({
-    port: finalConfig.port,
-    hostname: finalConfig.host,
-    fetch: app.fetch
-  })
+  let server
+  try {
+    server = Bun.serve({
+      port: finalConfig.port,
+      hostname: finalConfig.host,
+      idleTimeout: finalConfig.idleTimeoutSeconds,
+      fetch: app.fetch
+    })
+  } catch (error: unknown) {
+    if (error instanceof Error && "code" in error && error.code === "EADDRINUSE") {
+      console.error(`\nError: Port ${finalConfig.port} is already in use.`)
+      console.error(`\nIs another instance of the proxy already running?`)
+      console.error(`  Check with: lsof -i :${finalConfig.port}`)
+      console.error(`  Kill it with: kill $(lsof -ti :${finalConfig.port})`)
+      console.error(`\nOr use a different port:`)
+      console.error(`  CLAUDE_PROXY_PORT=4567 bun run proxy`)
+      process.exit(1)
+    }
+    throw error
+  }
   console.log(`Claude Max Proxy (Anthropic API) running at http://${finalConfig.host}:${finalConfig.port}`)
   console.log(`\nTo use with OpenCode, run:`)