npm - mobygate - Versions diffs - 0.3.0 - Mend

mobygate 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/CHANGELOG.md +207 -0
package/LICENSE +21 -0
package/README.md +429 -0
package/bin/mobygate.js +443 -0
package/index.html +805 -0
package/launchd/ai.mobygate.auth-refresh.plist +83 -0
package/lib/ascii.js +108 -0
package/lib/config.js +131 -0
package/lib/dashboard-bus.js +158 -0
package/lib/platform.js +584 -0
package/lib/session-store.js +112 -0
package/mcp-inspect.mjs +186 -0
package/package.json +62 -0
package/scripts/auth-helper.js +198 -0
package/scripts/auth-refresh.js +41 -0
package/scripts/auth-status.js +36 -0
package/server.js +1076 -0

package/server.js ADDED Viewed

@@ -0,0 +1,1076 @@
+/**
+ * Mobygate
+ *
+ * OpenAI-compatible local proxy for Claude Max. The Möbius-strip gateway:
+ * OpenAI-shape requests in, Claude Max subscription inference out, on a single
+ * continuous loop. Routes through the Claude Agent SDK (no CLI spawn hacks).
+ *
+ * Flow: Client → POST /v1/chat/completions → Agent SDK query() → SSE back
+ */
+import { fileURLToPath } from 'url';
+import { dirname, join } from 'path';
+// Preflight: if node_modules is stale/missing, die with a readable message
+// instead of an opaque ERR_MODULE_NOT_FOUND stack trace. Common after a fresh
+// clone or an SDK-version bump where the user forgot `npm install`.
+let express, uuidModule, sdkModule;
+try {
+  [express, uuidModule, sdkModule] = await Promise.all([
+    import('express').then((m) => m.default),
+    import('uuid'),
+    import('@anthropic-ai/claude-agent-sdk'),
+  ]);
+} catch (e) {
+  const missing = /Cannot find package|ERR_MODULE_NOT_FOUND/.test(e?.message || '');
+  if (missing) {
+    const rawPkg = /Cannot find package '([^']+)'/.exec(e.message)?.[1] || 'a dependency';
+    const pkg = rawPkg.length > 48 ? rawPkg.slice(0, 45) + '...' : rawPkg;
+    console.error([
+      '',
+      '╔═══════════════════════════════════════════════════════════════════╗',
+      '║  Mobygate — startup aborted                                       ║',
+      '╠═══════════════════════════════════════════════════════════════════╣',
+      `║  Missing package: ${pkg.padEnd(48)}║`,
+      '║                                                                   ║',
+      '║  Fix:    npm install                                              ║',
+      '║  Or:     npm run up    (installs deps, then starts the server)    ║',
+      '║                                                                   ║',
+      '║  After pulling new commits, always run `npm install` first.       ║',
+      '╚═══════════════════════════════════════════════════════════════════╝',
+      '',
+    ].join('\n'));
+    process.exit(1);
+  }
+  throw e;
+}
+const { v4: uuidv4 } = uuidModule;
+const { query } = sdkModule;
+// Auth helper — wraps query() with 401-retry and exposes refresh probe
+import { runWithAuthRetry, getAuthStatus, forceRefresh, is401Error, isAuthFailureText, AuthFailureInResultText } from './scripts/auth-helper.js';
+import { banner } from './lib/ascii.js';
+import { bus as dashboardBus } from './lib/dashboard-bus.js';
+import { loadSessions, saveSessions, flushSessionsNow } from './lib/session-store.js';
+import { LOGS_DIR } from './lib/config.js';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+const PORT = parseInt(process.env.PORT || '3456', 10);
+const DEFAULT_MODEL = process.env.DEFAULT_MODEL || 'claude-opus-4-7[1m]';
+const SESSION_TTL_MS = parseInt(process.env.SESSION_TTL_MS || String(60 * 60 * 1000), 10); // 1 hour default
+// ---------------------------------------------------------------------------
+// Session store — maps client keys → SDK session IDs (persisted to disk)
+// ---------------------------------------------------------------------------
+// Rehydrated from ~/.mobygate/sessions.json at module load so sessions
+// survive `mobygate restart`, crashes, and reboots. Every mutation (create,
+// update, expire, manual-delete) triggers a debounced write; SIGTERM/SIGINT
+// flushes synchronously so we don't lose the last 500 ms of changes.
+const sessions = loadSessions(SESSION_TTL_MS);
+if (sessions.size > 0) console.log(`[session] rehydrated ${sessions.size} session(s) from disk`);
+function getSession(clientKey) {
+  if (!clientKey) return null;
+  const entry = sessions.get(clientKey);
+  if (!entry) return null;
+  if (Date.now() - entry.lastUsed > SESSION_TTL_MS) {
+    console.log(`  [session] expired: ${clientKey} (idle ${Math.round((Date.now() - entry.lastUsed) / 1000)}s)`);
+    sessions.delete(clientKey);
+    dashboardBus?.emitEvent?.({ type: 'session.expired', key: clientKey, reason: 'ttl' });
+    saveSessions(sessions);
+    return null;
+  }
+  return entry;
+}
+function upsertSession(clientKey, sdkSessionId, model) {
+  const existing = sessions.get(clientKey);
+  if (existing) {
+    existing.sdkSessionId = sdkSessionId;
+    existing.lastUsed = Date.now();
+    existing.messageCount++;
+    dashboardBus?.emitEvent?.({ type: 'session.updated', key: clientKey, messageCount: existing.messageCount });
+    saveSessions(sessions);
+    return existing;
+  }
+  const entry = { sdkSessionId, model, lastUsed: Date.now(), messageCount: 1, createdAt: Date.now() };
+  sessions.set(clientKey, entry);
+  dashboardBus?.emitEvent?.({ type: 'session.created', key: clientKey, model });
+  saveSessions(sessions);
+  return entry;
+}
+// Periodic cleanup of expired sessions
+setInterval(() => {
+  const now = Date.now();
+  let cleaned = 0;
+  for (const [key, entry] of sessions) {
+    if (now - entry.lastUsed > SESSION_TTL_MS) {
+      sessions.delete(key);
+      dashboardBus?.emitEvent?.({ type: 'session.expired', key, reason: 'ttl' });
+      cleaned++;
+    }
+  }
+  if (cleaned > 0) {
+    console.log(`  [session] cleanup: removed ${cleaned} expired session(s), ${sessions.size} active`);
+    saveSessions(sessions);
+  }
+}, 5 * 60 * 1000); // every 5 minutes
+// Flush pending writes on graceful shutdown so the last 500 ms of session
+// mutations aren't lost when launchd/systemd/Task Scheduler stops the
+// service (e.g. on `mobygate restart`).
+for (const sig of ['SIGTERM', 'SIGINT', 'SIGHUP']) {
+  process.on(sig, () => {
+    try { flushSessionsNow(); } catch {}
+    process.exit(0);
+  });
+}
+// ---------------------------------------------------------------------------
+// Model mapping — OpenAI model names → SDK model identifiers
+// ---------------------------------------------------------------------------
+// Opus 4.7 ships a native 1M-context variant addressed as `claude-opus-4-7[1m]`.
+// Default opus aliases route to the 1M form to match the advertised context window.
+// Pass `claude-opus-4-7-200k` for the standard (cheaper) 200k variant.
+const MODEL_MAP = {
+  'claude-opus-4': 'claude-opus-4-7[1m]',
+  'claude-opus-4-6': 'claude-opus-4-6',
+  'claude-opus-4-7': 'claude-opus-4-7[1m]',
+  'claude-opus-4-7[1m]': 'claude-opus-4-7[1m]',
+  'claude-opus-4-7-1m': 'claude-opus-4-7[1m]',
+  'claude-opus-4-7-200k': 'claude-opus-4-7',
+  'claude-sonnet-4': 'claude-sonnet-4-5-20250929',
+  'claude-sonnet-4-5': 'claude-sonnet-4-5-20250929',
+  'claude-sonnet-4-6': 'claude-sonnet-4-5-20250929', // SDK resolves 4-6 to non-existent dated version
+  'claude-haiku-4': 'claude-haiku-4-5-20251001',
+  'claude-haiku-4-5': 'claude-haiku-4-5-20251001',
+  'opus': 'claude-opus-4-7[1m]',
+  'sonnet': 'claude-sonnet-4-5-20250929',
+  'haiku': 'claude-haiku-4-5-20251001',
+};
+function resolveModel(model) {
+  if (!model) return DEFAULT_MODEL;
+  // Strip provider prefix (e.g., "claude-max-proxy/claude-opus-4-6" → "claude-opus-4-6")
+  const stripped = model.replace(/^[^/]+\//, '');
+  return MODEL_MAP[stripped] || MODEL_MAP[model] || DEFAULT_MODEL;
+}
+// ---------------------------------------------------------------------------
+// OpenAI messages → single prompt string
+// ---------------------------------------------------------------------------
+function extractContent(content) {
+  if (typeof content === 'string') return content;
+  if (Array.isArray(content)) {
+    return content
+      .map((part) => {
+        if (typeof part === 'string') return part;
+        if (part.type === 'text') return part.text;
+        if (part.type === 'image_url') return ''; // images carried separately; drop from text
+        return JSON.stringify(part);
+      })
+      .filter(Boolean)
+      .join('\n');
+  }
+  if (content && typeof content === 'object') return JSON.stringify(content);
+  return String(content || '');
+}
+// Convert an OpenAI message.content array into Anthropic image content blocks.
+// Supports both data: URLs (base64) and remote https URLs.
+function extractImageBlocks(content) {
+  if (!Array.isArray(content)) return [];
+  const blocks = [];
+  for (const part of content) {
+    if (!part || part.type !== 'image_url') continue;
+    const url = typeof part.image_url === 'string' ? part.image_url : part.image_url?.url;
+    if (!url) continue;
+    const dataMatch = /^data:([^;]+);base64,(.+)$/.exec(url);
+    if (dataMatch) {
+      blocks.push({ type: 'image', source: { type: 'base64', media_type: dataMatch[1], data: dataMatch[2] } });
+    } else {
+      blocks.push({ type: 'image', source: { type: 'url', url } });
+    }
+  }
+  return blocks;
+}
+// Collect images from the LAST user message (OpenAI only attaches images to the latest turn).
+function collectImages(messages) {
+  for (let i = messages.length - 1; i >= 0; i--) {
+    if (messages[i].role === 'user') return extractImageBlocks(messages[i].content);
+  }
+  return [];
+}
+// ---------------------------------------------------------------------------
+// Tool calling (Path B: prompt-embedded protocol)
+// ---------------------------------------------------------------------------
+// The Claude Agent SDK cannot stream OpenAI-style function-call events back to
+// the caller (MCP handlers execute in-process and pollute session state; see
+// README "Known Gaps"). Workaround: inject client-provided tool schemas into
+// the system prompt and instruct the model to emit <tool_call>{...}</tool_call>
+// tags. We parse those out and re-emit as OpenAI `tool_calls`. Tool results
+// coming back from the client get wrapped in <tool_result> blocks.
+function hasTools(body) {
+  return Array.isArray(body?.tools) && body.tools.length > 0;
+}
+function buildToolInstructions(tools) {
+  const lines = [
+    'You have access to CLIENT-DEFINED tools listed below. To invoke a tool, emit one or more <tool_call> tags, each containing a strict JSON object with "name" and "arguments":',
+    '',
+    '<tool_call>{"name":"<tool_name>","arguments":{<args>}}</tool_call>',
+    '',
+    'Rules:',
+    '- Do NOT wrap <tool_call> tags in markdown code fences.',
+    '- When you emit <tool_call> tags, output ONLY the tags — no prose, no explanation, no other text.',
+    '- You may emit multiple <tool_call> tags to request parallel calls.',
+    '- Tool results will be returned as <tool_result id="..." name="...">...</tool_result> blocks. After results arrive, continue toward the final answer.',
+    '- When you have the final answer and need no more tool calls, respond normally WITHOUT any <tool_call> tag.',
+    '- Do NOT call any other tool (Read, Bash, Grep, etc.) — only the tools listed below.',
+    '',
+    'Available tools:',
+  ];
+  for (const t of tools) {
+    if (t?.type !== 'function' || !t.function) continue;
+    const fn = t.function;
+    lines.push(`<tool name="${fn.name}">`);
+    if (fn.description) lines.push(`  <description>${fn.description}</description>`);
+    lines.push(`  <parameters>${JSON.stringify(fn.parameters || { type: 'object', properties: {} })}</parameters>`);
+    lines.push('</tool>');
+  }
+  return lines.join('\n');
+}
+function formatAssistantForReplay(msg) {
+  const parts = [];
+  const text = extractContent(msg.content);
+  if (text) parts.push(text);
+  if (Array.isArray(msg.tool_calls)) {
+    for (const tc of msg.tool_calls) {
+      if (tc?.type === 'function' && tc.function) {
+        let args = {};
+        try { args = JSON.parse(tc.function.arguments || '{}'); } catch {}
+        parts.push(`<tool_call>${JSON.stringify({ name: tc.function.name, arguments: args })}</tool_call>`);
+      }
+    }
+  }
+  return parts.join('\n');
+}
+function formatToolResult(msg) {
+  const content = extractContent(msg.content);
+  const id = msg.tool_call_id || 'unknown';
+  const name = msg.name || '';
+  return `<tool_result id="${id}" name="${name}">\n${content}\n</tool_result>`;
+}
+// Parse the model's text output for <tool_call> tags. Returns
+//   { toolCalls: [{id, name, arguments}], textBefore: string }
+// when at least one valid call is found, else null.
+function parseToolCalls(text) {
+  if (!text || !text.includes('<tool_call>')) return null;
+  const re = /<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g;
+  const calls = [];
+  let firstIdx = -1;
+  let m;
+  while ((m = re.exec(text)) !== null) {
+    if (firstIdx === -1) firstIdx = m.index;
+    try {
+      const obj = JSON.parse(m[1]);
+      if (obj && typeof obj.name === 'string') {
+        calls.push({
+          id: `call_${uuidv4().replace(/-/g, '').slice(0, 20)}`,
+          name: obj.name,
+          arguments: JSON.stringify(obj.arguments ?? {}),
+        });
+      }
+    } catch {
+      // ignore malformed tool_call blocks
+    }
+  }
+  if (!calls.length) return null;
+  return { toolCalls: calls, textBefore: text.slice(0, firstIdx).trim() };
+}
+// Detect whether the running text contains a COMPLETE <tool_call>...</tool_call>
+// pair — used to abort the SDK early once a call has been emitted.
+function hasCompleteToolCall(text) {
+  return /<tool_call>\s*[\s\S]*?<\/tool_call>/.test(text);
+}
+function messagesToPrompt(messages, { resuming = false, tools = null } = {}) {
+  // When resuming, the SDK already has full history. Only send the new tail:
+  // tool_results (if the client is replying with tool outputs) and/or a fresh
+  // user message.
+  if (resuming) {
+    const toolResults = [];
+    let userText = '';
+    for (let i = messages.length - 1; i >= 0; i--) {
+      const msg = messages[i];
+      if (msg.role === 'tool') {
+        toolResults.unshift(formatToolResult(msg));
+      } else if (msg.role === 'user') {
+        userText = extractContent(msg.content);
+        break;
+      } else {
+        break;
+      }
+    }
+    const parts = [];
+    if (toolResults.length) {
+      parts.push(`<tool_results>\n${toolResults.join('\n')}\n</tool_results>`);
+      // The model sometimes treats a bare <tool_results> block as "just data"
+      // and returns empty. A short nudge keeps the turn productive without
+      // biasing what comes next.
+      if (!userText) parts.push('Use the tool results above to continue toward the final answer. If more tool calls are needed, emit them; otherwise respond directly.');
+    }
+    if (userText) parts.push(userText);
+    return parts.join('\n\n') || extractContent(messages[messages.length - 1].content);
+  }
+  const parts = [];
+  // Tool instructions prepended once at the top of the system context.
+  if (tools && tools.length) {
+    parts.push(`<system>\n${buildToolInstructions(tools)}\n</system>\n`);
+  }
+  // Group consecutive tool-role messages so they emit as one <tool_results> block.
+  let toolBuffer = [];
+  const flushTools = () => {
+    if (toolBuffer.length) {
+      parts.push(`<tool_results>\n${toolBuffer.join('\n')}\n</tool_results>\n`);
+      toolBuffer = [];
+    }
+  };
+  for (const msg of messages) {
+    if (msg.role === 'tool') {
+      toolBuffer.push(formatToolResult(msg));
+      continue;
+    }
+    flushTools();
+    switch (msg.role) {
+      case 'system':
+        parts.push(`<system>\n${extractContent(msg.content)}\n</system>\n`);
+        break;
+      case 'user':
+        parts.push(extractContent(msg.content));
+        break;
+      case 'assistant':
+        parts.push(`<previous_response>\n${formatAssistantForReplay(msg)}\n</previous_response>\n`);
+        break;
+    }
+  }
+  flushTools();
+  return parts.join('\n').trim();
+}
+// Wrap a prompt + optional image blocks into the form query() expects.
+// Returns a string when there are no images (fast path), or an async iterable
+// yielding one SDKUserMessage with multi-part content when there are.
+function buildQueryPrompt(promptText, imageBlocks) {
+  if (!imageBlocks.length) return promptText;
+  const content = [
+    { type: 'text', text: promptText || '' },
+    ...imageBlocks,
+  ];
+  async function* gen() {
+    yield {
+      type: 'user',
+      message: { role: 'user', content },
+      parent_tool_use_id: null,
+    };
+  }
+  return gen();
+}
+// ---------------------------------------------------------------------------
+// Normalize model name for OpenAI response format
+// ---------------------------------------------------------------------------
+function normalizeModelName(model) {
+  if (model?.includes('opus')) return 'claude-opus-4';
+  if (model?.includes('sonnet')) return 'claude-sonnet-4';
+  if (model?.includes('haiku')) return 'claude-haiku-4';
+  return model || 'claude-sonnet-4';
+}
+// ---------------------------------------------------------------------------
+// SSE helpers
+// ---------------------------------------------------------------------------
+function makeChunk(requestId, model, content, role, finishReason) {
+  return {
+    id: `chatcmpl-${requestId}`,
+    object: 'chat.completion.chunk',
+    created: Math.floor(Date.now() / 1000),
+    model: normalizeModelName(model),
+    choices: [{
+      index: 0,
+      delta: {
+        ...(role ? { role } : {}),
+        ...(content !== undefined ? { content } : {}),
+      },
+      finish_reason: finishReason || null,
+    }],
+  };
+}
+function sendSSE(res, data) {
+  if (!res.writableEnded) {
+    res.write(`data: ${JSON.stringify(data)}\n\n`);
+  }
+}
+// ---------------------------------------------------------------------------
+// POST /v1/chat/completions — streaming
+// ---------------------------------------------------------------------------
+async function handleStreaming(req, res, body, requestId, sessionKey) {
+  const existing = getSession(sessionKey);
+  const resuming = !!existing?.sdkSessionId;
+  const toolsEnabled = hasTools(body);
+  const promptText = messagesToPrompt(body.messages, { resuming, tools: toolsEnabled ? body.tools : null });
+  const images = collectImages(body.messages);
+  const prompt = buildQueryPrompt(promptText, images);
+  const model = resolveModel(body.model);
+  if (images.length) console.log(`  [multimodal] ${images.length} image block(s)`);
+  if (toolsEnabled) console.log(`  [tools] ${body.tools.length} client tool(s) — buffering stream`);
+  res.setHeader('Content-Type', 'text/event-stream');
+  res.setHeader('Cache-Control', 'no-cache');
+  res.setHeader('Connection', 'keep-alive');
+  res.setHeader('X-Request-Id', requestId);
+  if (sessionKey) res.setHeader('X-Session-Id', sessionKey);
+  res.flushHeaders();
+  res.write(':ok\n\n');
+  const abortController = new AbortController();
+  let isFirst = true;
+  let resolvedModel = model;
+  let capturedSessionId = existing?.sdkSessionId || null;
+  let clientDisconnected = false;
+  res.on('close', () => {
+    clientDisconnected = true;
+    abortController.abort();
+  });
+  if (resuming) {
+    console.log(`  [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
+  }
+  let bufferedText = ''; // only used when toolsEnabled
+  const runQuery = async () => {
+    // Reset per-attempt state so a 401 retry starts clean
+    bufferedText = '';
+    isFirst = true;
+    resolvedModel = model;
+    capturedSessionId = existing?.sdkSessionId || null;
+    for await (const message of query({
+      prompt,
+      options: {
+        model,
+        maxTurns: toolsEnabled ? 5 : 200,
+        permissionMode: 'bypassPermissions',
+        allowDangerouslySkipPermissions: true,
+        abortController,
+        ...(toolsEnabled ? { allowedTools: [] } : {}),
+        ...(resuming ? { resume: existing.sdkSessionId } : {}),
+        ...(sessionKey && !resuming ? { persistSession: true } : {}),
+      },
+    })) {
+      if (clientDisconnected) break;
+      const msgPreview = message.type === 'assistant'
+        ? `content_keys=${Object.keys(message).join(',')}`
+        : message.type === 'result'
+          ? `result=${(message.result || '').slice(0, 60)}`
+          : message.subtype || '';
+      console.log(`  [msg] type=${message.type} ${msgPreview}`);
+      if (message.type === 'system' && message.subtype === 'init' && message.model) {
+        resolvedModel = message.model;
+      }
+      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
+        capturedSessionId = message.session_id;
+        console.log(`  [session] captured sdk session: ${capturedSessionId}`);
+      }
+      // Extract text from this assistant message
+      let turnText = '';
+      if (message.type === 'assistant' && message.message?.content) {
+        const content = message.message.content;
+        if (Array.isArray(content)) {
+          for (const b of content) if (b.type === 'text' && b.text) turnText += b.text;
+        } else if (typeof content === 'string') {
+          turnText = content;
+        }
+      }
+      // Detect auth failure surfaced inline (common on long-running proxies
+      // where the SDK's cached creds expire). Throw so runWithAuthRetry
+      // treats it like a real 401 exception.
+      if (turnText && isAuthFailureText(turnText) && isFirst) {
+        abortController.abort();
+        throw new AuthFailureInResultText(turnText);
+      }
+      if (turnText) {
+        if (toolsEnabled) {
+          bufferedText += turnText;
+          // Abort early once we see a complete <tool_call>...</tool_call>
+          if (hasCompleteToolCall(bufferedText)) {
+            console.log('  [tools] complete tool_call detected — aborting SDK');
+            abortController.abort();
+            break;
+          }
+        } else {
+          sendSSE(res, makeChunk(requestId, resolvedModel, turnText, isFirst ? 'assistant' : undefined, null));
+          isFirst = false;
+        }
+      }
+      if (message.type === 'result') {
+        if (message.result && isAuthFailureText(message.result) && isFirst) {
+          throw new AuthFailureInResultText(message.result);
+        }
+        if (!toolsEnabled && message.result && isFirst) {
+          sendSSE(res, makeChunk(requestId, resolvedModel, message.result, 'assistant', null));
+          isFirst = false;
+        }
+        if (toolsEnabled && !bufferedText && message.result) bufferedText = message.result;
+        break;
+      }
+    }
+  };
+  try {
+    await runWithAuthRetry({
+      attempt: runQuery,
+      // Only retry if we haven't written a real chunk yet. In tools mode we
+      // buffer internally so any retry is safe regardless.
+      bailIfStarted: () => !toolsEnabled && !isFirst,
+      onRefreshing: (err) => console.warn(`[auth] 401 on stream — refreshing (${err.message?.slice(0, 80)})`),
+      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying stream`),
+    });
+  } catch (err) {
+    // Abort from tool-call detection surfaces as an abort error — not a real failure
+    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
+    if (!clientDisconnected && !(toolsEnabled && isAbort)) {
+      console.error('[stream] SDK error:', err.message);
+      sendSSE(res, { error: { message: err.message, type: 'server_error', code: null } });
+    }
+  }
+  if (sessionKey && capturedSessionId) {
+    upsertSession(sessionKey, capturedSessionId, resolvedModel);
+  }
+  // Tools mode: emit the buffered response as a single chunk with either
+  // tool_calls (+ finish_reason: tool_calls) or plain text (+ stop).
+  if (toolsEnabled && !res.writableEnded) {
+    const parsed = parseToolCalls(bufferedText);
+    if (parsed) {
+      console.log(`  [tools] emitting ${parsed.toolCalls.length} tool_call(s)`);
+      const chunk = {
+        id: `chatcmpl-${requestId}`,
+        object: 'chat.completion.chunk',
+        created: Math.floor(Date.now() / 1000),
+        model: normalizeModelName(resolvedModel),
+        choices: [{
+          index: 0,
+          delta: {
+            role: 'assistant',
+            content: parsed.textBefore || null,
+            tool_calls: parsed.toolCalls.map((tc, i) => ({
+              index: i,
+              id: tc.id,
+              type: 'function',
+              function: { name: tc.name, arguments: tc.arguments },
+            })),
+          },
+          finish_reason: 'tool_calls',
+        }],
+      };
+      sendSSE(res, chunk);
+    } else {
+      sendSSE(res, makeChunk(requestId, resolvedModel, bufferedText, 'assistant', null));
+      sendSSE(res, makeChunk(requestId, resolvedModel, undefined, undefined, 'stop'));
+    }
+    res.write('data: [DONE]\n\n');
+    res.end();
+    return;
+  }
+  if (!res.writableEnded) {
+    sendSSE(res, makeChunk(requestId, resolvedModel, undefined, undefined, 'stop'));
+    res.write('data: [DONE]\n\n');
+    res.end();
+  }
+}
+// ---------------------------------------------------------------------------
+// POST /v1/chat/completions — non-streaming
+// ---------------------------------------------------------------------------
+async function handleNonStreaming(res, body, requestId, sessionKey) {
+  const existing = getSession(sessionKey);
+  const resuming = !!existing?.sdkSessionId;
+  const toolsEnabled = hasTools(body);
+  const promptText = messagesToPrompt(body.messages, { resuming, tools: toolsEnabled ? body.tools : null });
+  const images = collectImages(body.messages);
+  const prompt = buildQueryPrompt(promptText, images);
+  const model = resolveModel(body.model);
+  if (images.length) console.log(`  [multimodal] ${images.length} image block(s)`);
+  if (toolsEnabled) console.log(`  [tools] ${body.tools.length} client tool(s)`);
+  let resultText = '';
+  let resolvedModel = model;
+  let inputTokens = 0;
+  let outputTokens = 0;
+  let capturedSessionId = existing?.sdkSessionId || null;
+  const abortController = new AbortController();
+  if (resuming) {
+    console.log(`  [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
+  }
+  const runQuery = async () => {
+    // Reset per-attempt state so a 401 retry starts clean
+    resultText = '';
+    resolvedModel = model;
+    inputTokens = 0;
+    outputTokens = 0;
+    capturedSessionId = existing?.sdkSessionId || null;
+    for await (const message of query({
+      prompt,
+      options: {
+        model,
+        maxTurns: toolsEnabled ? 5 : 200,
+        permissionMode: 'bypassPermissions',
+        allowDangerouslySkipPermissions: true,
+        abortController,
+        ...(toolsEnabled ? { allowedTools: [] } : {}),
+        ...(resuming ? { resume: existing.sdkSessionId } : {}),
+        ...(sessionKey && !resuming ? { persistSession: true } : {}),
+      },
+    })) {
+      if (message.type === 'system' && message.subtype === 'init' && message.model) {
+        resolvedModel = message.model;
+      }
+      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
+        capturedSessionId = message.session_id;
+        console.log(`  [session] captured sdk session: ${capturedSessionId}`);
+      }
+      if (message.type === 'assistant' && message.message?.content) {
+        const content = message.message.content;
+        if (Array.isArray(content)) {
+          for (const block of content) {
+            if (block.type === 'text') resultText += block.text || '';
+          }
+        } else if (typeof content === 'string') {
+          resultText += content;
+        }
+        // Detect auth failure surfaced inline (long-running proxy, cached creds)
+        if (isAuthFailureText(resultText)) {
+          abortController.abort();
+          throw new AuthFailureInResultText(resultText);
+        }
+        // Abort early once we see a complete <tool_call>...</tool_call>
+        if (toolsEnabled && hasCompleteToolCall(resultText)) {
+          console.log('  [tools] complete tool_call detected — aborting SDK');
+          abortController.abort();
+          break;
+        }
+      }
+      if (message.type === 'result') {
+        if (message.result && !resultText) resultText = message.result;
+        if (isAuthFailureText(resultText)) {
+          throw new AuthFailureInResultText(resultText);
+        }
+        inputTokens = message.input_tokens || 0;
+        outputTokens = message.output_tokens || 0;
+        break;
+      }
+    }
+  };
+  try {
+    await runWithAuthRetry({
+      attempt: runQuery,
+      // Non-streaming never writes to res until the end — retry is always safe
+      bailIfStarted: () => false,
+      onRefreshing: (err) => console.warn(`[auth] 401 on sync call — refreshing (${err.message?.slice(0, 80)})`),
+      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying sync call`),
+    });
+  } catch (err) {
+    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
+    if (!(toolsEnabled && isAbort)) {
+      console.error('[non-stream] SDK error:', err.message);
+      return res.status(500).json({ error: { message: err.message, type: 'server_error', code: null } });
+    }
+  }
+  if (sessionKey && capturedSessionId) {
+    upsertSession(sessionKey, capturedSessionId, resolvedModel);
+  }
+  const responseHeaders = {};
+  if (sessionKey) responseHeaders['X-Session-Id'] = sessionKey;
+  // Tool-calling response shape
+  if (toolsEnabled) {
+    const parsed = parseToolCalls(resultText);
+    if (parsed) {
+      console.log(`  [tools] emitting ${parsed.toolCalls.length} tool_call(s)`);
+      return res.set(responseHeaders).json({
+        id: `chatcmpl-${requestId}`,
+        object: 'chat.completion',
+        created: Math.floor(Date.now() / 1000),
+        model: normalizeModelName(resolvedModel),
+        choices: [{
+          index: 0,
+          message: {
+            role: 'assistant',
+            content: parsed.textBefore || null,
+            tool_calls: parsed.toolCalls.map((tc) => ({
+              id: tc.id,
+              type: 'function',
+              function: { name: tc.name, arguments: tc.arguments },
+            })),
+          },
+          finish_reason: 'tool_calls',
+        }],
+        usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
+      });
+    }
+    // No tool_call tags → fall through to normal text response
+  }
+  res.set(responseHeaders).json({
+    id: `chatcmpl-${requestId}`,
+    object: 'chat.completion',
+    created: Math.floor(Date.now() / 1000),
+    model: normalizeModelName(resolvedModel),
+    choices: [{
+      index: 0,
+      message: { role: 'assistant', content: resultText },
+      finish_reason: 'stop',
+    }],
+    usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
+  });
+}
+// ---------------------------------------------------------------------------
+// Express app
+// ---------------------------------------------------------------------------
+const app = express();
+app.use(express.json({ limit: '10mb' }));
+// GET / — serve dashboard
+app.get('/', (_req, res) => {
+  res.sendFile(join(__dirname, 'index.html'));
+});
+// POST /v1/chat/completions
+app.post('/v1/chat/completions', async (req, res) => {
+  const requestId = uuidv4().replace(/-/g, '').slice(0, 24);
+  const body = req.body;
+  if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
+    return res.status(400).json({
+      error: { message: 'messages is required and must be a non-empty array', type: 'invalid_request_error', code: 'invalid_messages' },
+    });
+  }
+  // Session key: X-Session-Id header > body.session_id > null (stateless)
+  const sessionKey = req.headers['x-session-id'] || body.session_id || null;
+  const existing = getSession(sessionKey);
+  const sessionTag = sessionKey ? ` | session=${sessionKey}${existing ? ' (resume)' : ' (new)'}` : '';
+  console.log(`[${new Date().toISOString()}] ${body.stream ? 'stream' : 'sync'} | model=${body.model} → ${resolveModel(body.model)} | msgs=${body.messages.length}${sessionTag}`);
+  // Dashboard: request.start
+  const startedAt = Date.now();
+  const imageBlocks = collectImages(body.messages).length;
+  dashboardBus.emitEvent({
+    type: 'request.start',
+    id: requestId,
+    method: 'POST',
+    path: '/v1/chat/completions',
+    model: body.model,
+    resolvedModel: resolveModel(body.model),
+    session: sessionKey,
+    stream: !!body.stream,
+    tools: hasTools(body),
+    images: imageBlocks,
+    messages: body.messages.length,
+    resuming: !!existing,
+  });
+  // Capture tokens / status from the response for the corresponding end event.
+  // We hook res.end to read the JSON body that non-streaming handlers wrote,
+  // and rely on res.on('finish') for streaming.
+  let endEmitted = false;
+  const emitEnd = (overrides = {}) => {
+    if (endEmitted) return;
+    endEmitted = true;
+    dashboardBus.emitEvent({
+      type: 'request.end',
+      id: requestId,
+      durationMs: Date.now() - startedAt,
+      status: res.statusCode < 400 ? 'ok' : 'error',
+      httpStatus: res.statusCode,
+      ...overrides,
+    });
+  };
+  res.on('finish', () => emitEnd());
+  res.on('close', () => { if (!endEmitted) emitEnd({ status: 'error', error: 'client_disconnect' }); });
+  if (body.stream) {
+    await handleStreaming(req, res, body, requestId, sessionKey);
+  } else {
+    await handleNonStreaming(res, body, requestId, sessionKey);
+  }
+});
+// GET /v1/models
+app.get('/v1/models', (_req, res) => {
+  const now = Math.floor(Date.now() / 1000);
+  res.json({
+    object: 'list',
+    data: [
+      { id: 'claude-opus-4-7', object: 'model', owned_by: 'anthropic', created: now, context_length: 1000000 },
+      { id: 'claude-opus-4-7-200k', object: 'model', owned_by: 'anthropic', created: now, context_length: 200000 },
+      { id: 'claude-opus-4-6', object: 'model', owned_by: 'anthropic', created: now, context_length: 1000000 },
+      { id: 'claude-sonnet-4-6', object: 'model', owned_by: 'anthropic', created: now, context_length: 1000000 },
+      { id: 'claude-haiku-4-5', object: 'model', owned_by: 'anthropic', created: now, context_length: 200000 },
+    ],
+  });
+});
+// GET /sessions — list active sessions
+app.get('/sessions', (_req, res) => {
+  const list = [];
+  for (const [key, entry] of sessions) {
+    list.push({
+      sessionKey: key,
+      sdkSessionId: entry.sdkSessionId,
+      model: entry.model,
+      messageCount: entry.messageCount,
+      createdAt: new Date(entry.createdAt).toISOString(),
+      lastUsed: new Date(entry.lastUsed).toISOString(),
+      idleSeconds: Math.round((Date.now() - entry.lastUsed) / 1000),
+      ttlRemainingSeconds: Math.max(0, Math.round((SESSION_TTL_MS - (Date.now() - entry.lastUsed)) / 1000)),
+    });
+  }
+  res.json({ active: list.length, sessions: list });
+});
+// GET /sessions/:key — get specific session
+app.get('/sessions/:key', (req, res) => {
+  const entry = sessions.get(req.params.key);
+  if (!entry) return res.status(404).json({ error: 'Session not found' });
+  res.json({
+    sessionKey: req.params.key,
+    sdkSessionId: entry.sdkSessionId,
+    model: entry.model,
+    messageCount: entry.messageCount,
+    createdAt: new Date(entry.createdAt).toISOString(),
+    lastUsed: new Date(entry.lastUsed).toISOString(),
+    idleSeconds: Math.round((Date.now() - entry.lastUsed) / 1000),
+    ttlRemainingSeconds: Math.max(0, Math.round((SESSION_TTL_MS - (Date.now() - entry.lastUsed)) / 1000)),
+  });
+});
+// DELETE /sessions/:key — clear a session
+app.delete('/sessions/:key', (req, res) => {
+  const existed = sessions.delete(req.params.key);
+  if (existed) {
+    dashboardBus.emitEvent({ type: 'session.expired', key: req.params.key, reason: 'manual' });
+    saveSessions(sessions);
+  }
+  res.json({ deleted: existed, sessionKey: req.params.key });
+});
+// DELETE /sessions — clear all sessions
+app.delete('/sessions', (_req, res) => {
+  const keys = [...sessions.keys()];
+  const count = sessions.size;
+  sessions.clear();
+  for (const key of keys) dashboardBus.emitEvent({ type: 'session.expired', key, reason: 'manual_all' });
+  saveSessions(sessions);
+  res.json({ deleted: count });
+});
+// GET /health
+app.get('/health', (_req, res) => {
+  res.json({
+    status: 'ok',
+    provider: 'claude-agent-sdk',
+    activeSessions: sessions.size,
+    sessionTtlMs: SESSION_TTL_MS,
+    timestamp: new Date().toISOString(),
+  });
+});
+// GET /auth/status
+// Reports CLI-side auth state plus (optionally) a real probe against Anthropic.
+// Pass ?quick=1 to skip the probe (reads keychain only — cheap).
+app.get('/auth/status', async (req, res) => {
+  const quick = req.query.quick === '1' || req.query.quick === 'true';
+  const status = await getAuthStatus();
+  if (!quick && status.ok && status.loggedIn) {
+    const probe = await forceRefresh();
+    return res.json({
+      ...status,
+      verified: !!probe.ok,
+      probeMs: probe.durationMs,
+      probeError: probe.error,
+      timestamp: new Date().toISOString(),
+    });
+  }
+  res.json({ ...status, verified: null, timestamp: new Date().toISOString() });
+});
+// POST /auth/refresh
+// Fires the refresh probe. Intended for use by cron / launchd.
+app.post('/auth/refresh', async (_req, res) => {
+  const probe = await forceRefresh();
+  dashboardBus.emitEvent({ type: 'auth.refresh', ok: probe.ok, durationMs: probe.durationMs, error: probe.error });
+  res.status(probe.ok ? 200 : 502).json({
+    ...probe,
+    timestamp: new Date().toISOString(),
+  });
+});
+// ---------------------------------------------------------------------------
+// Dashboard — live event stream + snapshots
+// ---------------------------------------------------------------------------
+// GET /events — SSE stream of dashboard events
+app.get('/events', (req, res) => {
+  res.setHeader('Content-Type', 'text/event-stream');
+  res.setHeader('Cache-Control', 'no-cache, no-transform');
+  res.setHeader('Connection', 'keep-alive');
+  res.setHeader('X-Accel-Buffering', 'no');
+  res.flushHeaders();
+  res.write(':ok\n\n'); // comment to open the stream
+  const listener = (ev) => {
+    if (!res.writableEnded) res.write(`data: ${JSON.stringify(ev)}\n\n`);
+  };
+  dashboardBus.on('event', listener);
+  const heartbeat = setInterval(() => { if (!res.writableEnded) res.write(':heartbeat\n\n'); }, 15_000);
+  req.on('close', () => {
+    clearInterval(heartbeat);
+    dashboardBus.off('event', listener);
+  });
+});
+// Cached build metadata — read once at startup
+let BUILD_META = null;
+async function loadBuildMeta() {
+  if (BUILD_META) return BUILD_META;
+  try {
+    const { readFile } = await import('fs/promises');
+    const pkg = JSON.parse(await readFile(join(__dirname, 'package.json'), 'utf8'));
+    BUILD_META = {
+      name: pkg.name,
+      version: pkg.version,
+      platform: `${process.platform}-${process.arch}`,
+      node: process.version,
+      contextWindow: DEFAULT_MODEL.includes('[1m]') ? 1_000_000 : 200_000,
+    };
+  } catch (e) {
+    BUILD_META = { name: 'mobygate', version: 'unknown', platform: process.platform, node: process.version, contextWindow: null };
+  }
+  return BUILD_META;
+}
+// GET /dashboard/recent — ring-buffer snapshot for initial page load
+app.get('/dashboard/recent', async (req, res) => {
+  const limit = Math.min(500, parseInt(req.query.limit || '100', 10));
+  res.json({
+    recent: dashboardBus.getRecent({ limit }),
+    stats: dashboardBus.getStats(),
+    activeSessions: sessions.size,
+    port: PORT,
+    defaultModel: DEFAULT_MODEL,
+    build: await loadBuildMeta(),
+  });
+});
+// GET /dashboard/sessions — active session detail for the dashboard
+app.get('/dashboard/sessions', (_req, res) => {
+  const now = Date.now();
+  const list = [];
+  for (const [key, entry] of sessions) {
+    list.push({
+      key,
+      sdkSessionId: entry.sdkSessionId,
+      model: entry.model,
+      messageCount: entry.messageCount,
+      createdAt: new Date(entry.createdAt).toISOString(),
+      lastUsedAt: new Date(entry.lastUsed).toISOString(),
+      idleSec: Math.floor((now - entry.lastUsed) / 1000),
+      ttlRemainingSec: Math.max(0, Math.floor((SESSION_TTL_MS - (now - entry.lastUsed)) / 1000)),
+    });
+  }
+  // Most recently used first
+  list.sort((a, b) => a.idleSec - b.idleSec);
+  res.json({ sessions: list, ttlMs: SESSION_TTL_MS });
+});
+// GET /dashboard/logs — tail the server log file
+app.get('/dashboard/logs', async (req, res) => {
+  const lines = Math.min(2000, parseInt(req.query.lines || '200', 10));
+  const logPath = join(LOGS_DIR, 'server.log');
+  try {
+    const { readFile, stat } = await import('fs/promises');
+    const exists = await stat(logPath).catch(() => null);
+    if (!exists) return res.json({ path: logPath, lines: [], note: 'log file does not exist yet' });
+    // Read the whole file (tolerable for a dev proxy log; rotate if > 10 MB).
+    const raw = await readFile(logPath, 'utf8');
+    const split = raw.split(/\r?\n/);
+    const tail = split.slice(-lines - 1, -1); // drop trailing blank
+    res.json({ path: logPath, lines: tail, totalLines: split.length - 1, sizeBytes: exists.size });
+  } catch (e) {
+    res.status(500).json({ error: e.message });
+  }
+});
+// ---------------------------------------------------------------------------
+// Start
+// ---------------------------------------------------------------------------
+app.listen(PORT, async () => {
+  const ttlMin = Math.round(SESSION_TTL_MS / 60000);
+  const meta = await loadBuildMeta();
+  console.log(banner({ version: meta.version }));
+  console.log(`    port         ${PORT}`);
+  console.log(`    model        ${DEFAULT_MODEL}`);
+  console.log(`    session TTL  ${ttlMin} min`);
+  console.log(`    dashboard    http://localhost:${PORT}`);
+  console.log('');
+  dashboardBus.emitEvent({ type: 'server.boot', port: PORT, defaultModel: DEFAULT_MODEL });
+});