npm - mobygate - Versions diffs - 0.6.2 → 0.7.0 - Mend

mobygate 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,72 @@ All notable changes to mobygate are documented here. Format loosely follows
 [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); version numbers are
 [Semantic Versioning](https://semver.org/).
+## [0.7.0] — 2026-04-24
+Phase 2: native Anthropic Messages surface.
+Mobygate is now a **dual-surface gateway** — the existing OpenAI-compat
+endpoint at `/v1/chat/completions` keeps working unchanged for Hermes
+and other OpenAI-shaped clients, and a new `POST /v1/messages` endpoint
+speaks native Anthropic Messages wire format for OpenClaw and any other
+Anthropic-shaped client. Both surfaces translate to the same underlying
+`query()` call on the Claude Agent SDK.
+### Added
+- **`POST /v1/messages`** — non-streaming + streaming. Accepts the
+  Anthropic Messages request shape (model, messages, system, tools,
+  max_tokens, stream, etc.) with native content blocks (`text`, `image`,
+  `tool_use`, `tool_result`) and returns native Anthropic responses.
+- **Native Anthropic SSE streaming** — emits `message_start` →
+  `content_block_start/delta/stop` (per block, with sequential index) →
+  `message_delta` (stop_reason, usage) → `message_stop`. Tool calls
+  stream as `content_block_start` with `content_block: {type: 'tool_use'}`
+  followed by `content_block_delta` with `delta: {type: 'input_json_delta'}`.
+- **Image passthrough** on `/v1/messages` — base64 data URLs and HTTP
+  URLs both flow through to the SDK as Anthropic image content blocks.
+- New module: `lib/anthropic.js` — request translator, response builder,
+  streaming SSE translator, stop-reason mapper.
+### Changed
+- **Tool calling on `/v1/messages` reuses Phase 1's native MCP path**
+  (from `lib/tool-bridge.js`). No prompt-injected `<tool_call>` text
+  protocol on the new surface — the model emits genuine `tool_use`
+  content blocks via SDK MCP registration, and we surface them
+  structurally. (Earlier WIP work attempted to revert the Phase 1
+  tool architecture for this surface; that's been undone in favor of
+  reusing the proven path that ships in Hermes today.)
+### Known limitation (carried from Phase 1)
+- Inbound `tool_result` blocks on a resumed turn are still spliced as
+  `<tool_results>` text into the next prompt, rather than passed
+  through as native Anthropic `tool_result` content blocks. Reason:
+  aborting the SDK on a `tool_use` prevents the assistant turn from
+  being persisted in session state — on resume, native tool_result
+  blocks have nothing to bind to and the model re-calls the tool.
+  Text-form works because the resumed model has the prior turn in
+  conversational context. A future refactor will keep the SDK
+  iterator alive across HTTP request boundaries to lift this.
+### Not in scope (deferred to a later release)
+- Streaming retrofit on the `/v1/chat/completions` endpoint (currently
+  buffers tool-mode responses). Mentioned as a Phase 2 candidate; held
+  for a focused pass.
+- `cache_control` passthrough — Anthropic's prompt caching is a billing
+  feature on API keys, not OAuth Max. We don't pass these request-body
+  fields through; nothing to gain on this billing tier.
+### OpenClaw migration
+After this release, register a second provider entry pointing at the
+new endpoint (`api: "anthropic-messages"`, `baseUrl: "http://localhost:3456"`,
+endpoint resolved as `:baseUrl/v1/messages`). The existing
+`claude-max-proxy/*` provider stays registered for clients that want
+the OpenAI-compat surface (Hermes).
 ## [0.6.2] — 2026-04-24
 ### Fixed

package/lib/anthropic.js ADDED Viewed

@@ -0,0 +1,379 @@
+/**
+ * Anthropic Messages translation layer.
+ *
+ * Translates between the native Anthropic Messages wire format
+ * (POST /v1/messages) and the Claude Agent SDK's `query()` shape used
+ * internally by mobygate. The SDK is the single source of truth for
+ * inference; this module just bridges request and response shapes so
+ * Anthropic-shaped clients (OpenClaw, etc.) can use native blocks
+ * (`text` / `image` / `tool_use` / `tool_result`) over the wire.
+ *
+ * Tool calling reuses the Phase 1 native-MCP path from lib/tool-bridge.js
+ * — client-defined tools are registered with the SDK as in-process MCP
+ * tools (Zod schemas converted from JSON Schema), the model emits real
+ * `tool_use` content blocks in its assistant stream, and we surface
+ * those structurally instead of regex-parsing them out of text. NO
+ * `<tool_call>` text protocol on this surface.
+ *
+ * Inbound `tool_result` blocks (when the client returns tool outputs in
+ * a follow-up turn) are still spliced as text on the resumed prompt.
+ * Same Phase 1 limitation: aborting the SDK on a tool_use prevents the
+ * assistant turn from being persisted in session state, so a native
+ * tool_result has nothing to bind to on resume. A future refactor that
+ * keeps the SDK iterator alive across HTTP request boundaries will lift
+ * this; until then, text-form works because the resumed model has the
+ * prior turn in conversational context.
+ */
+import { v4 as uuidv4 } from 'uuid';
+// ---------------------------------------------------------------------------
+// Content extraction — read individual block types out of an Anthropic message
+// ---------------------------------------------------------------------------
+/**
+ * Concatenate the text carried by an Anthropic message `content` value.
+ *
+ * @param {*} content - Either a bare string or an array of content blocks.
+ * @returns {string} The string itself; for an array, the `text` of every
+ *   `text` block joined with no separator (missing text counts as '');
+ *   '' for any other input.
+ */
+export function anthropicTextOf(content) {
+  if (typeof content === 'string') return content;
+  if (!Array.isArray(content)) return '';
+  return content
+    .filter((b) => b?.type === 'text')
+    .map((b) => b.text || '')
+    .join('');
+}
+/**
+ * Extract the image blocks from an Anthropic message `content` array.
+ *
+ * @param {*} content - Message content; anything that is not an array yields [].
+ * @returns {Array<{type: 'image', source: *}>} One fresh `{type, source}`
+ *   object per `image` block that has a truthy `source`; other block
+ *   fields are not copied.
+ */
+export function anthropicImagesOf(content) {
+  if (!Array.isArray(content)) return [];
+  return content
+    .filter((b) => b?.type === 'image' && b.source)
+    .map((b) => ({ type: 'image', source: b.source }));
+}
+/**
+ * Select the `tool_result` blocks from an Anthropic message `content` array.
+ *
+ * @param {*} content - Message content; anything that is not an array yields [].
+ * @returns {Array<object>} The original `tool_result` block objects, in order.
+ */
+export function anthropicToolResultsOf(content) {
+  if (!Array.isArray(content)) return [];
+  return content.filter((b) => b?.type === 'tool_result');
+}
+/**
+ * Normalize the request's `system` field to a single string.
+ *
+ * @param {*} system - A string, an array of content blocks, or nothing.
+ * @returns {string} The string as-is; for an array, the `text` of every
+ *   `text` block joined with newlines; '' for falsy or unsupported input.
+ */
+export function buildAnthropicSystemString(system) {
+  if (!system) return '';
+  if (typeof system === 'string') return system;
+  if (Array.isArray(system)) {
+    return system
+      .filter((b) => b?.type === 'text')
+      .map((b) => b.text || '')
+      .join('\n');
+  }
+  return '';
+}
+/**
+ * Report whether the request body declares at least one tool.
+ *
+ * @param {*} body - Parsed request body (may be null/undefined).
+ * @returns {boolean} true only when `body.tools` is a non-empty array.
+ */
+export function hasAnthropicTools(body) {
+  return Array.isArray(body?.tools) && body.tools.length > 0;
+}
+// ---------------------------------------------------------------------------
+// Tool-result text wrapping (inbound side, Phase 1 limitation persists)
+// ---------------------------------------------------------------------------
+function stringifyToolResultBody(content) {
+  if (typeof content === 'string') return content;
+  if (Array.isArray(content)) {
+    return content
+      .map((b) => {
+        if (b?.type === 'text') return b.text || '';
+        if (b?.type === 'image') return '[image content omitted in tool_result text replay]';
+        return JSON.stringify(b);
+      })
+      .filter(Boolean)
+      .join('\n');
+  }
+  return content == null ? '' : String(content);
+}
+/**
+ * Render one tool_result block as a `<tool_result>` text element.
+ *
+ * @param {object} block - A tool_result block; reads `tool_use_id`,
+ *   `content` and `is_error`.
+ * @returns {string} `<tool_result id="…">` wrapping the stringified body,
+ *   with ` is_error="true"` added when `block.is_error` is truthy. A
+ *   missing id is rendered as "unknown".
+ */
+function formatToolResultBlock(block) {
+  // NOTE(review): the id is interpolated without escaping; presumably ids
+  // never contain quotes or angle brackets — confirm against clients.
+  const id = block.tool_use_id || 'unknown';
+  const body = stringifyToolResultBody(block.content);
+  const errAttr = block.is_error ? ' is_error="true"' : '';
+  return `<tool_result id="${id}"${errAttr}>\n${body}\n</tool_result>`;
+}
+// ---------------------------------------------------------------------------
+// Request translation: Anthropic Messages → SDK prompt string
+// ---------------------------------------------------------------------------
+// IMPORTANT: this returns just a string. Tool definitions are NOT injected
+// into the prompt — the caller registers them with the SDK as MCP tools
+// (see lib/tool-bridge.js #buildClientToolsServer). This is a deliberate
+// reversal of OpenClaw's earlier WIP, which fell back to the legacy
+// `<tool_call>` text protocol; the native MCP path proven in Phase 1
+// works fine and we don't need to maintain two tool implementations.
+/**
+ * Serialize an Anthropic Messages request body into one prompt string.
+ *
+ * @param {object} body - Request body; reads `messages` and `system`.
+ * @param {object} [options]
+ * @param {boolean} [options.resuming=false] - When true, only the last
+ *   message is serialized (and only if it is a user message); the system
+ *   prompt and earlier turns are not included.
+ * @returns {string} The prompt text. May be '' (e.g. when resuming and the
+ *   last message is not a user message, or when nothing yields text).
+ */
+export function anthropicMessagesToPrompt(body, { resuming = false } = {}) {
+  const messages = body.messages || [];
+  const system = buildAnthropicSystemString(body.system);
+  if (resuming) {
+    // SDK has full history. Send only the new tail: tool_results from
+    // the last user message (if any) plus any fresh user text.
+    const last = messages[messages.length - 1];
+    if (!last || last.role !== 'user') return '';
+    const trBlocks = anthropicToolResultsOf(last.content);
+    const text = anthropicTextOf(last.content);
+    const parts = [];
+    if (trBlocks.length) {
+      parts.push(`<tool_results>\n${trBlocks.map(formatToolResultBlock).join('\n')}\n</tool_results>`);
+    }
+    if (text) parts.push(text);
+    return parts.join('\n\n');
+  }
+  // Fresh request: serialize visible history. System prompt at top, then
+  // each turn. Assistant turns replay as best-effort text — tool_use
+  // blocks in the history are dropped (rare in practice; clients almost
+  // always use session keys for multi-turn tool conversations).
+  const parts = [];
+  if (system) parts.push(`<system>\n${system}\n</system>\n`);
+  // Tool results are buffered so that results from consecutive user
+  // messages end up in a single <tool_results> wrapper; the buffer is
+  // flushed before the next user text, before any assistant turn, and at
+  // the end.
+  let toolBuffer = [];
+  const flushTools = () => {
+    if (toolBuffer.length) {
+      parts.push(`<tool_results>\n${toolBuffer.join('\n')}\n</tool_results>\n`);
+      toolBuffer = [];
+    }
+  };
+  for (const msg of messages) {
+    if (msg.role === 'user') {
+      const trBlocks = anthropicToolResultsOf(msg.content);
+      for (const b of trBlocks) toolBuffer.push(formatToolResultBlock(b));
+      const text = anthropicTextOf(msg.content);
+      if (text) {
+        // Results always precede the text of the same user message.
+        flushTools();
+        parts.push(text);
+      }
+    } else if (msg.role === 'assistant') {
+      flushTools();
+      const text = anthropicTextOf(msg.content);
+      if (text) parts.push(`<previous_response>\n${text}\n</previous_response>\n`);
+    }
+    // Messages with any other role are skipped.
+  }
+  flushTools();
+  return parts.join('\n').trim();
+}
+/**
+ * Pull image blocks from the most recent user message that contains any.
+ *
+ * Messages are scanned from newest to oldest. A user message without
+ * images does not stop the scan, so when the latest user turn is
+ * text-only or tool_result-only, images from an earlier user turn are
+ * returned instead. Non-user messages are never inspected.
+ *
+ * NOTE(review): the previous comment said older turns are ignored, which
+ * the loop does not do — confirm whether re-attaching images from an
+ * earlier turn is intended when resuming a session.
+ *
+ * @param {Array<object>} messages - The request's `messages` array.
+ * @returns {Array<{type: 'image', source: *}>} Image blocks of that
+ *   message, or [] when no user message carries an image.
+ */
+export function collectAnthropicImages(messages) {
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg.role === 'user') {
+      const imgs = anthropicImagesOf(msg.content);
+      if (imgs.length) return imgs;
+    }
+  }
+  return [];
+}
+// ---------------------------------------------------------------------------
+// Stop reason mapping
+// ---------------------------------------------------------------------------
+/**
+ * Map an SDK result message to an Anthropic `stop_reason` value.
+ *
+ * @param {object|null|undefined} sdkResult - Reads `stop_reason` and `subtype`.
+ * @returns {'end_turn'|'tool_use'|'max_tokens'|'stop_sequence'} The mapped
+ *   reason; 'end_turn' when the input is missing or nothing matches.
+ */
+export function mapStopReason(sdkResult) {
+  if (!sdkResult) return 'end_turn';
+  const sr = sdkResult.stop_reason;
+  if (sr === 'tool_use') return 'tool_use';
+  if (sr === 'max_tokens' || sr === 'max_output_tokens') return 'max_tokens';
+  if (sr === 'stop_sequence') return 'stop_sequence';
+  // Running out of turns is reported to the client as 'max_tokens'.
+  if (sdkResult.subtype === 'error_max_turns') return 'max_tokens';
+  return 'end_turn';
+}
+// ---------------------------------------------------------------------------
+// Non-streaming response builder
+// ---------------------------------------------------------------------------
+// Takes already-collected text + native tool_use blocks (from
+// extractToolUses in tool-bridge.js) — does NOT parse anything from text.
+// The handler in server.js does the SDK iteration and hands us the result.
+export function buildAnthropicResponse({
+  rawText = '',
+  toolUses = [],
+  model,
+  usage,
+  requestId,
+  stopReason,
+}) {
+  const id = `msg_${(requestId || uuidv4().replace(/-/g, '')).slice(0, 24)}`;
+  const content = [];
+  if (rawText) content.push({ type: 'text', text: rawText });
+  for (const tu of toolUses) {
+    // tool_use blocks from extractToolUses() are formatted for OpenAI:
+    // {id, name, arguments: <stringified-json>}. Anthropic wants {id, name, input}
+    // where input is the parsed object. Reverse the stringify.
+    let input = {};
+    try { input = JSON.parse(tu.arguments || '{}'); } catch {}
+    content.push({ type: 'tool_use', id: tu.id, name: tu.name, input });
+  }
+  // Empty content array would be invalid in the Anthropic API. If the
+  // model produced nothing actionable (rare — usually means an SDK error
+  // path), emit a single empty text block so clients don't crash on it.
+  if (content.length === 0) content.push({ type: 'text', text: '' });
+  return {
+    id,
+    type: 'message',
+    role: 'assistant',
+    model: model || 'claude-opus-4',
+    content,
+    stop_reason: stopReason || (toolUses.length ? 'tool_use' : 'end_turn'),
+    stop_sequence: null,
+    usage: {
+      input_tokens: usage?.input_tokens || 0,
+      output_tokens: usage?.output_tokens || 0,
+    },
+  };
+}
+// ---------------------------------------------------------------------------
+// Streaming SSE translator
+// ---------------------------------------------------------------------------
+// Emits Anthropic-shaped events on an Express res. The caller drives it
+// from the SDK iteration loop:
+//
+//   const tx = makeStreamTranslator({ res, requestId, model });
+//   tx.start(resolvedModel, inputTokens);
+//   for await (const message of query(...)) {
+//     // text deltas:
+//     for (const block of message.message?.content || []) {
+//       if (block.type === 'text') tx.pushTextDelta(block.text);
+//     }
+//     // native tool_use:
+//     if (hasToolUse(message)) {
+//       for (const tu of extractToolUses(message)) tx.pushToolUse(tu);
+//       tx.finish({ stopReason: 'tool_use', usage: ... });
+//       break;
+//     }
+//   }
+//   tx.finish({ stopReason: 'end_turn', usage: ... });
+/**
+ * Create a stateful writer that emits Anthropic-shaped SSE events on `res`.
+ *
+ * @param {object} params
+ * @param {object} params.res - Response object; uses `write`, `end` and
+ *   `writableEnded`.
+ * @param {string} [params.requestId] - Used (first 24 chars) for the
+ *   `msg_…` id; a dash-less UUID is generated when absent.
+ * @param {string} [params.model] - Model reported in `message_start` when
+ *   `start()` is not given a resolved model.
+ * @returns {{start: Function, pushTextDelta: Function, pushToolUse: Function,
+ *   finish: Function, error: Function, hasStarted: boolean}} The translator.
+ *   `start` and `finish` are idempotent; `pushTextDelta`, `pushToolUse`
+ *   and `finish` emit `message_start` themselves if it has not been sent.
+ */
+export function makeStreamTranslator({ res, requestId, model }) {
+  let started = false;
+  // Index of the most recently opened content block; -1 means none yet.
+  let blockIndex = -1;
+  let textBlockOpen = false;
+  let finished = false;
+  const messageId = `msg_${(requestId || uuidv4().replace(/-/g, '')).slice(0, 24)}`;
+  // Write one SSE frame (event line + data line); no-op once res has ended.
+  const sendEvent = (event, data) => {
+    if (res.writableEnded) return;
+    res.write(`event: ${event}\n`);
+    res.write(`data: ${JSON.stringify(data)}\n\n`);
+  };
+  // Emit message_start once; later calls are ignored.
+  const start = (resolvedModel, inputTokens = 0) => {
+    if (started) return;
+    started = true;
+    sendEvent('message_start', {
+      type: 'message_start',
+      message: {
+        id: messageId,
+        type: 'message',
+        role: 'assistant',
+        model: resolvedModel || model,
+        content: [],
+        stop_reason: null,
+        stop_sequence: null,
+        usage: { input_tokens: inputTokens, output_tokens: 0 },
+      },
+    });
+  };
+  // Open a text block at the next index unless one is already open.
+  const openTextBlock = () => {
+    if (textBlockOpen) return;
+    blockIndex++;
+    textBlockOpen = true;
+    sendEvent('content_block_start', {
+      type: 'content_block_start',
+      index: blockIndex,
+      content_block: { type: 'text', text: '' },
+    });
+  };
+  // Close the currently open text block, if any.
+  const closeTextBlock = () => {
+    if (!textBlockOpen) return;
+    sendEvent('content_block_stop', { type: 'content_block_stop', index: blockIndex });
+    textBlockOpen = false;
+  };
+  // Append text to the open text block, opening one first when needed.
+  // Empty text and calls after finish()/error() are ignored.
+  const pushTextDelta = (text) => {
+    if (!text || finished) return;
+    if (!started) start(model, 0);
+    openTextBlock();
+    sendEvent('content_block_delta', {
+      type: 'content_block_delta',
+      index: blockIndex,
+      delta: { type: 'text_delta', text },
+    });
+  };
+  /**
+   * Emit a native tool_use as content_block_start + input_json_delta +
+   * content_block_stop. The SDK gives us the full input object up-front
+   * (we don't see the model streaming JSON character by character —
+   * that's exposed via the raw API but the Agent SDK aggregates), so
+   * we ship it as one delta. Clients that handle character-streamed
+   * input_json_delta still parse fine because partial_json across
+   * deltas concatenates to the same final string.
+   *
+   * `tu` is in OpenAI shape from extractToolUses: {id, name, arguments}
+   * where arguments is a JSON string.
+   */
+  const pushToolUse = (tu) => {
+    if (finished) return;
+    if (!started) start(model, 0);
+    // Any open text block is closed first so the tool_use gets its own index.
+    closeTextBlock();
+    blockIndex++;
+    sendEvent('content_block_start', {
+      type: 'content_block_start',
+      index: blockIndex,
+      content_block: { type: 'tool_use', id: tu.id, name: tu.name, input: {} },
+    });
+    sendEvent('content_block_delta', {
+      type: 'content_block_delta',
+      index: blockIndex,
+      delta: { type: 'input_json_delta', partial_json: tu.arguments || '{}' },
+    });
+    sendEvent('content_block_stop', { type: 'content_block_stop', index: blockIndex });
+  };
+  // Close any open block, emit message_delta + message_stop, and end the
+  // response. Only the first call has any effect.
+  const finish = ({ stopReason = 'end_turn', usage = {} } = {}) => {
+    if (finished) return;
+    finished = true;
+    if (!started) start(model, 0);
+    closeTextBlock();
+    sendEvent('message_delta', {
+      type: 'message_delta',
+      delta: { stop_reason: stopReason, stop_sequence: null },
+      usage: { output_tokens: usage.output_tokens || 0 },
+    });
+    sendEvent('message_stop', { type: 'message_stop' });
+    if (!res.writableEnded) res.end();
+  };
+  // Emit a single `error` event and end the response. Does not emit
+  // message_start and does not close an open content block.
+  const error = (err) => {
+    if (finished || res.writableEnded) return;
+    finished = true;
+    sendEvent('error', {
+      type: 'error',
+      error: { type: 'api_error', message: err?.message || String(err) },
+    });
+    if (!res.writableEnded) res.end();
+  };
+  return {
+    start,
+    pushTextDelta,
+    pushToolUse,
+    finish,
+    error,
+    // True once message_start has been emitted.
+    get hasStarted() { return started; },
+  };
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mobygate",
-  "version": "0.6.2",
+  "version": "0.7.0",
   "description": "OpenAI-compatible local proxy for Claude Max. The Möbius-strip gateway: OpenAI shape in, Claude Max out.",
   "type": "module",
   "main": "server.js",

package/server.js CHANGED Viewed

@@ -68,6 +68,14 @@ import {
   readUpdateLogTail,
   getCurrentVersion,
 } from './lib/updater.js';
+import {
+  anthropicMessagesToPrompt,
+  collectAnthropicImages,
+  buildAnthropicResponse,
+  makeStreamTranslator,
+  hasAnthropicTools,
+  mapStopReason,
+} from './lib/anthropic.js';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = dirname(__filename);
@@ -765,6 +773,376 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
   });
 }
+// ---------------------------------------------------------------------------
+// POST /v1/messages — Anthropic-native surface (non-streaming + streaming)
+// ---------------------------------------------------------------------------
+// The dual-surface architecture: Hermes uses /v1/chat/completions
+// (OpenAI shape), OpenClaw uses /v1/messages (Anthropic shape). Both
+// translate to the SAME underlying SDK query() — the surfaces are pure
+// translators over a single inference engine.
+//
+// Tool calling: reuses Phase 1's native MCP path from lib/tool-bridge.js.
+// No prompt-injected tool definitions, no <tool_call> text parsing.
+// Inbound tool_results still spliced as text on resume (see anthropic.js
+// docstring for why — Phase 1 limitation, not lifted here).
+async function handleAnthropicNonStreaming(res, body, requestId, sessionKey) {
+  const existing = getSession(sessionKey);
+  const resuming = !!existing?.sdkSessionId;
+  const toolsEnabled = hasAnthropicTools(body);
+  const promptText = anthropicMessagesToPrompt(body, { resuming });
+  const images = collectAnthropicImages(body.messages || []);
+  const prompt = buildQueryPrompt(promptText, images);
+  const model = resolveModel(body.model);
+  // Translate Anthropic tool defs → OpenAI shape that buildClientToolsServer
+  // expects. Both go through the same JSON-Schema → Zod path on the way to
+  // MCP; the wrapper shape difference is just `function:{name, parameters}`
+  // vs `{name, input_schema}`.
+  const toolsForBridge = toolsEnabled
+    ? body.tools.map((t) => ({
+        type: 'function',
+        function: { name: t.name, description: t.description || '', parameters: t.input_schema || {} },
+      }))
+    : null;
+  const clientToolsServer = toolsForBridge ? buildClientToolsServer(toolsForBridge) : null;
+  if (images.length) console.log(`  [multimodal] ${images.length} image block(s)`);
+  if (toolsEnabled) console.log(`  [tools] ${body.tools.length} client tool(s) registered as MCP`);
+  let resultText = '';
+  let collectedToolCalls = [];
+  let resolvedModel = model;
+  let inputTokens = 0;
+  let outputTokens = 0;
+  let capturedSessionId = existing?.sdkSessionId || null;
+  let stopReason = 'end_turn';
+  const abortController = new AbortController();
+  if (resuming) {
+    console.log(`  [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
+  }
+  const runQuery = async () => {
+    resultText = '';
+    collectedToolCalls = [];
+    resolvedModel = model;
+    inputTokens = 0;
+    outputTokens = 0;
+    capturedSessionId = existing?.sdkSessionId || null;
+    stopReason = 'end_turn';
+    for await (const message of query({
+      prompt,
+      options: {
+        model,
+        maxTurns: toolsEnabled ? 5 : 200,
+        permissionMode: 'bypassPermissions',
+        allowDangerouslySkipPermissions: true,
+        abortController,
+        ...(clientToolsServer
+          ? {
+              mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
+              allowedTools: [`${MCP_TOOL_PREFIX}*`],
+            }
+          : toolsEnabled
+            ? { allowedTools: [] }
+            : {}),
+        ...(resuming ? { resume: existing.sdkSessionId } : {}),
+        ...(sessionKey && !resuming ? { persistSession: true } : {}),
+      },
+    })) {
+      if (message.type === 'system' && message.subtype === 'init' && message.model) {
+        resolvedModel = message.model;
+      }
+      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
+        capturedSessionId = message.session_id;
+        console.log(`  [session] captured sdk session: ${capturedSessionId}`);
+      }
+      if (message.type === 'assistant' && message.message?.content) {
+        const content = message.message.content;
+        if (Array.isArray(content)) {
+          for (const block of content) {
+            if (block.type === 'text') resultText += block.text || '';
+          }
+        } else if (typeof content === 'string') {
+          resultText += content;
+        }
+        if (isAuthFailureText(resultText)) {
+          abortController.abort();
+          throw new AuthFailureInResultText(resultText);
+        }
+        if (toolsEnabled && hasToolUse(message)) {
+          const calls = extractToolUses(message);
+          if (calls.length) {
+            collectedToolCalls.push(...calls);
+            stopReason = 'tool_use';
+            console.log(`  [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
+            abortController.abort();
+            break;
+          }
+        }
+      }
+      if (message.type === 'result') {
+        if (message.result && !resultText) resultText = message.result;
+        if (isAuthFailureText(resultText)) {
+          throw new AuthFailureInResultText(resultText);
+        }
+        inputTokens = message.input_tokens || 0;
+        outputTokens = message.output_tokens || 0;
+        stopReason = mapStopReason(message);
+        break;
+      }
+    }
+  };
+  try {
+    await runWithAuthRetry({
+      attempt: runQuery,
+      bailIfStarted: () => false,
+      onRefreshing: (err) => console.warn(`[auth] 401 on /v1/messages — refreshing (${err.message?.slice(0, 80)})`),
+      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying /v1/messages`),
+    });
+  } catch (err) {
+    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
+    if (!(toolsEnabled && isAbort)) {
+      console.error('[/v1/messages] SDK error:', err.message);
+      return res.status(500).json({
+        type: 'error',
+        error: { type: 'api_error', message: err.message },
+      });
+    }
+  }
+  if (sessionKey && capturedSessionId) {
+    upsertSession(sessionKey, capturedSessionId, resolvedModel);
+  }
+  if (sessionKey) res.setHeader('X-Session-Id', sessionKey);
+  res.json(buildAnthropicResponse({
+    rawText: resultText.trim(),
+    toolUses: collectedToolCalls,
+    model: resolvedModel,
+    usage: { input_tokens: inputTokens, output_tokens: outputTokens },
+    requestId,
+    stopReason,
+  }));
+}
+async function handleAnthropicStreaming(req, res, body, requestId, sessionKey) {
+  const existing = getSession(sessionKey);
+  const resuming = !!existing?.sdkSessionId;
+  const toolsEnabled = hasAnthropicTools(body);
+  const promptText = anthropicMessagesToPrompt(body, { resuming });
+  const images = collectAnthropicImages(body.messages || []);
+  const prompt = buildQueryPrompt(promptText, images);
+  const model = resolveModel(body.model);
+  const toolsForBridge = toolsEnabled
+    ? body.tools.map((t) => ({
+        type: 'function',
+        function: { name: t.name, description: t.description || '', parameters: t.input_schema || {} },
+      }))
+    : null;
+  const clientToolsServer = toolsForBridge ? buildClientToolsServer(toolsForBridge) : null;
+  if (images.length) console.log(`  [multimodal] ${images.length} image block(s)`);
+  if (toolsEnabled) console.log(`  [tools] ${body.tools.length} client tool(s) registered as MCP`);
+  res.setHeader('Content-Type', 'text/event-stream');
+  res.setHeader('Cache-Control', 'no-cache');
+  res.setHeader('Connection', 'keep-alive');
+  res.setHeader('X-Request-Id', requestId);
+  if (sessionKey) res.setHeader('X-Session-Id', sessionKey);
+  res.flushHeaders();
+  const tx = makeStreamTranslator({ res, requestId, model });
+  const abortController = new AbortController();
+  let resolvedModel = model;
+  let capturedSessionId = existing?.sdkSessionId || null;
+  let inputTokens = 0;
+  let outputTokens = 0;
+  let stopReason = 'end_turn';
+  let clientDisconnected = false;
+  let textEmittedSoFar = ''; // dedup against same-message reflow from SDK
+  let toolUseEmitted = false;
+  res.on('close', () => {
+    clientDisconnected = true;
+    abortController.abort();
+  });
+  if (resuming) {
+    console.log(`  [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
+  }
+  const runQuery = async () => {
+    // Reset per-attempt state in case of 401-retry. Note: tx is reused
+    // across retries, so a successful retry that comes after we already
+    // emitted message_start would surface as a confused stream. We bail
+    // out of retry once the translator has started (see bailIfStarted).
+    resolvedModel = model;
+    capturedSessionId = existing?.sdkSessionId || null;
+    inputTokens = 0;
+    outputTokens = 0;
+    stopReason = 'end_turn';
+    textEmittedSoFar = '';
+    toolUseEmitted = false;
+    for await (const message of query({
+      prompt,
+      options: {
+        model,
+        maxTurns: toolsEnabled ? 5 : 200,
+        permissionMode: 'bypassPermissions',
+        allowDangerouslySkipPermissions: true,
+        abortController,
+        ...(clientToolsServer
+          ? {
+              mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
+              allowedTools: [`${MCP_TOOL_PREFIX}*`],
+            }
+          : toolsEnabled
+            ? { allowedTools: [] }
+            : {}),
+        ...(resuming ? { resume: existing.sdkSessionId } : {}),
+        ...(sessionKey && !resuming ? { persistSession: true } : {}),
+      },
+    })) {
+      if (clientDisconnected) break;
+      if (message.type === 'system' && message.subtype === 'init' && message.model) {
+        resolvedModel = message.model;
+        tx.start(resolvedModel, 0);
+      }
+      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
+        capturedSessionId = message.session_id;
+        console.log(`  [session] captured sdk session: ${capturedSessionId}`);
+      }
+      if (message.type === 'assistant' && message.message?.content) {
+        const content = message.message.content;
+        // Auth-failure short-circuit: throw so runWithAuthRetry handles it.
+        // Only safe before any text has been streamed (otherwise we've
+        // already corrupted the SSE stream and can't undo).
+        if (Array.isArray(content)) {
+          let combined = '';
+          for (const b of content) if (b?.type === 'text' && b.text) combined += b.text;
+          if (combined && isAuthFailureText(combined) && !tx.hasStarted) {
+            abortController.abort();
+            throw new AuthFailureInResultText(combined);
+          }
+        }
+        // Tool_use detection: emit tool_use blocks structurally and abort.
+        // We do this BEFORE streaming text deltas from this message so the
+        // tool_use block is properly framed (after any pending text block
+        // closes). The translator handles the close-text → open-tool-use
+        // sequencing internally.
+        if (toolsEnabled && hasToolUse(message)) {
+          const calls = extractToolUses(message);
+          if (calls.length) {
+            // Emit any text from this same message *before* the tool_use
+            // (Anthropic streams sometimes have text + tool_use in one
+            // assistant message — preserve that ordering).
+            if (Array.isArray(content)) {
+              for (const b of content) {
+                if (b?.type === 'text' && b.text) {
+                  // Compute delta vs what we've emitted to avoid duplication
+                  // on aggregator-style assistant messages that resend the
+                  // whole accumulated text.
+                  const delta = b.text.startsWith(textEmittedSoFar)
+                    ? b.text.slice(textEmittedSoFar.length)
+                    : b.text;
+                  if (delta) {
+                    tx.pushTextDelta(delta);
+                    textEmittedSoFar += delta;
+                  }
+                }
+              }
+            }
+            for (const tu of calls) tx.pushToolUse(tu);
+            toolUseEmitted = true;
+            stopReason = 'tool_use';
+            console.log(`  [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
+            abortController.abort();
+            break;
+          }
+        }
+        // Plain text-only assistant message: stream the delta.
+        if (Array.isArray(content)) {
+          let combined = '';
+          for (const b of content) if (b?.type === 'text' && b.text) combined += b.text;
+          if (combined) {
+            const delta = combined.startsWith(textEmittedSoFar)
+              ? combined.slice(textEmittedSoFar.length)
+              : combined;
+            if (delta) {
+              tx.pushTextDelta(delta);
+              textEmittedSoFar += delta;
+            }
+          }
+        } else if (typeof content === 'string' && content) {
+          const delta = content.startsWith(textEmittedSoFar)
+            ? content.slice(textEmittedSoFar.length)
+            : content;
+          if (delta) {
+            tx.pushTextDelta(delta);
+            textEmittedSoFar += delta;
+          }
+        }
+      }
+      if (message.type === 'result') {
+        if (message.result && !textEmittedSoFar && !toolUseEmitted) {
+          // Some SDK paths only deliver text via the final result message
+          // (no streaming assistant messages). Emit it here as a single
+          // delta — clients see this as "model started + finished in one
+          // chunk", which is valid SSE.
+          tx.pushTextDelta(message.result);
+        }
+        if (isAuthFailureText(message.result || '') && !tx.hasStarted) {
+          throw new AuthFailureInResultText(message.result);
+        }
+        inputTokens = message.input_tokens || 0;
+        outputTokens = message.output_tokens || 0;
+        if (!toolUseEmitted) stopReason = mapStopReason(message);
+        break;
+      }
+    }
+  };
+  try {
+    await runWithAuthRetry({
+      attempt: runQuery,
+      // Once we've emitted message_start or any deltas, the SSE stream is
+      // committed — a retry would fragment it. Same logic as the OpenAI
+      // surface (bail once anything has been written).
+      bailIfStarted: () => tx.hasStarted,
+      onRefreshing: (err) => console.warn(`[auth] 401 on /v1/messages stream — refreshing (${err.message?.slice(0, 80)})`),
+      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying /v1/messages stream`),
+    });
+  } catch (err) {
+    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
+    if (!clientDisconnected && !(toolsEnabled && isAbort)) {
+      console.error('[/v1/messages stream] SDK error:', err.message);
+      tx.error(err);
+      return;
+    }
+  }
+  if (sessionKey && capturedSessionId) {
+    upsertSession(sessionKey, capturedSessionId, resolvedModel);
+  }
+  tx.finish({ stopReason, usage: { output_tokens: outputTokens } });
+}
 // ---------------------------------------------------------------------------
 // Express app
 // ---------------------------------------------------------------------------
@@ -866,6 +1244,69 @@ app.post('/v1/chat/completions', async (req, res) => {
   }
 });
+// POST /v1/messages — Anthropic-native surface (for OpenClaw etc.).
+// Same dispatch shape as /v1/chat/completions, different translator pair.
+// Both endpoints terminate at the same SDK query() under the hood; this
+// route exists so Anthropic-shaped clients get native blocks (text /
+// image / tool_use / tool_result) without going through OpenAI shape.
+app.post('/v1/messages', async (req, res) => {
+  const requestId = uuidv4().replace(/-/g, '').slice(0, 24);
+  const body = req.body;
+  if (!body?.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
+    return res.status(400).json({
+      type: 'error',
+      error: { type: 'invalid_request_error', message: 'messages is required and must be a non-empty array' },
+    });
+  }
+  const sessionKey = req.headers['x-session-id'] || body.session_id || null;
+  const existing = getSession(sessionKey);
+  const sessionTag = sessionKey ? ` | session=${sessionKey}${existing ? ' (resume)' : ' (new)'}` : '';
+  console.log(`[${new Date().toISOString()}] anthropic ${body.stream ? 'stream' : 'sync'} | model=${body.model} → ${resolveModel(body.model)} | msgs=${body.messages.length}${sessionTag}`);
+  // Dashboard event — same shape as the OpenAI route, just labeled by path.
+  const startedAt = Date.now();
+  const imageBlocks = collectAnthropicImages(body.messages || []).length;
+  dashboardBus.emitEvent({
+    type: 'request.start',
+    id: requestId,
+    method: 'POST',
+    path: '/v1/messages',
+    model: body.model,
+    resolvedModel: resolveModel(body.model),
+    session: sessionKey,
+    stream: !!body.stream,
+    tools: hasAnthropicTools(body),
+    images: imageBlocks,
+    messages: body.messages.length,
+    resuming: !!existing,
+  });
+  let endEmitted = false;
+  const emitEnd = (overrides = {}) => {
+    if (endEmitted) return;
+    endEmitted = true;
+    dashboardBus.emitEvent({
+      type: 'request.end',
+      id: requestId,
+      durationMs: Date.now() - startedAt,
+      status: res.statusCode < 400 ? 'ok' : 'error',
+      httpStatus: res.statusCode,
+      ...overrides,
+    });
+  };
+  res.on('finish', () => emitEnd());
+  res.on('close', () => { if (!endEmitted) emitEnd({ status: 'error', error: 'client_disconnect' }); });
+  if (body.stream) {
+    await handleAnthropicStreaming(req, res, body, requestId, sessionKey);
+  } else {
+    await handleAnthropicNonStreaming(res, body, requestId, sessionKey);
+  }
+});
 // GET /v1/models
 app.get('/v1/models', (_req, res) => {
   const now = Math.floor(Date.now() / 1000);