npm - mobygate - Versions diffs - 0.5.3 → 0.6.1 - Mend

mobygate 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/server.js CHANGED Viewed

@@ -53,6 +53,21 @@ import { banner } from './lib/ascii.js';
 import { bus as dashboardBus } from './lib/dashboard-bus.js';
 import { loadSessions, saveSessions, flushSessionsNow } from './lib/session-store.js';
 import { LOGS_DIR } from './lib/config.js';
+import {
+  buildClientToolsServer,
+  extractToolUses,
+  hasToolUse,
+  toolMessagesToText,
+  MCP_SERVER_NAME,
+  MCP_TOOL_PREFIX,
+} from './lib/tool-bridge.js';
+import {
+  getUpdateCheck,
+  applyUpdate,
+  readUpdateState,
+  readUpdateLogTail,
+  getCurrentVersion,
+} from './lib/updater.js';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = dirname(__filename);
@@ -214,114 +229,45 @@ function collectImages(messages) {
 }
 // ---------------------------------------------------------------------------
-// Tool calling (Path B: prompt-embedded protocol)
+// Tool calling (Phase 1: native MCP tools — no more <tool_call> text hack)
 // ---------------------------------------------------------------------------
-// The Claude Agent SDK cannot stream OpenAI-style function-call events back to
-// the caller (MCP handlers execute in-process and pollute session state; see
-// README "Known Gaps"). Workaround: inject client-provided tool schemas into
-// the system prompt and instruct the model to emit <tool_call>{...}</tool_call>
-// tags. We parse those out and re-emit as OpenAI `tool_calls`. Tool results
-// coming back from the client get wrapped in <tool_result> blocks.
+// Client-provided OpenAI tools are registered with the SDK as in-process MCP
+// tools (see lib/tool-bridge.js). The model emits **native** tool_use content
+// blocks in its assistant messages; we abort the SDK on the first one and
+// return OpenAI tool_calls to the client. When the client replies with tool
+// results, messagesToPrompt() splices them back into the prompt as text via
+// toolMessagesToText(); native tool_result content blocks are not used yet.
 /**
  * Report whether the request body carries a non-empty `tools` array.
  *
  * @param {object|null|undefined} body - Parsed request body; may be absent.
  * @returns {boolean} true only when `body.tools` is an array with at least
  *   one element.
  */
 function hasTools(body) {
   const candidate = body?.tools;
   if (!Array.isArray(candidate)) return false;
   return candidate.length > 0;
 }
-function buildToolInstructions(tools) {
-  const lines = [
-    'You have access to CLIENT-DEFINED tools listed below. To invoke a tool, emit one or more <tool_call> tags, each containing a strict JSON object with "name" and "arguments":',
-    '',
-    '<tool_call>{"name":"<tool_name>","arguments":{<args>}}</tool_call>',
-    '',
-    'Rules:',
-    '- Do NOT wrap <tool_call> tags in markdown code fences.',
-    '- When you emit <tool_call> tags, output ONLY the tags — no prose, no explanation, no other text.',
-    '- You may emit multiple <tool_call> tags to request parallel calls.',
-    '- Tool results will be returned as <tool_result id="..." name="...">...</tool_result> blocks. After results arrive, continue toward the final answer.',
-    '- When you have the final answer and need no more tool calls, respond normally WITHOUT any <tool_call> tag.',
-    '- Do NOT call any other tool (Read, Bash, Grep, etc.) — only the tools listed below.',
-    '',
-    'Available tools:',
-  ];
-  for (const t of tools) {
-    if (t?.type !== 'function' || !t.function) continue;
-    const fn = t.function;
-    lines.push(`<tool name="${fn.name}">`);
-    if (fn.description) lines.push(`  <description>${fn.description}</description>`);
-    lines.push(`  <parameters>${JSON.stringify(fn.parameters || { type: 'object', properties: {} })}</parameters>`);
-    lines.push('</tool>');
-  }
-  return lines.join('\n');
-}
-function formatAssistantForReplay(msg) {
-  const parts = [];
-  const text = extractContent(msg.content);
-  if (text) parts.push(text);
-  if (Array.isArray(msg.tool_calls)) {
-    for (const tc of msg.tool_calls) {
-      if (tc?.type === 'function' && tc.function) {
-        let args = {};
-        try { args = JSON.parse(tc.function.arguments || '{}'); } catch {}
-        parts.push(`<tool_call>${JSON.stringify({ name: tc.function.name, arguments: args })}</tool_call>`);
-      }
-    }
-  }
-  return parts.join('\n');
-}
-function formatToolResult(msg) {
-  const content = extractContent(msg.content);
-  const id = msg.tool_call_id || 'unknown';
-  const name = msg.name || '';
-  return `<tool_result id="${id}" name="${name}">\n${content}\n</tool_result>`;
-}
-// Parse the model's text output for <tool_call> tags. Returns
-//   { toolCalls: [{id, name, arguments}], textBefore: string }
-// when at least one valid call is found, else null.
-function parseToolCalls(text) {
-  if (!text || !text.includes('<tool_call>')) return null;
-  const re = /<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g;
-  const calls = [];
-  let firstIdx = -1;
-  let m;
-  while ((m = re.exec(text)) !== null) {
-    if (firstIdx === -1) firstIdx = m.index;
-    try {
-      const obj = JSON.parse(m[1]);
-      if (obj && typeof obj.name === 'string') {
-        calls.push({
-          id: `call_${uuidv4().replace(/-/g, '').slice(0, 20)}`,
-          name: obj.name,
-          arguments: JSON.stringify(obj.arguments ?? {}),
-        });
-      }
-    } catch {
-      // ignore malformed tool_call blocks
-    }
-  }
-  if (!calls.length) return null;
-  return { toolCalls: calls, textBefore: text.slice(0, firstIdx).trim() };
-}
-// Detect whether the running text contains a COMPLETE <tool_call>...</tool_call>
-// pair — used to abort the SDK early once a call has been emitted.
-function hasCompleteToolCall(text) {
-  return /<tool_call>\s*[\s\S]*?<\/tool_call>/.test(text);
-}
-function messagesToPrompt(messages, { resuming = false, tools = null } = {}) {
-  // When resuming, the SDK already has full history. Only send the new tail:
-  // tool_results (if the client is replying with tool outputs) and/or a fresh
-  // user message.
+/**
+ * Build the prompt text from the OpenAI messages array.
+ *
+ * Returns `{ promptText }` — a single string ready for the SDK. Tool
+ * results are spliced in as <tool_results> XML when present (see
+ * lib/tool-bridge.js#toolMessagesToText for why we don't use native
+ * tool_result content blocks yet).
+ *
+ * Resuming vs fresh:
+ *   - Resuming: SDK has full history. We only send the new tail —
+ *     trailing tool results plus the most recent user text, if any.
+ *   - Fresh: SDK starts cold. We serialize the visible history with
+ *     <system>/<previous_response>/<tool_results> tags. No tool-
+ *     instruction injection — the SDK MCP registration handles that.
+ */
+function messagesToPrompt(messages, { resuming = false } = {}) {
   if (resuming) {
-    const toolResults = [];
+    // Walk backwards from the end, collecting trailing tool messages and
+    // the most recent user text. Tool results are formatted as a text
+    // block (see lib/tool-bridge.js#toolMessagesToText for the rationale).
+    const trailingToolMessages = [];
     let userText = '';
     for (let i = messages.length - 1; i >= 0; i--) {
       const msg = messages[i];
       if (msg.role === 'tool') {
-        toolResults.unshift(formatToolResult(msg));
+        trailingToolMessages.unshift(msg);
       } else if (msg.role === 'user') {
         userText = extractContent(msg.content);
         break;
@@ -329,39 +275,20 @@ function messagesToPrompt(messages, { resuming = false, tools = null } = {}) {
         break;
       }
     }
+    const toolResultsText = toolMessagesToText(trailingToolMessages);
     const parts = [];
-    if (toolResults.length) {
-      parts.push(`<tool_results>\n${toolResults.join('\n')}\n</tool_results>`);
-      // The model sometimes treats a bare <tool_results> block as "just data"
-      // and returns empty. A short nudge keeps the turn productive without
-      // biasing what comes next.
-      if (!userText) parts.push('Use the tool results above to continue toward the final answer. If more tool calls are needed, emit them; otherwise respond directly.');
-    }
+    if (toolResultsText) parts.push(toolResultsText);
     if (userText) parts.push(userText);
-    return parts.join('\n\n') || extractContent(messages[messages.length - 1].content);
+    return {
+      promptText: parts.join('\n\n') || extractContent(messages[messages.length - 1]?.content || ''),
+    };
   }
+  // Fresh request: serialize visible history as XML-wrapped text. No
+  // tool-instruction injection (the model learns about tools via the SDK
+  // MCP registration, not the prompt).
   const parts = [];
-  // Tool instructions prepended once at the top of the system context.
-  if (tools && tools.length) {
-    parts.push(`<system>\n${buildToolInstructions(tools)}\n</system>\n`);
-  }
-  // Group consecutive tool-role messages so they emit as one <tool_results> block.
-  let toolBuffer = [];
-  const flushTools = () => {
-    if (toolBuffer.length) {
-      parts.push(`<tool_results>\n${toolBuffer.join('\n')}\n</tool_results>\n`);
-      toolBuffer = [];
-    }
-  };
   for (const msg of messages) {
-    if (msg.role === 'tool') {
-      toolBuffer.push(formatToolResult(msg));
-      continue;
-    }
-    flushTools();
     switch (msg.role) {
       case 'system':
         parts.push(`<system>\n${extractContent(msg.content)}\n</system>\n`);
@@ -369,18 +296,34 @@ function messagesToPrompt(messages, { resuming = false, tools = null } = {}) {
       case 'user':
         parts.push(extractContent(msg.content));
         break;
-      case 'assistant':
-        parts.push(`<previous_response>\n${formatAssistantForReplay(msg)}\n</previous_response>\n`);
+      case 'assistant': {
+        // Best-effort replay. tool_calls in non-resume history are dropped;
+        // the model can usually infer continuity from the surrounding text.
+        const text = extractContent(msg.content);
+        if (text) parts.push(`<previous_response>\n${text}\n</previous_response>\n`);
+        break;
+      }
+      case 'tool': {
+        // Tool messages on a fresh turn (rare — clients normally use
+        // session keys). Splice as text since there's no preceding
+        // tool_use turn we can bind to natively.
+        const text = toolMessagesToText([msg]);
+        if (text) parts.push(text);
         break;
+      }
     }
   }
-  flushTools();
-  return parts.join('\n').trim();
+  return {
+    promptText: parts.join('\n').trim(),
+  };
 }
-// Wrap a prompt + optional image blocks into the form query() expects.
-// Returns a string when there are no images (fast path), or an async iterable
-// yielding one SDKUserMessage with multi-part content when there are.
+/**
+ * Wrap promptText + optional image blocks into the form query() expects.
+ * Returns a string for the fast path (text-only, no images), or an
+ * async iterable yielding one SDKUserMessage with multi-part content
+ * when there are images.
+ */
 function buildQueryPrompt(promptText, imageBlocks) {
   if (!imageBlocks.length) return promptText;
   const content = [
@@ -443,12 +386,15 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
   const existing = getSession(sessionKey);
   const resuming = !!existing?.sdkSessionId;
   const toolsEnabled = hasTools(body);
-  const promptText = messagesToPrompt(body.messages, { resuming, tools: toolsEnabled ? body.tools : null });
+  const { promptText } = messagesToPrompt(body.messages, { resuming });
   const images = collectImages(body.messages);
   const prompt = buildQueryPrompt(promptText, images);
   const model = resolveModel(body.model);
+  // Build the in-process MCP server exposing client tools to the SDK.
+  // null when toolsEnabled is false (or all tools are malformed).
+  const clientToolsServer = toolsEnabled ? buildClientToolsServer(body.tools) : null;
   if (images.length) console.log(`  [multimodal] ${images.length} image block(s)`);
-  if (toolsEnabled) console.log(`  [tools] ${body.tools.length} client tool(s) — buffering stream`);
+  if (toolsEnabled) console.log(`  [tools] ${body.tools.length} client tool(s) registered as MCP`);
   res.setHeader('Content-Type', 'text/event-stream');
   res.setHeader('Cache-Control', 'no-cache');
@@ -473,11 +419,17 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
     console.log(`  [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
   }
-  let bufferedText = ''; // only used when toolsEnabled
+  // Tools-mode buffers text and collects native tool_use blocks. If the
+  // model emits text first then a tool_use, we want both: textBefore as
+  // the assistant content, plus the tool_calls. (Most clients display the
+  // text and then act on the tool_calls.)
+  let bufferedText = '';
+  let collectedToolCalls = []; // [{id, name, arguments}] from extractToolUses()
   const runQuery = async () => {
     // Reset per-attempt state so a 401 retry starts clean
     bufferedText = '';
+    collectedToolCalls = [];
     isFirst = true;
     resolvedModel = model;
     capturedSessionId = existing?.sdkSessionId || null;
@@ -490,7 +442,18 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
         permissionMode: 'bypassPermissions',
         allowDangerouslySkipPermissions: true,
         abortController,
-        ...(toolsEnabled ? { allowedTools: [] } : {}),
+        // Tools-mode: register client tools as an in-process MCP server
+        // and allow only those (no Bash/Read/etc. — the SDK's built-ins
+        // would pollute the session and leak through to the model).
+        ...(clientToolsServer
+          ? {
+              mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
+              allowedTools: [`${MCP_TOOL_PREFIX}*`],
+            }
+          : toolsEnabled
+            // Tools were requested but none were valid — disable all tools.
+            ? { allowedTools: [] }
+            : {}),
         ...(resuming ? { resume: existing.sdkSessionId } : {}),
         ...(sessionKey && !resuming ? { persistSession: true } : {}),
       },
@@ -532,15 +495,25 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
         throw new AuthFailureInResultText(turnText);
       }
+      // Tools-mode: check for native tool_use content blocks. The moment
+      // we see one, abort the SDK — we don't want our stub handler to
+      // hang waiting on an execution that's actually happening client-side.
+      if (toolsEnabled && message.type === 'assistant' && hasToolUse(message)) {
+        const calls = extractToolUses(message);
+        if (calls.length) {
+          collectedToolCalls.push(...calls);
+          if (turnText) bufferedText += turnText;
+          console.log(`  [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
+          abortController.abort();
+          break;
+        }
+      }
       if (turnText) {
         if (toolsEnabled) {
+          // Buffer text in case it precedes a tool_use, or ends up as the
+          // final response when the model decides not to call any tools.
           bufferedText += turnText;
-          // Abort early once we see a complete <tool_call>...</tool_call>
-          if (hasCompleteToolCall(bufferedText)) {
-            console.log('  [tools] complete tool_call detected — aborting SDK');
-            abortController.abort();
-            break;
-          }
         } else {
           sendSSE(res, makeChunk(requestId, resolvedModel, turnText, isFirst ? 'assistant' : undefined, null));
           isFirst = false;
@@ -586,9 +559,8 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
   // Tools mode: emit the buffered response as a single chunk with either
   // tool_calls (+ finish_reason: tool_calls) or plain text (+ stop).
   if (toolsEnabled && !res.writableEnded) {
-    const parsed = parseToolCalls(bufferedText);
-    if (parsed) {
-      console.log(`  [tools] emitting ${parsed.toolCalls.length} tool_call(s)`);
+    if (collectedToolCalls.length > 0) {
+      console.log(`  [tools] emitting ${collectedToolCalls.length} tool_call(s)`);
       const chunk = {
         id: `chatcmpl-${requestId}`,
         object: 'chat.completion.chunk',
@@ -598,8 +570,8 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
           index: 0,
           delta: {
             role: 'assistant',
-            content: parsed.textBefore || null,
-            tool_calls: parsed.toolCalls.map((tc, i) => ({
+            content: bufferedText.trim() || null,
+            tool_calls: collectedToolCalls.map((tc, i) => ({
               index: i,
               id: tc.id,
               type: 'function',
@@ -634,14 +606,16 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
   const existing = getSession(sessionKey);
   const resuming = !!existing?.sdkSessionId;
   const toolsEnabled = hasTools(body);
-  const promptText = messagesToPrompt(body.messages, { resuming, tools: toolsEnabled ? body.tools : null });
+  const { promptText } = messagesToPrompt(body.messages, { resuming });
   const images = collectImages(body.messages);
   const prompt = buildQueryPrompt(promptText, images);
   const model = resolveModel(body.model);
+  const clientToolsServer = toolsEnabled ? buildClientToolsServer(body.tools) : null;
   if (images.length) console.log(`  [multimodal] ${images.length} image block(s)`);
-  if (toolsEnabled) console.log(`  [tools] ${body.tools.length} client tool(s)`);
+  if (toolsEnabled) console.log(`  [tools] ${body.tools.length} client tool(s) registered as MCP`);
   let resultText = '';
+  let collectedToolCalls = [];
   let resolvedModel = model;
   let inputTokens = 0;
   let outputTokens = 0;
@@ -655,6 +629,7 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
   const runQuery = async () => {
     // Reset per-attempt state so a 401 retry starts clean
     resultText = '';
+    collectedToolCalls = [];
     resolvedModel = model;
     inputTokens = 0;
     outputTokens = 0;
@@ -668,7 +643,14 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
         permissionMode: 'bypassPermissions',
         allowDangerouslySkipPermissions: true,
         abortController,
-        ...(toolsEnabled ? { allowedTools: [] } : {}),
+        ...(clientToolsServer
+          ? {
+              mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
+              allowedTools: [`${MCP_TOOL_PREFIX}*`],
+            }
+          : toolsEnabled
+            ? { allowedTools: [] }
+            : {}),
         ...(resuming ? { resume: existing.sdkSessionId } : {}),
         ...(sessionKey && !resuming ? { persistSession: true } : {}),
       },
@@ -696,11 +678,15 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
           abortController.abort();
           throw new AuthFailureInResultText(resultText);
         }
-        // Abort early once we see a complete <tool_call>...</tool_call>
-        if (toolsEnabled && hasCompleteToolCall(resultText)) {
-          console.log('  [tools] complete tool_call detected — aborting SDK');
-          abortController.abort();
-          break;
+        // Native tool_use detection — abort the moment a tool_use lands.
+        if (toolsEnabled && hasToolUse(message)) {
+          const calls = extractToolUses(message);
+          if (calls.length) {
+            collectedToolCalls.push(...calls);
+            console.log(`  [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
+            abortController.abort();
+            break;
+          }
         }
       }
@@ -740,32 +726,29 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
   if (sessionKey) responseHeaders['X-Session-Id'] = sessionKey;
   // Tool-calling response shape
-  if (toolsEnabled) {
-    const parsed = parseToolCalls(resultText);
-    if (parsed) {
-      console.log(`  [tools] emitting ${parsed.toolCalls.length} tool_call(s)`);
-      return res.set(responseHeaders).json({
-        id: `chatcmpl-${requestId}`,
-        object: 'chat.completion',
-        created: Math.floor(Date.now() / 1000),
-        model: normalizeModelName(resolvedModel),
-        choices: [{
-          index: 0,
-          message: {
-            role: 'assistant',
-            content: parsed.textBefore || null,
-            tool_calls: parsed.toolCalls.map((tc) => ({
-              id: tc.id,
-              type: 'function',
-              function: { name: tc.name, arguments: tc.arguments },
-            })),
-          },
-          finish_reason: 'tool_calls',
-        }],
-        usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
-      });
-    }
-    // No tool_call tags → fall through to normal text response
+  if (toolsEnabled && collectedToolCalls.length > 0) {
+    console.log(`  [tools] emitting ${collectedToolCalls.length} tool_call(s)`);
+    return res.set(responseHeaders).json({
+      id: `chatcmpl-${requestId}`,
+      object: 'chat.completion',
+      created: Math.floor(Date.now() / 1000),
+      model: normalizeModelName(resolvedModel),
+      choices: [{
+        index: 0,
+        message: {
+          role: 'assistant',
+          content: resultText.trim() || null,
+          tool_calls: collectedToolCalls.map((tc) => ({
+            id: tc.id,
+            type: 'function',
+            function: { name: tc.name, arguments: tc.arguments },
+          })),
+        },
+        finish_reason: 'tool_calls',
+      }],
+      usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
+    });
+    // No tool_use blocks → fall through to normal text response
   }
   res.set(responseHeaders).json({
@@ -1090,6 +1073,62 @@ app.get('/dashboard/logs', async (req, res) => {
   }
 });
+// ---------------------------------------------------------------------------
+// Updater — dashboard-driven "update available → update now" flow
+// ---------------------------------------------------------------------------
+// GET /update/check — is there a newer mobygate on npm?
+// Response: { current, latest, updateAvailable, installMode, canApply, cached, error }
+// Safe to poll: the npm registry call is cached for 15 min in-process.
+app.get('/update/check', async (req, res) => {
+  try {
+    const force = req.query.force === '1' || req.query.force === 'true';
+    const info = await getUpdateCheck({ force });
+    res.json(info);
+  } catch (e) {
+    res.status(500).json({ error: e.message });
+  }
+});
+// POST /update/apply — fire the update in a detached child process.
+// We return immediately with { started, pid }. The child runs
+// `npm install -g mobygate@latest` (or `git pull && npm install`), then
+// restarts the service — which kills us. The dashboard polls
+// /update/status to show progress and reconnects once the new server is up.
+app.post('/update/apply', (_req, res) => {
+  try {
+    const result = applyUpdate({});
+    const status = result.started ? 202 : 409;
+    res.status(status).json({ ...result, currentVersion: getCurrentVersion() });
+    if (result.started) {
+      dashboardBus.emitEvent({ type: 'update.started', pid: result.pid, mode: result.mode });
+    }
+  } catch (e) {
+    res.status(500).json({ started: false, error: e.message });
+  }
+});
+// GET /update/status — progress for a running (or just-finished) update.
+// The dashboard polls this during apply. `running` is determined by
+// PID liveness, so even if our process is the one getting restarted,
+// the new one answers correctly.
+app.get('/update/status', (req, res) => {
+  const state = readUpdateState();
+  let running = false;
+  if (state.pid) {
+    try { process.kill(state.pid, 0); running = true; } catch {}
+  }
+  const lines = Math.min(1000, parseInt(req.query.lines || '200', 10));
+  res.json({
+    running,
+    pid: state.pid || null,
+    startedAt: state.startedAt || null,
+    mode: state.mode || null,
+    lines: readUpdateLogTail({ lines }),
+    currentVersion: getCurrentVersion(),
+  });
+});
 // ---------------------------------------------------------------------------
 // Start
 // ---------------------------------------------------------------------------