npm - mobygate - Versions diffs - 0.8.0 → 0.8.2 - Mend

mobygate 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/CHANGELOG.md +172 -0
package/bin/mobygate.js +74 -0
package/index.html +1 -0
package/inspector.html +422 -0
package/lib/anthropic.js +23 -0
package/lib/connectors/hermes.js +3 -1
package/lib/connectors/openclaw.js +39 -2
package/lib/connectors/safety.js +18 -1
package/lib/request-capture.js +394 -0
package/lib/session-derive.js +20 -1
package/package.json +2 -1
package/server.js +263 -10

package/lib/request-capture.js ADDED Viewed

@@ -0,0 +1,394 @@
+/**
+ * Request capture — diagnostic dump of inbound /v1/messages and
+ * /v1/chat/completions request bodies (and response usage) to disk,
+ * with a human-readable summary that breaks down system-block sizes,
+ * cache_control markers, tool blocks, message counts, token estimates,
+ * and (when response data is available) actual cache hit rates.
+ *
+ * Off by default. Two ways to turn it on, checked in order of precedence:
+ *
+ *   1. Env var:    MOBY_CAPTURE=1 mobygate start   (MOBY_CAPTURE=true also works)
+ *   2. Touch file: touch ~/.mobygate/.capture-enabled
+ *
+ * If neither the env var nor the touch file is present, capture stays off.
+ *
+ * The touch-file path lets the dashboard toggle capture live without
+ * restarting mobygate. Removing the file disables capture immediately.
+ *
+ * Output: ~/.mobygate/captures/{timestamp}_{path}_{requestId}.{json,summary.txt}
+ *
+ *   .json         — raw request body (pretty-printed)
+ *   .summary.txt  — analysis: system blocks, cache markers, message
+ *                   timeline, tool definitions, token breakdown, and
+ *                   (after response lands) actual usage with cache hits
+ *
+ * Auto-rotation: oldest captures are deleted to keep total count
+ * under MOBY_CAPTURE_KEEP (default 100 captures = 200 files since we
+ * write 2 per request).
+ *
+ * Throws nothing — capture failures log a warning and return. Capture
+ * never blocks request processing.
+ */
+import { writeFile, mkdir, appendFile, readdir, unlink, stat } from 'fs/promises';
+import { existsSync } from 'fs';
+import { join } from 'path';
+import { homedir } from 'os';
+const CAPTURE_DIR = process.env.MOBYGATE_CAPTURE_DIR
+  || join(process.env.MOBYGATE_HOME || join(homedir(), '.mobygate'), 'captures');
+const TOGGLE_FILE = join(process.env.MOBYGATE_HOME || join(homedir(), '.mobygate'), '.capture-enabled');
+const KEEP_COUNT = parseInt(process.env.MOBY_CAPTURE_KEEP || '100', 10);
+// In-memory map of requestId → summary file path. Populated by
+// captureRequest() and consumed by captureResponse() so we can append
+// response data to the same summary file we wrote on the way in.
+const inFlightSummaries = new Map();
+let dirEnsured = false;
+async function ensureDir() {
+  if (dirEnsured) return;
+  if (!existsSync(CAPTURE_DIR)) {
+    await mkdir(CAPTURE_DIR, { recursive: true });
+  }
+  dirEnsured = true;
+}
+/**
+ * Estimate token count from a string. Rough — 4 chars per token is
+ * the standard back-of-envelope for English+code mixed content.
+ */
+function estimateTokens(s) {
+  if (!s) return 0;
+  return Math.round(String(s).length / 4);
+}
+/**
+ * Walk a content array (or string) and sum total characters across all
+ * text blocks. Anthropic's content can be a bare string or an array of
+ * { type: 'text'|'image'|'tool_use'|'tool_result', ... } blocks.
+ */
+function contentBytes(content) {
+  if (typeof content === 'string') return content.length;
+  if (!Array.isArray(content)) return 0;
+  let total = 0;
+  for (const block of content) {
+    if (typeof block === 'string') { total += block.length; continue; }
+    if (!block || typeof block !== 'object') continue;
+    if (typeof block.text === 'string') total += block.text.length;
+    if (typeof block.input === 'object') total += JSON.stringify(block.input).length;
+    if (typeof block.content === 'string') total += block.content.length;
+    if (Array.isArray(block.content)) total += contentBytes(block.content);
+  }
+  return total;
+}
+/**
+ * Returns a tool's name from either Anthropic-shape (top-level `name`)
+ * or OpenAI-shape (nested under `function.name`). Used by the summary
+ * tools listing — earlier we showed "(unnamed)" for OpenAI tools because
+ * we only checked the top-level `name` field.
+ */
+function toolName(t) {
+  if (!t || typeof t !== 'object') return '(unnamed)';
+  if (typeof t.name === 'string') return t.name;
+  if (t.function && typeof t.function.name === 'string') return t.function.name;
+  return '(unnamed)';
+}
+/**
+ * Build a human-readable analysis of an Anthropic-shape body. Works for
+ * both /v1/messages (native) and translated /v1/chat/completions bodies
+ * where messages have content arrays.
+ */
+function analyzeAnthropic(body) {
+  const lines = [];
+  lines.push(`model:           ${body.model || '(none)'}`);
+  lines.push(`stream:          ${!!body.stream}`);
+  lines.push(`max_tokens:      ${body.max_tokens ?? body.max_completion_tokens ?? '(none)'}`);
+  lines.push(`temperature:     ${body.temperature ?? '(default)'}`);
+  lines.push(`session_id:      ${body.session_id ?? '(none)'}`);
+  lines.push('');
+  // System block(s) — Anthropic accepts string or array of {type, text, cache_control?}
+  const sys = body.system;
+  if (typeof sys === 'string') {
+    lines.push(`system:          1 block (string), ${sys.length} bytes, ~${estimateTokens(sys)} tokens`);
+    lines.push(`  cache_control: NONE (system is bare string — markers only work on array form)`);
+  } else if (Array.isArray(sys)) {
+    const totalBytes = sys.reduce((acc, b) => acc + (b?.text?.length || 0), 0);
+    lines.push(`system:          ${sys.length} blocks (array), ${totalBytes} bytes, ~${estimateTokens(' '.repeat(totalBytes))} tokens`);
+    sys.forEach((block, i) => {
+      const bytes = block?.text?.length || 0;
+      const marker = block?.cache_control ? ` [cache_control: ${JSON.stringify(block.cache_control)}]` : '';
+      lines.push(`  [${i}] ${block?.type || '?'} ${bytes} bytes${marker}`);
+    });
+    const cached = sys.filter(b => b?.cache_control).length;
+    lines.push(`  cache_control: ${cached}/${sys.length} system blocks marked`);
+  } else {
+    lines.push(`system:          (none)`);
+  }
+  lines.push('');
+  // Messages breakdown
+  const msgs = body.messages || [];
+  lines.push(`messages:        ${msgs.length}`);
+  let totalContentBytes = 0;
+  let imageCount = 0;
+  let toolUseCount = 0;
+  let toolResultCount = 0;
+  let cacheControlInMessages = 0;
+  msgs.forEach((m, i) => {
+    const bytes = contentBytes(m.content);
+    totalContentBytes += bytes;
+    if (Array.isArray(m.content)) {
+      for (const b of m.content) {
+        if (b?.type === 'image') imageCount += 1;
+        if (b?.type === 'tool_use') toolUseCount += 1;
+        if (b?.type === 'tool_result') toolResultCount += 1;
+        if (b?.cache_control) cacheControlInMessages += 1;
+      }
+    }
+    if (i < 3 || i >= msgs.length - 2) {
+      const role = m.role || '?';
+      const preview = (typeof m.content === 'string' ? m.content : JSON.stringify(m.content)).slice(0, 80).replace(/\s+/g, ' ');
+      lines.push(`  [${i}] ${role.padEnd(10)} ${bytes.toString().padStart(7)} b   ${preview}${preview.length >= 80 ? '…' : ''}`);
+    } else if (i === 3 && msgs.length > 5) {
+      lines.push(`  ... ${msgs.length - 5} more messages omitted ...`);
+    }
+  });
+  lines.push('');
+  lines.push(`messages bytes:  ${totalContentBytes} (~${estimateTokens(' '.repeat(totalContentBytes))} tokens)`);
+  lines.push(`images:          ${imageCount}`);
+  lines.push(`tool_use:        ${toolUseCount}`);
+  lines.push(`tool_result:     ${toolResultCount}`);
+  lines.push(`cache_control in messages: ${cacheControlInMessages}`);
+  lines.push('');
+  // Tools (declared client tools) — handle both Anthropic and OpenAI shapes
+  if (Array.isArray(body.tools)) {
+    const toolBytes = JSON.stringify(body.tools).length;
+    lines.push(`tools declared:  ${body.tools.length} (${toolBytes} bytes of schema)`);
+    body.tools.slice(0, 10).forEach(t => {
+      lines.push(`  - ${toolName(t)}`);
+    });
+    if (body.tools.length > 10) lines.push(`  ... and ${body.tools.length - 10} more`);
+  } else {
+    lines.push('tools declared:  (none)');
+  }
+  lines.push('');
+  // Grand total estimate
+  const sysBytes = typeof sys === 'string' ? sys.length
+    : Array.isArray(sys) ? sys.reduce((a, b) => a + (b?.text?.length || 0), 0)
+    : 0;
+  const toolBytes = Array.isArray(body.tools) ? JSON.stringify(body.tools).length : 0;
+  const grand = sysBytes + totalContentBytes + toolBytes;
+  lines.push(`────`);
+  lines.push(`grand total:     ${grand} bytes ≈ ${estimateTokens(' '.repeat(grand))} input tokens`);
+  lines.push(`  system:        ${sysBytes} (${pct(sysBytes, grand)}%)`);
+  lines.push(`  messages:      ${totalContentBytes} (${pct(totalContentBytes, grand)}%)`);
+  lines.push(`  tool schemas:  ${toolBytes} (${pct(toolBytes, grand)}%)`);
+  return lines.join('\n');
+}
+function pct(part, total) {
+  if (!total) return '0';
+  return ((part / total) * 100).toFixed(1);
+}
+/**
+ * Capture a request to disk. `path` is the route ('/v1/messages' or
+ * '/v1/chat/completions'), used for filename only. `body` is the parsed
+ * request body. `meta` carries session-key resolution info.
+ *
+ * Returns nothing. Errors logged to console.warn and swallowed — capture
+ * is best-effort and must not block requests.
+ */
+export async function captureRequest({ path, body, requestId, sessionKey, sessionKeySource }) {
+  if (!isCaptureEnabled()) return;
+  try {
+    await ensureDir();
+    const ts = new Date().toISOString().replace(/[:.]/g, '-').replace('T', '_').slice(0, 19);
+    const slug = path.replace(/[\/]/g, '-').replace(/^-/, '');
+    const baseName = `${ts}_${slug}_${requestId}`;
+    const jsonPath = join(CAPTURE_DIR, `${baseName}.json`);
+    const summaryPath = join(CAPTURE_DIR, `${baseName}.summary.txt`);
+    const header = [
+      `mobygate request capture`,
+      `─────────────────────────`,
+      `timestamp:       ${new Date().toISOString()}`,
+      `path:            ${path}`,
+      `request_id:      ${requestId}`,
+      `session_key:     ${sessionKey || '(none)'}`,
+      `session_source:  ${sessionKeySource || '(unknown)'}`,
+      ``,
+    ].join('\n');
+    const analysis = analyzeAnthropic(body);
+    await Promise.all([
+      writeFile(jsonPath, JSON.stringify(body, null, 2), 'utf8'),
+      writeFile(summaryPath, header + analysis + '\n', 'utf8'),
+    ]);
+    // Remember the summary path so captureResponse() can append to it.
+    inFlightSummaries.set(requestId, summaryPath);
+    // Best-effort prune to stay under the cap. Don't await — let it run
+    // alongside the next request.
+    pruneOldCaptures().catch(() => {});
+    console.log(`[capture] ${baseName} (${jsonPath.replace(homedir(), '~')})`);
+  } catch (e) {
+    console.warn(`[capture] failed for ${requestId}: ${e.message}`);
+  }
+}
+/**
+ * Append response usage data to the summary file we wrote on the request
+ * side. If the request's summary file isn't found in our in-flight map,
+ * we silently no-op — that means capture wasn't enabled when the request
+ * came in, or this requestId was never captured. Calling captureResponse
+ * is always safe. The in-flight entry is removed on first use, so a
+ * second call for the same requestId is also a no-op.
+ *
+ * `usage` should be the SDK's NonNullableUsage shape:
+ *   { input_tokens, output_tokens, cache_read_input_tokens,
+ *     cache_creation_input_tokens, ... }
+ * When `usage` is omitted only the status lines are written.
+ *
+ * `durationMs`, `status`, `stopReason`, `model` and `error` are printed
+ * as given; missing values show as "(unknown)" / "(none)".
+ *
+ * Returns nothing. Write failures are logged with console.warn and
+ * swallowed.
+ */
+export async function captureResponse({ requestId, usage, durationMs, status, stopReason, model, error }) {
+  const summaryPath = inFlightSummaries.get(requestId);
+  if (!summaryPath) return;
+  inFlightSummaries.delete(requestId);
+  try {
+    const u = usage || {};
+    const uncached = u.input_tokens ?? 0;
+    const cacheRead = u.cache_read_input_tokens ?? 0;
+    const cacheCreate = u.cache_creation_input_tokens ?? 0;
+    const totalInput = (u.input_tokens || 0) + (u.cache_read_input_tokens || 0) + (u.cache_creation_input_tokens || 0);
+    const cacheHitPct = totalInput > 0 ? (((u.cache_read_input_tokens || 0) / totalInput) * 100).toFixed(1) : '0';
+    const lines = [
+      ``,
+      `═══ RESPONSE ═══`,
+      `status:                       ${status || '(unknown)'}`,
+      // Null-check rather than truthiness: a 0 ms duration is a real value.
+      `duration:                     ${durationMs == null ? '(unknown)' : durationMs + ' ms'}`,
+      `model:                        ${model || '(unknown)'}`,
+      `stop_reason:                  ${stopReason || '(none)'}`,
+    ];
+    if (error) {
+      lines.push(`error:                        ${error}`);
+    }
+    if (usage) {
+      lines.push(``);
+      lines.push(`usage:`);
+      lines.push(`  input_tokens (uncached):    ${uncached}`);
+      lines.push(`  cache_read_input_tokens:    ${cacheRead}   (charged 0.1x)`);
+      lines.push(`  cache_creation_input_tokens: ${cacheCreate}   (charged 1.25x)`);
+      lines.push(`  output_tokens:              ${u.output_tokens ?? 0}`);
+      lines.push(``);
+      lines.push(`cache hit rate:               ${cacheHitPct}%   (${cacheRead} of ${totalInput} input tokens)`);
+      // Effective input cost in equivalent uncached tokens:
+      //   uncached × 1.0 + cache_read × 0.1 + cache_create × 1.25
+      // Output tokens are not part of this figure. For reference only —
+      // actual billing depends on model.
+      const effectiveInput = uncached * 1.0 + cacheRead * 0.1 + cacheCreate * 1.25;
+      lines.push(`effective input cost:         ${effectiveInput.toFixed(0)} input-tokens-equiv (vs ${totalInput} wire-level)`);
+      // Can be negative when cache creation (1.25x) outweighs cache reads.
+      const savings = totalInput > 0 ? (((totalInput - effectiveInput) / totalInput) * 100).toFixed(1) : '0';
+      lines.push(`savings from cache:           ${savings}%`);
+    }
+    await appendFile(summaryPath, lines.join('\n') + '\n', 'utf8');
+  } catch (e) {
+    console.warn(`[capture] response append failed for ${requestId}: ${e?.message ?? e}`);
+  }
+}
+/**
+ * Prune old capture files to stay under KEEP_COUNT. Sorts by mtime,
+ * keeps the newest 2*KEEP_COUNT files (since each request writes 2
+ * files: .json and .summary.txt). Best-effort — failures swallowed.
+ */
+async function pruneOldCaptures() {
+  if (!existsSync(CAPTURE_DIR)) return;
+  let entries;
+  try {
+    entries = await readdir(CAPTURE_DIR);
+  } catch (e) {
+    return;
+  }
+  if (entries.length <= KEEP_COUNT * 2) return;
+  // Stat all files for mtime, sort newest-first, drop the tail.
+  const stats = [];
+  for (const name of entries) {
+    const full = join(CAPTURE_DIR, name);
+    try {
+      const st = await stat(full);
+      if (st.isFile()) stats.push({ name, full, mtime: st.mtimeMs });
+    } catch {}
+  }
+  stats.sort((a, b) => b.mtime - a.mtime);
+  const toDelete = stats.slice(KEEP_COUNT * 2);
+  for (const f of toDelete) {
+    try { await unlink(f.full); } catch {}
+  }
+}
+let cachedFlag;
+/**
+ * Returns true if request capture is enabled. Three sources, in order:
+ *   1. MOBY_CAPTURE env var (set/unset)
+ *   2. Touch file at ~/.mobygate/.capture-enabled
+ *   3. Default: false
+ *
+ * Cached for 1s to avoid spamming process.env / fs.exists on every
+ * request. The 1s cache is short enough to feel "live" when toggled
+ * from the dashboard, fast enough to not bottleneck request handling.
+ */
+export function isCaptureEnabled() {
+  const now = Date.now();
+  if (cachedFlag && cachedFlag.expires > now) return cachedFlag.value;
+  let value = false;
+  if (process.env.MOBY_CAPTURE === '1' || process.env.MOBY_CAPTURE === 'true') {
+    value = true;
+  } else if (existsSync(TOGGLE_FILE)) {
+    value = true;
+  }
+  cachedFlag = { value, expires: now + 1000 };
+  return value;
+}
+/**
+ * Programmatic toggle — flips the touch file. Returns the new state.
+ * Used by the dashboard toggle button.
+ */
+export async function setCaptureEnabled(enabled) {
+  await ensureDir();
+  const dir = join(process.env.MOBYGATE_HOME || join(homedir(), '.mobygate'));
+  if (!existsSync(dir)) await mkdir(dir, { recursive: true });
+  if (enabled) {
+    await writeFile(TOGGLE_FILE, `enabled at ${new Date().toISOString()}\n`, 'utf8');
+  } else {
+    try { await unlink(TOGGLE_FILE); } catch {}
+  }
+  cachedFlag = null; // invalidate so next isCaptureEnabled() reads fresh
+  return isCaptureEnabled();
+}
+// Resolved capture directory and toggle-file path, re-exported under
+// public names.
+export const CAPTURE_DIR_PATH = CAPTURE_DIR;
+export const CAPTURE_TOGGLE_FILE = TOGGLE_FILE;

package/lib/session-derive.js CHANGED Viewed

@@ -40,6 +40,12 @@
  *     user message from history mid-conversation, the auto-key changes
  *     and the SDK starts a new session. One turn of double-billing,
  *     then we're back on the new key. Acceptable.
+ *   - **Multi-agent collisions** (fixed in v0.8.2): two agents that
+ *     share boilerplate at the start of their system prompt previously
+ *     collided onto one session key when the trim window only covered
+ *     the boilerplate. SYSTEM_TRIM was raised from 500 to 20000 chars
+ *     to capture the per-agent personality content that follows the
+ *     shared preamble. See note on the constant below for details.
  *
  * Opt-out: `X-Session-Id: none` tells us the client explicitly wants
  * stateless behavior — we return null and the request flows through
@@ -51,7 +57,20 @@
 import { createHash } from 'crypto';
 const HASH_LEN = 16;
-const SYSTEM_TRIM = 500;
+// SYSTEM_TRIM was 500 in v0.7.1 — large enough for casual single-agent
+// scenarios (Hermes, single-bot OpenClaw) but caused collisions when
+// multiple agents shared a common boilerplate prefix. Observed in v0.8.1
+// production: Lux + Mercury (two OpenClaw agents) both started their
+// system prompt with the OpenClaw "You are a personal assistant…"
+// boilerplate that filled the first ~500 chars, so their personality
+// markers (loaded from per-agent SOUL.md / IDENTITY.md / etc.) didn't
+// reach the hash and they collided onto the same session key.
+//
+// Bumping to 20kB covers realistic agent system prompts including
+// rich workspace bootstrap (Lux: ~42kB, Mercury: ~80kB total — but
+// the first 20kB has more than enough divergence to fingerprint each).
+// SHA-256 cost on 20kB is ~10-20µs, irrelevant per request.
+const SYSTEM_TRIM = 20000;
 const USER_TRIM = 500;
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mobygate",
-  "version": "0.8.0",
+  "version": "0.8.2",
   "description": "OpenAI-compatible local proxy for Claude Max. The Möbius-strip gateway: OpenAI shape in, Claude Max out.",
   "type": "module",
   "main": "server.js",
@@ -58,6 +58,7 @@
     "launchd",
     "server.js",
     "index.html",
+    "inspector.html",
     "mcp-inspect.mjs",
     "README.md",
     "CHANGELOG.md",