mobygate 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ /**
2
+ * Request capture — diagnostic dump of inbound /v1/messages and
3
+ * /v1/chat/completions request bodies (and response usage) to disk,
4
+ * with a human-readable summary that breaks down system-block sizes,
5
+ * cache_control markers, tool blocks, message counts, token estimates,
6
+ * and (when response data is available) actual cache hit rates.
7
+ *
8
+ * Off by default. The enabled state is resolved from these sources, in order of precedence:
9
+ *
10
+ * 1. Env var: MOBY_CAPTURE=1 mobygate start
11
+ * 2. Touch file: touch ~/.mobygate/.capture-enabled
12
+ * 3. (env/file unset → off)
13
+ *
14
+ * The touch-file path lets the dashboard toggle capture live without
15
+ * restarting mobygate. Removing the file disables capture immediately.
16
+ *
17
+ * Output: ~/.mobygate/captures/{timestamp}_{path}_{requestId}.{json,summary.txt}
18
+ *
19
+ * .json — raw request body (pretty-printed)
20
+ * .summary.txt — analysis: system blocks, cache markers, message
21
+ * timeline, tool definitions, token breakdown, and
22
+ * (after response lands) actual usage with cache hits
23
+ *
24
+ * Auto-rotation: oldest captures are deleted to keep total count
25
+ * under MOBY_CAPTURE_KEEP (default 100 captures = 200 files since we
26
+ * write 2 per request).
27
+ *
28
+ * Throws nothing — capture failures log a warning and return. Capture
29
+ * never blocks request processing.
30
+ */
31
+
32
+ import { writeFile, mkdir, appendFile, readdir, unlink, stat } from 'fs/promises';
33
+ import { existsSync } from 'fs';
34
+ import { join } from 'path';
35
+ import { homedir } from 'os';
36
+
37
// Root of mobygate's on-disk state. MOBYGATE_HOME overrides ~/.mobygate.
const MOBYGATE_HOME_DIR = process.env.MOBYGATE_HOME || join(homedir(), '.mobygate');

// Directory capture files are written to. MOBYGATE_CAPTURE_DIR overrides the
// default of <home>/captures.
const CAPTURE_DIR = process.env.MOBYGATE_CAPTURE_DIR || join(MOBYGATE_HOME_DIR, 'captures');

// Touch file whose presence turns capture on.
const TOGGLE_FILE = join(MOBYGATE_HOME_DIR, '.capture-enabled');

// Number of captures retained when MOBY_CAPTURE_KEEP is unset or unusable.
const DEFAULT_KEEP_COUNT = 100;

/**
 * Parse the MOBY_CAPTURE_KEEP setting.
 *
 * Anything that does not parse to a non-negative integer (unset, empty,
 * "abc", "-5") falls back to DEFAULT_KEEP_COUNT. A NaN or negative count
 * must never reach the prune step: there it would turn into a slice()
 * offset that deletes every capture (NaN) or the wrong ones (negative).
 *
 * @param {string|undefined} raw - raw environment value
 * @returns {number} non-negative integer capture count
 */
function parseKeepCount(raw) {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : DEFAULT_KEEP_COUNT;
}

const KEEP_COUNT = parseKeepCount(process.env.MOBY_CAPTURE_KEEP);

// In-memory map of requestId → summary file path. Populated by
// captureRequest() and consumed by captureResponse() so response data is
// appended to the same summary file that was written on the way in.
const inFlightSummaries = new Map();
48
+
49
/**
 * Make sure the capture directory exists, creating missing parents.
 *
 * Deliberately not memoised: a one-shot "already created" flag goes stale
 * if the directory is removed while the process is running (e.g. someone
 * clears the captures folder), after which every write would fail until
 * restart. mkdir with `recursive: true` resolves without error when the
 * directory already exists, so calling it per capture is safe.
 *
 * @param {string} [dir] - directory to create; defaults to the capture dir
 * @returns {Promise<void>}
 */
async function ensureDir(dir = CAPTURE_DIR) {
  await mkdir(dir, { recursive: true });
}
58
+
59
/**
 * Rough token estimate for a piece of text, using the usual
 * four-characters-per-token rule of thumb for mixed English and code.
 * Falsy input counts as zero tokens; anything else is stringified first.
 */
function estimateTokens(s) {
  return s ? Math.round(String(s).length / 4) : 0;
}
67
+
68
/**
 * Sum the character count of a message's content.
 *
 * Content may be a bare string or an array of blocks. For each block this
 * counts its `text`, the JSON-serialised form of its `input` object, and
 * its `content` (a string, or a nested block array handled recursively).
 * Anything else (null, numbers, blocks with none of those fields)
 * contributes nothing.
 *
 * Note: despite the name this counts string length (UTF-16 code units),
 * not encoded bytes.
 *
 * @param {string|Array|*} content - message content
 * @returns {number} total characters found
 */
function contentBytes(content) {
  if (typeof content === 'string') return content.length;
  if (!Array.isArray(content)) return 0;

  let total = 0;
  for (const block of content) {
    if (typeof block === 'string') {
      total += block.length;
      continue;
    }
    if (!block || typeof block !== 'object') continue;

    if (typeof block.text === 'string') total += block.text.length;

    // typeof null is 'object'; without the explicit null check a block with
    // `input: null` would be counted as the 4 characters of "null".
    if (block.input !== null && typeof block.input === 'object') {
      total += JSON.stringify(block.input).length;
    }

    if (typeof block.content === 'string') {
      total += block.content.length;
    } else if (Array.isArray(block.content)) {
      total += contentBytes(block.content);
    }
  }
  return total;
}
87
+
88
/**
 * Resolve a declared tool's display name. Anthropic-shape tools carry a
 * top-level `name`; OpenAI-shape tools nest it under `function.name`.
 * Falls back to "(unnamed)" when neither is a string or the tool is not
 * an object.
 */
function toolName(t) {
  const fallback = '(unnamed)';
  if (t === null || typeof t !== 'object') return fallback;
  if (typeof t.name === 'string') return t.name;

  const nested = t.function?.name;
  return typeof nested === 'string' ? nested : fallback;
}
100
+
101
/**
 * Build a human-readable analysis of an Anthropic-shape request body.
 * Also used for /v1/chat/completions bodies, so it has to tolerate
 * OpenAI-shape quirks such as messages with no `content` key at all.
 *
 * Sections, in order: request parameters, system block(s) with their
 * cache_control markers, a message timeline (first three and last two
 * messages previewed, the middle elided), content counters, declared
 * tools (first ten named), and a size breakdown with percentages.
 *
 * @param {object} body - parsed request body; a missing or non-object
 *   body is analysed as an empty request rather than throwing
 * @returns {string} newline-joined summary text
 */
function analyzeAnthropic(body) {
  const req = body !== null && typeof body === 'object' ? body : {};

  // Same 4-chars-per-token heuristic as estimateTokens(), applied to a
  // length directly so we never allocate a throwaway string of that size.
  const tokensForLength = (n) => Math.round(n / 4);
  const PREVIEW_CHARS = 80;

  const lines = [];
  lines.push(`model: ${req.model || '(none)'}`);
  lines.push(`stream: ${!!req.stream}`);
  lines.push(`max_tokens: ${req.max_tokens ?? req.max_completion_tokens ?? '(none)'}`);
  lines.push(`temperature: ${req.temperature ?? '(default)'}`);
  lines.push(`session_id: ${req.session_id ?? '(none)'}`);
  lines.push('');

  // System block(s) — Anthropic accepts string or array of {type, text, cache_control?}
  const sys = req.system;
  let sysBytes = 0;
  if (typeof sys === 'string') {
    sysBytes = sys.length;
    lines.push(`system: 1 block (string), ${sysBytes} bytes, ~${estimateTokens(sys)} tokens`);
    lines.push(` cache_control: NONE (system is bare string — markers only work on array form)`);
  } else if (Array.isArray(sys)) {
    sysBytes = sys.reduce((acc, b) => acc + (b?.text?.length || 0), 0);
    lines.push(`system: ${sys.length} blocks (array), ${sysBytes} bytes, ~${tokensForLength(sysBytes)} tokens`);
    sys.forEach((block, i) => {
      const bytes = block?.text?.length || 0;
      const marker = block?.cache_control ? ` [cache_control: ${JSON.stringify(block.cache_control)}]` : '';
      lines.push(` [${i}] ${block?.type || '?'} ${bytes} bytes${marker}`);
    });
    const cached = sys.filter((b) => b?.cache_control).length;
    lines.push(` cache_control: ${cached}/${sys.length} system blocks marked`);
  } else {
    lines.push(`system: (none)`);
  }
  lines.push('');

  // Messages breakdown
  const msgs = Array.isArray(req.messages) ? req.messages : [];
  lines.push(`messages: ${msgs.length}`);
  let totalContentBytes = 0;
  let imageCount = 0;
  let toolUseCount = 0;
  let toolResultCount = 0;
  let cacheControlInMessages = 0;
  msgs.forEach((m, i) => {
    const content = m?.content;
    const bytes = contentBytes(content);
    totalContentBytes += bytes;
    if (Array.isArray(content)) {
      for (const b of content) {
        if (b?.type === 'image') imageCount += 1;
        if (b?.type === 'tool_use') toolUseCount += 1;
        if (b?.type === 'tool_result') toolResultCount += 1;
        if (b?.cache_control) cacheControlInMessages += 1;
      }
    }
    if (i < 3 || i >= msgs.length - 2) {
      const role = String(m?.role || '?');
      // JSON.stringify(undefined) returns undefined rather than a string;
      // that happens for messages that carry no `content` key.
      const raw = typeof content === 'string'
        ? content
        : (JSON.stringify(content) ?? '(no content)');
      const preview = raw.slice(0, PREVIEW_CHARS).replace(/\s+/g, ' ');
      // Decide truncation from the raw length: collapsing whitespace can
      // shorten the preview below the limit even though text was cut off.
      const ellipsis = raw.length > PREVIEW_CHARS ? '…' : '';
      lines.push(` [${i}] ${role.padEnd(10)} ${bytes.toString().padStart(7)} b ${preview}${ellipsis}`);
    } else if (i === 3 && msgs.length > 5) {
      lines.push(` ... ${msgs.length - 5} more messages omitted ...`);
    }
  });
  lines.push('');
  lines.push(`messages bytes: ${totalContentBytes} (~${tokensForLength(totalContentBytes)} tokens)`);
  lines.push(`images: ${imageCount}`);
  lines.push(`tool_use: ${toolUseCount}`);
  lines.push(`tool_result: ${toolResultCount}`);
  lines.push(`cache_control in messages: ${cacheControlInMessages}`);
  lines.push('');

  // Tools (declared client tools) — handle both Anthropic and OpenAI shapes
  const tools = Array.isArray(req.tools) ? req.tools : null;
  const toolBytes = tools ? JSON.stringify(tools).length : 0;
  if (tools) {
    lines.push(`tools declared: ${tools.length} (${toolBytes} bytes of schema)`);
    tools.slice(0, 10).forEach((t) => {
      lines.push(` - ${toolName(t)}`);
    });
    if (tools.length > 10) lines.push(` ... and ${tools.length - 10} more`);
  } else {
    lines.push('tools declared: (none)');
  }
  lines.push('');

  // Grand total estimate
  const grand = sysBytes + totalContentBytes + toolBytes;
  lines.push(`────`);
  lines.push(`grand total: ${grand} bytes ≈ ${tokensForLength(grand)} input tokens`);
  lines.push(` system: ${sysBytes} (${pct(sysBytes, grand)}%)`);
  lines.push(` messages: ${totalContentBytes} (${pct(totalContentBytes, grand)}%)`);
  lines.push(` tool schemas: ${toolBytes} (${pct(toolBytes, grand)}%)`);

  return lines.join('\n');
}
197
+
198
/**
 * Express `part` as a percentage of `total`, formatted to one decimal
 * place. A zero (or otherwise falsy) total yields the string '0' instead
 * of dividing.
 */
function pct(part, total) {
  if (total) {
    const ratio = part / total;
    return (ratio * 100).toFixed(1);
  }
  return '0';
}
202
+
203
/**
 * Capture a request to disk: the raw body as pretty-printed JSON plus a
 * human-readable summary, both named {timestamp}_{route}_{requestId}.
 *
 * No-op unless capture is enabled. Never throws for I/O or analysis
 * problems: failures are logged with console.warn and swallowed, since
 * capture is best-effort and must not affect request handling.
 *
 * @param {object} args
 * @param {string} args.path - route ('/v1/messages' or
 *   '/v1/chat/completions'); used for the filename and summary header
 * @param {object} args.body - parsed request body
 * @param {string} args.requestId - id used in the filename and as the key
 *   captureResponse() later looks the summary up by
 * @param {string} [args.sessionKey] - resolved session key, if any
 * @param {string} [args.sessionKeySource] - how the session key was resolved
 * @returns {Promise<void>}
 */
export async function captureRequest({ path, body, requestId, sessionKey, sessionKeySource }) {
  if (!isCaptureEnabled()) return;

  // Upper bound on remembered summaries. Entries are normally removed by
  // captureResponse(); this only matters when a response never arrives.
  const MAX_IN_FLIGHT = 1000;

  // Reduce a value to characters that are safe inside a single filename, so
  // neither the route nor the request id can introduce path separators.
  const fileSafe = (value) => String(value).replace(/[^A-Za-z0-9._-]+/g, '-').replace(/^-+/, '');

  try {
    await ensureDir();

    // One clock reading for both the filename and the header.
    const isoNow = new Date().toISOString();
    const ts = isoNow.replace(/[:.]/g, '-').replace('T', '_').slice(0, 19);
    const baseName = `${ts}_${fileSafe(path)}_${fileSafe(requestId)}`;
    const jsonPath = join(CAPTURE_DIR, `${baseName}.json`);
    const summaryPath = join(CAPTURE_DIR, `${baseName}.summary.txt`);

    const header = [
      `mobygate request capture`,
      `─────────────────────────`,
      `timestamp: ${isoNow}`,
      `path: ${path}`,
      `request_id: ${requestId}`,
      `session_key: ${sessionKey || '(none)'}`,
      `session_source: ${sessionKeySource || '(unknown)'}`,
      ``,
    ].join('\n');

    const analysis = analyzeAnthropic(body);

    await Promise.all([
      writeFile(jsonPath, JSON.stringify(body, null, 2), 'utf8'),
      writeFile(summaryPath, header + analysis + '\n', 'utf8'),
    ]);

    // Remember the summary path so captureResponse() can append to it.
    // Keyed by the caller's original requestId, not the sanitised form.
    inFlightSummaries.set(requestId, summaryPath);
    // Map iterates in insertion order, so the first key is the oldest.
    while (inFlightSummaries.size > MAX_IN_FLIGHT) {
      inFlightSummaries.delete(inFlightSummaries.keys().next().value);
    }

    // Best-effort prune to stay under the cap. Intentionally not awaited so
    // it runs alongside the next request; its failures are irrelevant here.
    pruneOldCaptures().catch(() => {});

    console.log(`[capture] ${baseName} (${jsonPath.replace(homedir(), '~')})`);
  } catch (e) {
    console.warn(`[capture] failed for ${requestId}: ${e?.message ?? e}`);
  }
}
253
+
254
/**
 * Append response details to the summary file written by captureRequest().
 *
 * Looks the summary path up by requestId in the in-flight map and removes
 * the entry. When there is no entry (capture was off when the request came
 * in, or this id was never captured) it returns without doing anything, so
 * calling it is always safe. Append failures are logged with console.warn
 * and swallowed.
 *
 * @param {object} args
 * @param {string} args.requestId - id the request was captured under
 * @param {object} [args.usage] - token usage: { input_tokens,
 *   output_tokens, cache_read_input_tokens, cache_creation_input_tokens }.
 *   The usage section is only written when this is provided.
 * @param {number} [args.durationMs] - request duration in milliseconds
 * @param {number|string} [args.status] - response status
 * @param {string} [args.stopReason] - stop reason
 * @param {string} [args.model] - model that served the request
 * @param {*} [args.error] - error to record, if any
 * @returns {Promise<void>}
 */
export async function captureResponse({ requestId, usage, durationMs, status, stopReason, model, error }) {
  const summaryPath = inFlightSummaries.get(requestId);
  if (!summaryPath) return;
  inFlightSummaries.delete(requestId);

  // Input-token weights relative to an uncached input token.
  const CACHE_READ_WEIGHT = 0.1;
  const CACHE_CREATE_WEIGHT = 1.25;

  try {
    const lines = [
      ``,
      `═══ RESPONSE ═══`,
      `status: ${status || '(unknown)'}`,
      // Compare against null/undefined so a genuine 0 ms is still reported.
      `duration: ${durationMs != null ? `${durationMs} ms` : '(unknown)'}`,
      `model: ${model || '(unknown)'}`,
      `stop_reason: ${stopReason || '(none)'}`,
    ];

    if (error) {
      lines.push(`error: ${error}`);
    }

    if (usage) {
      const uncached = usage.input_tokens ?? 0;
      const cacheRead = usage.cache_read_input_tokens ?? 0;
      const cacheCreate = usage.cache_creation_input_tokens ?? 0;
      const output = usage.output_tokens ?? 0;
      const totalInput = uncached + cacheRead + cacheCreate;

      lines.push(``);
      lines.push(`usage:`);
      lines.push(` input_tokens (uncached): ${uncached}`);
      lines.push(` cache_read_input_tokens: ${cacheRead} (charged 0.1x)`);
      lines.push(` cache_creation_input_tokens: ${cacheCreate} (charged 1.25x)`);
      lines.push(` output_tokens: ${output}`);
      lines.push(``);
      lines.push(`cache hit rate: ${pct(cacheRead, totalInput)}% (${cacheRead} of ${totalInput} input tokens)`);

      // Effective INPUT cost in uncached-token equivalents:
      //   uncached × 1.0 + cache_read × 0.1 + cache_create × 1.25
      // Output tokens are deliberately not part of this figure. For
      // reference only — actual billing depends on the model. The value
      // can exceed the wire-level count (negative savings) when cache
      // creation dominates.
      const effectiveInput =
        uncached + cacheRead * CACHE_READ_WEIGHT + cacheCreate * CACHE_CREATE_WEIGHT;
      lines.push(`effective input cost: ${effectiveInput.toFixed(0)} input-tokens-equiv (vs ${totalInput} wire-level)`);
      lines.push(`savings from cache: ${pct(totalInput - effectiveInput, totalInput)}%`);
    }

    await appendFile(summaryPath, lines.join('\n') + '\n', 'utf8');
  } catch (e) {
    console.warn(`[capture] response append failed for ${requestId}: ${e?.message ?? e}`);
  }
}
317
+
318
/**
 * Delete the oldest captures so that at most `keep` remain.
 *
 * A capture is the pair of files sharing one base name
 * ({timestamp}_{route}_{requestId}.json and .summary.txt). Pairs are
 * ranked by the newest mtime of their members and removed as a unit, so a
 * summary that was appended to later is never separated from its .json.
 * Files that do not look like captures are left alone, which matters when
 * the capture directory is user-supplied and shared with other content.
 *
 * Best-effort: an unreadable directory, a file that vanishes mid-run, or a
 * failed delete is ignored. A `keep` that is not a non-negative integer
 * disables pruning instead of being used as a slice offset.
 *
 * @param {string} [dir] - directory to prune; defaults to the capture dir
 * @param {number} [keep] - captures to retain; defaults to KEEP_COUNT
 * @returns {Promise<void>}
 */
async function pruneOldCaptures(dir = CAPTURE_DIR, keep = KEEP_COUNT) {
  if (!Number.isInteger(keep) || keep < 0) return;

  let entries;
  try {
    entries = await readdir(dir);
  } catch {
    return; // missing or unreadable directory: nothing to prune
  }

  // Matches the names captureRequest() generates; group 1 is the shared base.
  const CAPTURE_FILE = /^(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}_.+?)\.(?:json|summary\.txt)$/;

  const captures = new Map(); // base name → { paths: string[], mtime: number }
  for (const name of entries) {
    const match = CAPTURE_FILE.exec(name);
    if (!match) continue;
    const base = match[1];
    if (!captures.has(base)) captures.set(base, { paths: [], mtime: 0 });
    captures.get(base).paths.push(join(dir, name));
  }
  if (captures.size <= keep) return;

  // Only stat once we know something has to go.
  for (const capture of captures.values()) {
    const kept = [];
    for (const full of capture.paths) {
      try {
        const st = await stat(full);
        if (!st.isFile()) continue;
        kept.push(full);
        capture.mtime = Math.max(capture.mtime, st.mtimeMs);
      } catch {
        // File disappeared between readdir and stat — skip it.
      }
    }
    capture.paths = kept;
  }

  const newestFirst = [...captures.values()]
    .filter((capture) => capture.paths.length > 0)
    .sort((a, b) => b.mtime - a.mtime);

  for (const capture of newestFirst.slice(keep)) {
    for (const full of capture.paths) {
      try {
        await unlink(full);
      } catch {
        // Already gone (e.g. a concurrent prune) or not deletable — best-effort.
      }
    }
  }
}
348
+
349
// Last computed answer plus its expiry time: { value, expires }.
let cachedFlag;

/**
 * Report whether request capture is currently switched on.
 *
 * Resolution order:
 *   1. MOBY_CAPTURE env var equal to '1' or 'true'  → on
 *   2. Touch file (~/.mobygate/.capture-enabled) exists → on
 *   3. Otherwise → off
 *
 * The answer is memoised for one second so the per-request cost is a
 * timestamp comparison, while a toggle from the dashboard still takes
 * effect almost immediately.
 */
export function isCaptureEnabled() {
  const now = Date.now();
  if (cachedFlag?.expires > now) return cachedFlag.value;

  const envSetting = process.env.MOBY_CAPTURE;
  const value = ['1', 'true'].includes(envSetting) || existsSync(TOGGLE_FILE);

  cachedFlag = { value, expires: now + 1000 };
  return value;
}
375
+
376
/**
 * Turn capture on or off by creating or removing the touch file, then
 * report the resulting state. Intended for the dashboard toggle button.
 *
 * The returned value is whatever isCaptureEnabled() says after the change,
 * so it can still be true after disabling if the MOBY_CAPTURE env var
 * forces capture on.
 */
export async function setCaptureEnabled(enabled) {
  await ensureDir();

  const homeDir = join(process.env.MOBYGATE_HOME || join(homedir(), '.mobygate'));
  const homeMissing = !existsSync(homeDir);
  if (homeMissing) {
    await mkdir(homeDir, { recursive: true });
  }

  if (!enabled) {
    // Failure to remove (typically: the file is already absent) is ignored.
    await unlink(TOGGLE_FILE).catch(() => {});
  } else {
    const stamp = new Date().toISOString();
    await writeFile(TOGGLE_FILE, `enabled at ${stamp}\n`, 'utf8');
  }

  // Drop the memoised answer so the state returned below is read fresh.
  cachedFlag = null;
  return isCaptureEnabled();
}
392
+
393
// Public aliases for the resolved capture locations used by this module.
// Directory that capture .json / .summary.txt files are written to.
export const CAPTURE_DIR_PATH = CAPTURE_DIR;
// Touch file whose presence enables capture.
export const CAPTURE_TOGGLE_FILE = TOGGLE_FILE;
@@ -40,6 +40,12 @@
40
40
  * user message from history mid-conversation, the auto-key changes
41
41
  * and the SDK starts a new session. One turn of double-billing,
42
42
  * then we're back on the new key. Acceptable.
43
+ * - **Multi-agent collisions** (fixed in v0.8.2): two agents that
44
+ * share boilerplate at the start of their system prompt previously
45
+ * collided onto one session key when the trim window only covered
46
+ * the boilerplate. SYSTEM_TRIM was raised from 500 to 20000 chars
47
+ * to capture the per-agent personality content that follows the
48
+ * shared preamble. See note on the constant below for details.
43
49
  *
44
50
  * Opt-out: `X-Session-Id: none` tells us the client explicitly wants
45
51
  * stateless behavior — we return null and the request flows through
@@ -51,7 +57,20 @@
51
57
  import { createHash } from 'crypto';
52
58
 
53
59
  const HASH_LEN = 16;
54
- const SYSTEM_TRIM = 500;
60
+ // SYSTEM_TRIM was 500 in v0.7.1 — large enough for casual single-agent
61
+ // scenarios (Hermes, single-bot OpenClaw) but caused collisions when
62
+ // multiple agents shared a common boilerplate prefix. Observed in v0.8.1
63
+ // production: Lux + Mercury (two OpenClaw agents) both started their
64
+ // system prompt with the OpenClaw "You are a personal assistant…"
65
+ // boilerplate that filled the first ~500 chars, so their personality
66
+ // markers (loaded from per-agent SOUL.md / IDENTITY.md / etc.) didn't
67
+ // reach the hash and they collided onto the same session key.
68
+ //
69
+ // Bumping to 20kB covers realistic agent system prompts including
70
+ // rich workspace bootstrap (Lux: ~42kB, Mercury: ~80kB total — but
71
+ // the first 20kB has more than enough divergence to fingerprint each).
72
+ // SHA-256 cost on 20kB is ~10-20µs, irrelevant per request.
73
+ const SYSTEM_TRIM = 20000;
55
74
  const USER_TRIM = 500;
56
75
 
57
76
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobygate",
3
- "version": "0.8.0",
3
+ "version": "0.8.2",
4
4
  "description": "OpenAI-compatible local proxy for Claude Max. The Möbius-strip gateway: OpenAI shape in, Claude Max out.",
5
5
  "type": "module",
6
6
  "main": "server.js",
@@ -58,6 +58,7 @@
58
58
  "launchd",
59
59
  "server.js",
60
60
  "index.html",
61
+ "inspector.html",
61
62
  "mcp-inspect.mjs",
62
63
  "README.md",
63
64
  "CHANGELOG.md",