mobygate 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -76,8 +76,10 @@ import {
76
76
  makeStreamTranslator,
77
77
  hasAnthropicTools,
78
78
  mapStopReason,
79
+ extractSdkUsage,
79
80
  } from './lib/anthropic.js';
80
81
  import { resolveSessionKey } from './lib/session-derive.js';
82
+ import { captureRequest, captureResponse, isCaptureEnabled, CAPTURE_DIR_PATH } from './lib/request-capture.js';
81
83
 
82
84
  const __filename = fileURLToPath(import.meta.url);
83
85
  const __dirname = dirname(__filename);
@@ -166,6 +168,17 @@ for (const sig of ['SIGTERM', 'SIGINT', 'SIGHUP']) {
166
168
  // Opus 4.7 ships a native 1M-context variant addressed as `claude-opus-4-7[1m]`.
167
169
  // Default opus aliases route to the 1M form to match the advertised context window.
168
170
  // Pass `claude-opus-4-7-200k` for the standard (cheaper) 200k variant.
171
+ //
172
+ // History: the sonnet-4-6 entry previously mapped to the dated
173
+ // `claude-sonnet-4-5-20250929` because at the time, the SDK didn't
174
+ // recognize `claude-sonnet-4-6` natively. The SDK has since added
175
+ // native support for the un-dated 4-6 alias, so sonnet-4-6 requests
176
+ // were silently being downgraded to retired 4-5-20250929. This caused
177
+ // the "Sonnet only" Anthropic quota to show 0% usage even when Lux
178
+ // and Mercury (configured for sonnet-4-6) were chatting actively —
179
+ // the SDK was accepting the retired model id but Claude was likely
180
+ // falling back to opus or returning a zero-billed response. Fixed in
181
+ // v0.8.2 by routing 4-6 through directly.
169
182
  const MODEL_MAP = {
170
183
  'claude-opus-4': 'claude-opus-4-7[1m]',
171
184
  'claude-opus-4-6': 'claude-opus-4-6',
@@ -173,13 +186,13 @@ const MODEL_MAP = {
173
186
  'claude-opus-4-7[1m]': 'claude-opus-4-7[1m]',
174
187
  'claude-opus-4-7-1m': 'claude-opus-4-7[1m]',
175
188
  'claude-opus-4-7-200k': 'claude-opus-4-7',
176
- 'claude-sonnet-4': 'claude-sonnet-4-5-20250929',
177
- 'claude-sonnet-4-5': 'claude-sonnet-4-5-20250929',
178
- 'claude-sonnet-4-6': 'claude-sonnet-4-5-20250929', // SDK resolves 4-6 to non-existent dated version
189
+ 'claude-sonnet-4': 'claude-sonnet-4-6', // current latest sonnet
190
+ 'claude-sonnet-4-5': 'claude-sonnet-4-5-20250929', // explicit request for older 4-5
191
+ 'claude-sonnet-4-6': 'claude-sonnet-4-6', // SDK now supports natively; was retired-mapped before v0.8.2
179
192
  'claude-haiku-4': 'claude-haiku-4-5-20251001',
180
193
  'claude-haiku-4-5': 'claude-haiku-4-5-20251001',
181
194
  'opus': 'claude-opus-4-7[1m]',
182
- 'sonnet': 'claude-sonnet-4-5-20250929',
195
+ 'sonnet': 'claude-sonnet-4-6', // current latest sonnet
183
196
  'haiku': 'claude-haiku-4-5-20251001',
184
197
  };
185
198
 
@@ -444,6 +457,10 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
444
457
  let resolvedModel = model;
445
458
  let capturedSessionId = existing?.sdkSessionId || null;
446
459
  let clientDisconnected = false;
460
+ let inputTokens = 0;
461
+ let outputTokens = 0;
462
+ let cacheReadTokens = 0;
463
+ let cacheCreateTokens = 0;
447
464
 
448
465
  res.on('close', () => {
449
466
  clientDisconnected = true;
@@ -568,6 +585,11 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
568
585
  isFirst = false;
569
586
  }
570
587
  if (toolsEnabled && !bufferedText && message.result) bufferedText = message.result;
588
+ const usage = extractSdkUsage(message);
589
+ inputTokens = usage.input_tokens;
590
+ outputTokens = usage.output_tokens;
591
+ cacheReadTokens = usage.cache_read_input_tokens;
592
+ cacheCreateTokens = usage.cache_creation_input_tokens;
571
593
  break;
572
594
  }
573
595
  }
@@ -627,6 +649,13 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
627
649
  }
628
650
  res.write('data: [DONE]\n\n');
629
651
  res.end();
652
+ captureResponse({
653
+ requestId,
654
+ usage: { input_tokens: inputTokens, output_tokens: outputTokens, cache_read_input_tokens: cacheReadTokens, cache_creation_input_tokens: cacheCreateTokens },
655
+ status: 'ok',
656
+ stopReason: collectedToolCalls.length > 0 ? 'tool_use' : 'end_turn',
657
+ model: resolvedModel,
658
+ });
630
659
  return;
631
660
  }
632
661
 
@@ -635,6 +664,14 @@ async function handleStreaming(req, res, body, requestId, sessionKey) {
635
664
  res.write('data: [DONE]\n\n');
636
665
  res.end();
637
666
  }
667
+
668
+ captureResponse({
669
+ requestId,
670
+ usage: { input_tokens: inputTokens, output_tokens: outputTokens, cache_read_input_tokens: cacheReadTokens, cache_creation_input_tokens: cacheCreateTokens },
671
+ status: clientDisconnected ? 'client_disconnect' : 'ok',
672
+ stopReason: 'end_turn',
673
+ model: resolvedModel,
674
+ });
638
675
  }
639
676
 
640
677
  // ---------------------------------------------------------------------------
@@ -670,6 +707,9 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
670
707
  let resolvedModel = model;
671
708
  let inputTokens = 0;
672
709
  let outputTokens = 0;
710
+ let cacheReadTokens = 0;
711
+ let cacheCreateTokens = 0;
712
+ let stopReason = 'end_turn';
673
713
  let capturedSessionId = existing?.sdkSessionId || null;
674
714
  const abortController = new AbortController();
675
715
 
@@ -750,8 +790,12 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
750
790
  if (isAuthFailureText(resultText)) {
751
791
  throw new AuthFailureInResultText(resultText);
752
792
  }
753
- inputTokens = message.input_tokens || 0;
754
- outputTokens = message.output_tokens || 0;
793
+ const usage = extractSdkUsage(message);
794
+ inputTokens = usage.input_tokens;
795
+ outputTokens = usage.output_tokens;
796
+ cacheReadTokens = usage.cache_read_input_tokens;
797
+ cacheCreateTokens = usage.cache_creation_input_tokens;
798
+ if (message.subtype) stopReason = message.subtype;
755
799
  break;
756
800
  }
757
801
  }
@@ -818,6 +862,14 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
818
862
  }],
819
863
  usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
820
864
  });
865
+
866
+ captureResponse({
867
+ requestId,
868
+ usage: { input_tokens: inputTokens, output_tokens: outputTokens, cache_read_input_tokens: cacheReadTokens, cache_creation_input_tokens: cacheCreateTokens },
869
+ status: 'ok',
870
+ stopReason,
871
+ model: resolvedModel,
872
+ });
821
873
  }
822
874
 
823
875
  // ---------------------------------------------------------------------------
@@ -870,6 +922,8 @@ async function handleAnthropicNonStreaming(res, body, requestId, sessionKey) {
870
922
  let resolvedModel = model;
871
923
  let inputTokens = 0;
872
924
  let outputTokens = 0;
925
+ let cacheReadTokens = 0;
926
+ let cacheCreateTokens = 0;
873
927
  let capturedSessionId = existing?.sdkSessionId || null;
874
928
  let stopReason = 'end_turn';
875
929
  const abortController = new AbortController();
@@ -950,8 +1004,11 @@ async function handleAnthropicNonStreaming(res, body, requestId, sessionKey) {
950
1004
  if (isAuthFailureText(resultText)) {
951
1005
  throw new AuthFailureInResultText(resultText);
952
1006
  }
953
- inputTokens = message.input_tokens || 0;
954
- outputTokens = message.output_tokens || 0;
1007
+ const usage = extractSdkUsage(message);
1008
+ inputTokens = usage.input_tokens;
1009
+ outputTokens = usage.output_tokens;
1010
+ cacheReadTokens = usage.cache_read_input_tokens;
1011
+ cacheCreateTokens = usage.cache_creation_input_tokens;
955
1012
  stopReason = mapStopReason(message);
956
1013
  break;
957
1014
  }
@@ -990,6 +1047,14 @@ async function handleAnthropicNonStreaming(res, body, requestId, sessionKey) {
990
1047
  requestId,
991
1048
  stopReason,
992
1049
  }));
1050
+
1051
+ captureResponse({
1052
+ requestId,
1053
+ usage: { input_tokens: inputTokens, output_tokens: outputTokens, cache_read_input_tokens: cacheReadTokens, cache_creation_input_tokens: cacheCreateTokens },
1054
+ status: 'ok',
1055
+ stopReason,
1056
+ model: resolvedModel,
1057
+ });
993
1058
  }
994
1059
 
995
1060
  async function handleAnthropicStreaming(req, res, body, requestId, sessionKey) {
@@ -1033,6 +1098,8 @@ async function handleAnthropicStreaming(req, res, body, requestId, sessionKey) {
1033
1098
  let capturedSessionId = existing?.sdkSessionId || null;
1034
1099
  let inputTokens = 0;
1035
1100
  let outputTokens = 0;
1101
+ let cacheReadTokens = 0;
1102
+ let cacheCreateTokens = 0;
1036
1103
  let stopReason = 'end_turn';
1037
1104
  let clientDisconnected = false;
1038
1105
  let textEmittedSoFar = ''; // dedup against same-message reflow from SDK
@@ -1182,8 +1249,11 @@ async function handleAnthropicStreaming(req, res, body, requestId, sessionKey) {
1182
1249
  if (isAuthFailureText(message.result || '') && !tx.hasStarted) {
1183
1250
  throw new AuthFailureInResultText(message.result);
1184
1251
  }
1185
- inputTokens = message.input_tokens || 0;
1186
- outputTokens = message.output_tokens || 0;
1252
+ const usage = extractSdkUsage(message);
1253
+ inputTokens = usage.input_tokens;
1254
+ outputTokens = usage.output_tokens;
1255
+ cacheReadTokens = usage.cache_read_input_tokens;
1256
+ cacheCreateTokens = usage.cache_creation_input_tokens;
1187
1257
  if (!toolUseEmitted) stopReason = mapStopReason(message);
1188
1258
  break;
1189
1259
  }
@@ -1214,6 +1284,14 @@ async function handleAnthropicStreaming(req, res, body, requestId, sessionKey) {
1214
1284
  }
1215
1285
 
1216
1286
  tx.finish({ stopReason, usage: { output_tokens: outputTokens } });
1287
+
1288
+ captureResponse({
1289
+ requestId,
1290
+ usage: { input_tokens: inputTokens, output_tokens: outputTokens, cache_read_input_tokens: cacheReadTokens, cache_creation_input_tokens: cacheCreateTokens },
1291
+ status: 'ok',
1292
+ stopReason,
1293
+ model: resolvedModel,
1294
+ });
1217
1295
  }
1218
1296
 
1219
1297
  // ---------------------------------------------------------------------------
@@ -1311,6 +1389,19 @@ app.get('/', async (_req, res) => {
1311
1389
  }
1312
1390
  });
1313
1391
 
1392
// GET /inspector — session inspector UI for browsing captures.
// The page itself is backed by /dashboard/captures and
// /dashboard/captures/:filename.
//
// Responses:
//   200 text/html  — contents of inspector.html located next to this file
//   404 text/plain — inspector.html does not exist (ENOENT)
//   500 text/plain — any other read failure (permissions, I/O error, ...)
app.get('/inspector', async (_req, res) => {
  // Always serve the freshest copy of the page; never let it be cached.
  res.setHeader('Cache-Control', 'no-cache, no-store, must-revalidate');
  const inspectorPath = join(__dirname, 'inspector.html');
  try {
    const { readFile } = await import('fs/promises');
    const html = await readFile(inspectorPath, 'utf8');
    res.type('html').send(html);
  } catch (e) {
    if (e?.code === 'ENOENT') {
      res.status(404).type('text').send(`inspector.html not found at ${inspectorPath}`);
      return;
    }
    // Anything other than "file missing" is a server-side problem; do not
    // mislabel it as "not found".
    console.error(`[inspector] failed to read ${inspectorPath}:`, e);
    res.status(500).type('text').send(`failed to read inspector.html: ${e?.message ?? e}`);
  }
});
1404
+
1314
1405
  // POST /v1/chat/completions
1315
1406
  app.post('/v1/chat/completions', async (req, res) => {
1316
1407
  const requestId = uuidv4().replace(/-/g, '').slice(0, 24);
@@ -1339,6 +1430,9 @@ app.post('/v1/chat/completions', async (req, res) => {
1339
1430
 
1340
1431
  console.log(`[${new Date().toISOString()}] ${body.stream ? 'stream' : 'sync'} | model=${body.model} → ${resolveModel(body.model)} | msgs=${body.messages.length}${sessionTag}`);
1341
1432
 
1433
+ // Diagnostic capture — off by default, enable with MOBY_CAPTURE=1.
1434
+ captureRequest({ path: '/v1/chat/completions', body, requestId, sessionKey, sessionKeySource });
1435
+
1342
1436
  // Dashboard: request.start
1343
1437
  const startedAt = Date.now();
1344
1438
  const imageBlocks = collectImages(body.messages).length;
@@ -1411,6 +1505,10 @@ app.post('/v1/messages', async (req, res) => {
1411
1505
 
1412
1506
  console.log(`[${new Date().toISOString()}] anthropic ${body.stream ? 'stream' : 'sync'} | model=${body.model} → ${resolveModel(body.model)} | msgs=${body.messages.length}${sessionTag}`);
1413
1507
 
1508
+ // Diagnostic capture — off by default, enable with MOBY_CAPTURE=1.
1509
+ // Writes raw body + summary to ~/.mobygate/captures/.
1510
+ captureRequest({ path: '/v1/messages', body, requestId, sessionKey, sessionKeySource });
1511
+
1414
1512
  // Dashboard event — same shape as the OpenAI route, just labeled by path.
1415
1513
  const startedAt = Date.now();
1416
1514
  const imageBlocks = collectAnthropicImages(body.messages || []).length;
@@ -1659,6 +1757,158 @@ app.get('/dashboard/logs', requireLocalOrigin, async (req, res) => {
1659
1757
  }
1660
1758
  });
1661
1759
 
1760
+ // ---------------------------------------------------------------------------
1761
+ // Captures — diagnostic request/response inspector for the dashboard
1762
+ // ---------------------------------------------------------------------------
1763
+ //
1764
+ // These endpoints back the session-inspector UI. They expose the contents
1765
+ // of `~/.mobygate/captures/` (created by lib/request-capture.js) so the
1766
+ // dashboard can list past requests, drill into individual ones, and
1767
+ // toggle capture on/off live without restarting mobygate.
1768
+ //
1769
+ // All capture endpoints require local-origin (DNS-rebinding protection)
1770
+ // because they expose full request bodies including conversation content.
1771
+
1772
/**
 * Parse a capture filename into its components.
 *
 * Expected shape:
 *   `<YYYY-MM-DD>_<HH-MM-SS>_<path-slug>_<hex-request-id>.<json|summary.txt>`
 * e.g. "2026-04-28_03-49-05_v1-chat-completions_abc123.json".
 *
 * @param {string} name - Bare filename (no directory component).
 * @returns {{timestamp: string, slug: string, requestId: string, type: string} | null}
 *   The parsed parts, or `null` when `name` is not a string or does not
 *   match the expected pattern.
 *   - `timestamp`: "YYYY-MM-DDTHH:MM:SSZ"
 *   - `slug`: the path slug with a leading "/" and every "-" turned into "/"
 *     (so a hyphen that was part of the original route is not recoverable)
 *   - `requestId`: the lowercase-hex id segment
 *   - `type`: "json" or "summary.txt"
 */
function parseCaptureFilename(name) {
  if (typeof name !== 'string') return null;

  // Date and time are captured separately so the timestamp can be assembled
  // explicitly instead of relying on character offsets.
  const match = name.match(
    /^(\d{4}-\d{2}-\d{2})_(\d{2})-(\d{2})-(\d{2})_([\w-]+)_([0-9a-f]+)\.(json|summary\.txt)$/,
  );
  if (!match) return null;

  const [, date, hours, minutes, seconds, slug, requestId, type] = match;
  return {
    // NOTE(review): the "Z" suffix labels the time as UTC — confirm that
    // lib/request-capture.js names files using UTC.
    timestamp: `${date}T${hours}:${minutes}:${seconds}Z`,
    slug: `/${slug.replace(/-/g, '/')}`,
    requestId,
    type,
  };
}
1784
+
1785
/**
 * Extract the listing-card fields from the text of a capture summary.
 * Pure function: no I/O, so it can be exercised without touching disk.
 *
 * String fields are `null` when their line is absent; numeric fields fall
 * back to 0; `stream` is true only when the summary says exactly "true".
 *
 * @param {string} text - Full contents of a `.summary.txt` file.
 * @returns {object} Flat record of the fields shown on a capture card.
 */
function parseCaptureSummary(text) {
  // First capture group of `re`, trimmed, or `dflt` when there is no match.
  const grab = (re, dflt = null) => {
    const m = text.match(re);
    return m ? m[1].trim() : dflt;
  };
  // Same as grab(), but parsed as a base-10 integer with 0 as the fallback.
  const grabInt = (re) => Number.parseInt(grab(re, '0'), 10);

  return {
    sessionKey: grab(/^session_key:\s+(\S+)/m),
    sessionSrc: grab(/^session_source:\s+(\S+)/m),
    model: grab(/^model:\s+(.+?)$/m),
    stream: grab(/^stream:\s+(\S+)/m) === 'true',
    msgCount: grabInt(/^messages:\s+(\d+)/m),
    sysBytes: grabInt(/^\s*system:\s+(\d+)/m),
    grandBytes: grabInt(/^grand total:\s+(\d+)/m),
    grandTokens: grabInt(/≈\s+(\d+)\s+input/),
    cacheControlSystem: grab(/cache_control:\s+(\d+\/\d+)\s+system/),
    // Response side (only present after captureResponse fires)
    inputTokens: grabInt(/input_tokens \(uncached\):\s+(\d+)/),
    cacheRead: grabInt(/cache_read_input_tokens:\s+(\d+)/),
    cacheCreate: grabInt(/cache_creation_input_tokens:\s+(\d+)/),
    outputTokens: grabInt(/output_tokens:\s+(\d+)/),
    cacheHitPct: grab(/cache hit rate:\s+([\d.]+)%/),
    durationMs: grabInt(/^duration:\s+(\d+) ms/m),
    status: grab(/^status:\s+(\S+)/m),
    stopReason: grab(/^stop_reason:\s+(\S+)/m),
  };
}

/**
 * Quick-read a summary.txt for the listing card (avoids loading the full
 * capture JSON).
 *
 * @param {string} summaryPath - Path to the `.summary.txt` file.
 * @returns {Promise<object>} The fields produced by parseCaptureSummary().
 * @throws Rejects with the underlying fs error when the file cannot be read.
 */
async function readSummaryQuick(summaryPath) {
  const { readFile } = await import('fs/promises');
  const text = await readFile(summaryPath, 'utf8');
  return parseCaptureSummary(text);
}
1811
+
1812
// GET /dashboard/captures — list captures (newest first), with summary stats.
//
// Query:
//   limit — max number of captures to return. Defaults to 100 when missing
//           or not a number; clamped to the range [0, 500].
//
// Response: { captures: [...], total, dir }. `total` counts every capture
// that could be read, before `limit` is applied. When the capture directory
// cannot be listed, responds with an empty list plus an explanatory `note`.
app.get('/dashboard/captures', requireLocalOrigin, async (req, res) => {
  try {
    const { readdir, stat } = await import('fs/promises');
    const { CAPTURE_DIR_PATH } = await import('./lib/request-capture.js');

    // A non-numeric limit would otherwise become NaN (→ empty result) and a
    // negative one would make slice() drop items from the end of the list.
    const requestedLimit = Number.parseInt(req.query.limit ?? '', 10);
    const limit = Number.isNaN(requestedLimit)
      ? 100
      : Math.min(500, Math.max(0, requestedLimit));

    let entries;
    try {
      entries = await readdir(CAPTURE_DIR_PATH);
    } catch {
      return res.json({ captures: [], dir: CAPTURE_DIR_PATH, note: 'capture dir does not exist (capture has not run yet)' });
    }

    // Pair each .json with its .summary.txt by base name. A Set keeps the
    // membership test O(1) instead of rescanning the directory listing.
    const present = new Set(entries);
    const items = [];
    for (const jsonName of entries) {
      if (!jsonName.endsWith('.json')) continue;
      const summaryName = jsonName.replace(/\.json$/, '.summary.txt');
      if (!present.has(summaryName)) continue;
      const meta = parseCaptureFilename(jsonName);
      if (!meta) continue;

      const fullJson = join(CAPTURE_DIR_PATH, jsonName);
      const fullSummary = join(CAPTURE_DIR_PATH, summaryName);
      try {
        const [stJson, summary] = await Promise.all([
          stat(fullJson),
          readSummaryQuick(fullSummary),
        ]);
        items.push({
          filename: jsonName,
          summaryFilename: summaryName,
          ts: stJson.mtimeMs,
          path: meta.slug,
          requestId: meta.requestId,
          jsonBytes: stJson.size,
          ...summary,
        });
      } catch {
        // Best-effort listing: a capture that disappears or is unreadable
        // between readdir() and here is left out rather than failing the
        // whole request.
      }
    }

    items.sort((a, b) => b.ts - a.ts);
    res.json({ captures: items.slice(0, limit), total: items.length, dir: CAPTURE_DIR_PATH });
  } catch (e) {
    res.status(500).json({ error: e.message });
  }
});
1859
+
1860
// GET /dashboard/captures/:filename — full body + summary for one capture.
// Filename must end in .json (we serve the body and infer the summary path).
//
// Responses:
//   200 — { filename, body, summary }; `summary` is the placeholder text
//         "(summary not found)" when the .summary.txt cannot be read
//   400 — filename is not a bare `*.json` name
//   404 — the capture JSON does not exist (ENOENT)
//   500 — any other failure (unreadable file, malformed JSON, ...)
app.get('/dashboard/captures/:filename', requireLocalOrigin, async (req, res) => {
  try {
    const { readFile } = await import('fs/promises');
    const { CAPTURE_DIR_PATH } = await import('./lib/request-capture.js');
    const filename = req.params.filename;

    // Defense in depth: only word characters, dots and hyphens are allowed,
    // so the name cannot contain a path separator and therefore cannot
    // resolve outside the capture dir.
    if (!/^[\w.-]+\.json$/.test(filename)) {
      return res.status(400).json({ error: 'invalid filename' });
    }

    const jsonPath = join(CAPTURE_DIR_PATH, filename);
    const summaryPath = join(CAPTURE_DIR_PATH, filename.replace(/\.json$/, '.summary.txt'));
    const [bodyRaw, summaryRaw] = await Promise.all([
      readFile(jsonPath, 'utf8'),
      // The summary is optional for this view; fall back to a placeholder.
      readFile(summaryPath, 'utf8').catch(() => '(summary not found)'),
    ]);

    res.json({
      filename,
      body: JSON.parse(bodyRaw),
      summary: summaryRaw,
    });
  } catch (e) {
    // Only a missing file is "not found"; a corrupt capture or an I/O
    // error is a server-side failure and is reported as such.
    const status = e?.code === 'ENOENT' ? 404 : 500;
    res.status(status).json({ error: e.message });
  }
});
1887
+
1888
// GET /dashboard/captures-state — is capture currently enabled?
//
// Response: { enabled, captureDir, toggleFile, envVar } where `envVar` is
// true when MOBY_CAPTURE is set to "1" or "true" in the environment.
// Responds 500 with { error } if the capture module cannot be loaded or
// queried, matching the other capture endpoints.
app.get('/dashboard/captures-state', requireLocalOrigin, async (_req, res) => {
  try {
    const { isCaptureEnabled, CAPTURE_DIR_PATH, CAPTURE_TOGGLE_FILE } = await import('./lib/request-capture.js');
    const envFlag = process.env.MOBY_CAPTURE;
    res.json({
      enabled: isCaptureEnabled(),
      captureDir: CAPTURE_DIR_PATH,
      toggleFile: CAPTURE_TOGGLE_FILE,
      envVar: envFlag === '1' || envFlag === 'true',
    });
  } catch (e) {
    // Without this, a rejection inside an async handler would leave the
    // request without a response.
    res.status(500).json({ error: e.message });
  }
});
1898
+
1899
// POST /dashboard/captures-toggle — switch capture on or off at runtime.
// Body: { enabled: true | false }
//
// The `enabled` value is coerced by truthiness (a missing body or field
// counts as "off"). Replies with { enabled } carrying whatever state
// setCaptureEnabled() reports back, or 500 with { error } on failure.
app.post('/dashboard/captures-toggle', requireLocalOrigin, async (req, res) => {
  try {
    const { setCaptureEnabled } = await import('./lib/request-capture.js');
    const wantEnabled = Boolean(req.body?.enabled);
    const enabled = await setCaptureEnabled(wantEnabled);
    res.json({ enabled });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
1911
+
1662
1912
  // ---------------------------------------------------------------------------
1663
1913
  // Updater — dashboard-driven "update available → update now" flow
1664
1914
  // ---------------------------------------------------------------------------
@@ -1727,6 +1977,9 @@ app.listen(PORT, BIND, async () => {
1727
1977
  console.log(` model ${DEFAULT_MODEL}`);
1728
1978
  console.log(` session TTL ${ttlMin} min`);
1729
1979
  console.log(` dashboard http://localhost:${PORT}`);
1980
+ if (isCaptureEnabled()) {
1981
+ console.log(` capture ON → ${CAPTURE_DIR_PATH.replace(process.env.HOME || '', '~')}`);
1982
+ }
1730
1983
  console.log('');
1731
1984
  dashboardBus.emitEvent({ type: 'server.boot', port: PORT, bind: BIND, defaultModel: DEFAULT_MODEL });
1732
1985
  });