mobygate 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js ADDED
@@ -0,0 +1,1076 @@
1
+ /**
2
+ * Mobygate
3
+ *
4
+ * OpenAI-compatible local proxy for Claude Max. The Möbius-strip gateway:
5
+ * OpenAI-shape requests in, Claude Max subscription inference out, on a single
6
+ * continuous loop. Routes through the Claude Agent SDK (no CLI spawn hacks).
7
+ *
8
+ * Flow: Client → POST /v1/chat/completions → Agent SDK query() → SSE back
9
+ */
10
+
11
+ import { fileURLToPath } from 'url';
12
+ import { dirname, join } from 'path';
13
+
14
// Preflight: load the third-party dependencies dynamically so a stale or
// missing node_modules dies with a readable message instead of an opaque
// ERR_MODULE_NOT_FOUND stack trace. Common after a fresh clone or an
// SDK-version bump where the user forgot `npm install`.
let express, uuidModule, sdkModule;
try {
  [express, uuidModule, sdkModule] = await Promise.all([
    import('express').then((m) => m.default),
    import('uuid'),
    import('@anthropic-ai/claude-agent-sdk'),
  ]);
} catch (e) {
  const message = typeof e?.message === 'string' ? e.message : '';
  // Node tags unresolved imports with `code: 'ERR_MODULE_NOT_FOUND'`. The
  // message test is kept only as a fallback because message wording is not a
  // stable contract.
  const missing = e?.code === 'ERR_MODULE_NOT_FOUND'
    || /Cannot find package|ERR_MODULE_NOT_FOUND/.test(message);
  if (missing) {
    // "Cannot find module '<path>'" is the wording for a resolved package whose
    // entry file is absent (half-installed dependency) — same remedy applies.
    const rawPkg = /Cannot find (?:package|module) '([^']+)'/.exec(message)?.[1] || 'a dependency';
    // Truncate so the name never overflows the fixed-width box row below.
    const pkg = rawPkg.length > 48 ? rawPkg.slice(0, 45) + '...' : rawPkg;
    console.error([
      '',
      '╔═══════════════════════════════════════════════════════════════════╗',
      '║ Mobygate — startup aborted ║',
      '╠═══════════════════════════════════════════════════════════════════╣',
      `║ Missing package: ${pkg.padEnd(48)}║`,
      '║ ║',
      '║ Fix: npm install ║',
      '║ Or: npm run up (installs deps, then starts the server) ║',
      '║ ║',
      '║ After pulling new commits, always run `npm install` first. ║',
      '╚═══════════════════════════════════════════════════════════════════╝',
      '',
    ].join('\n'));
    process.exit(1);
  }
  // Anything else (syntax error inside a dependency, etc.) is not ours to mask.
  throw e;
}
const { v4: uuidv4 } = uuidModule;
const { query } = sdkModule;
49
+
50
+ // Auth helper — wraps query() with 401-retry and exposes refresh probe
51
+ import { runWithAuthRetry, getAuthStatus, forceRefresh, is401Error, isAuthFailureText, AuthFailureInResultText } from './scripts/auth-helper.js';
52
+ import { banner } from './lib/ascii.js';
53
+ import { bus as dashboardBus } from './lib/dashboard-bus.js';
54
+ import { loadSessions, saveSessions, flushSessionsNow } from './lib/session-store.js';
55
+ import { LOGS_DIR } from './lib/config.js';
56
+
57
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

/**
 * Read an integer from an environment variable.
 *
 * @param {string} name      Environment variable name.
 * @param {number} fallback  Value used when the variable is unset, empty, or
 *                           does not parse as a base-10 integer.
 * @returns {number} The parsed integer, or `fallback`.
 */
function readIntEnv(name, fallback) {
  const parsed = Number.parseInt(process.env[name] || '', 10);
  // A NaN TTL would make every `age > ttl` comparison false (sessions would
  // never expire); a NaN port is equally unusable. Fall back instead.
  return Number.isNaN(parsed) ? fallback : parsed;
}

const PORT = readIntEnv('PORT', 3456);
const DEFAULT_MODEL = process.env.DEFAULT_MODEL || 'claude-opus-4-7[1m]';
const SESSION_TTL_MS = readIntEnv('SESSION_TTL_MS', 60 * 60 * 1000); // 1 hour default
63
+
64
+ // ---------------------------------------------------------------------------
65
+ // Session store — maps client keys → SDK session IDs (persisted to disk)
66
+ // ---------------------------------------------------------------------------
67
+ // Rehydrated from ~/.mobygate/sessions.json at module load so sessions
68
+ // survive `mobygate restart`, crashes, and reboots. Every mutation (create,
69
+ // update, expire, manual-delete) triggers a debounced write; SIGTERM/SIGINT
70
+ // flushes synchronously so we don't lose the last 500 ms of changes.
71
+
72
// In-memory session table, loaded via loadSessions() with the configured TTL.
const sessions = loadSessions(SESSION_TTL_MS);
if (sessions.size > 0) {
  console.log(`[session] rehydrated ${sessions.size} session(s) from disk`);
}
74
+
75
/**
 * Look up the session stored under a client key, expiring it if it has been
 * idle for longer than SESSION_TTL_MS.
 *
 * @param {string|null|undefined} clientKey Client-chosen session key.
 * @returns {object|null} The live session entry, or null when the key is
 *   falsy, unknown, or was just expired by this call.
 */
function getSession(clientKey) {
  const entry = clientKey ? sessions.get(clientKey) : undefined;
  if (!entry) return null;

  const expired = Date.now() - entry.lastUsed > SESSION_TTL_MS;
  if (!expired) return entry;

  // Idle past the TTL: drop it, tell the dashboard, and persist the removal.
  console.log(` [session] expired: ${clientKey} (idle ${Math.round((Date.now() - entry.lastUsed) / 1000)}s)`);
  sessions.delete(clientKey);
  dashboardBus?.emitEvent?.({ type: 'session.expired', key: clientKey, reason: 'ttl' });
  saveSessions(sessions);
  return null;
}
88
+
89
/**
 * Record the SDK session id for a client key, creating the entry on first use
 * and bumping `lastUsed` / `messageCount` on later calls. Emits a dashboard
 * event and schedules a save either way.
 *
 * @param {string} clientKey     Client-chosen session key.
 * @param {string} sdkSessionId  Session id to store on the entry.
 * @param {string} model         Model recorded on a newly created entry.
 * @returns {object} The stored (new or mutated) session entry.
 */
function upsertSession(clientKey, sdkSessionId, model) {
  let record = sessions.get(clientKey);
  let event;

  if (record) {
    record.sdkSessionId = sdkSessionId;
    record.lastUsed = Date.now();
    record.messageCount++;
    event = { type: 'session.updated', key: clientKey, messageCount: record.messageCount };
  } else {
    record = { sdkSessionId, model, lastUsed: Date.now(), messageCount: 1, createdAt: Date.now() };
    sessions.set(clientKey, record);
    event = { type: 'session.created', key: clientKey, model };
  }

  dashboardBus?.emitEvent?.(event);
  saveSessions(sessions);
  return record;
}
105
+
106
// Periodic sweep: every 5 minutes, drop sessions idle past SESSION_TTL_MS,
// announce each removal on the dashboard bus, and persist if anything changed.
setInterval(() => {
  const now = Date.now();
  const expiredKeys = [];
  for (const [key, entry] of sessions) {
    if (now - entry.lastUsed > SESSION_TTL_MS) expiredKeys.push(key);
  }
  if (expiredKeys.length === 0) return;

  for (const key of expiredKeys) {
    sessions.delete(key);
    dashboardBus?.emitEvent?.({ type: 'session.expired', key, reason: 'ttl' });
  }
  console.log(` [session] cleanup: removed ${expiredKeys.length} expired session(s), ${sessions.size} active`);
  saveSessions(sessions);
}, 5 * 60 * 1000); // every 5 minutes
122
+
123
// Flush pending writes on graceful shutdown so the last 500 ms of session
// mutations aren't lost when launchd/systemd/Task Scheduler stops the
// service (e.g. on `mobygate restart`).
for (const sig of ['SIGTERM', 'SIGINT', 'SIGHUP']) {
  process.on(sig, () => {
    try {
      flushSessionsNow();
    } catch (err) {
      // Best effort: shutdown must still proceed, but a failed flush means
      // session state was lost — say so instead of swallowing it.
      console.error(`[session] flush on ${sig} failed: ${err?.message ?? err}`);
    }
    process.exit(0);
  });
}
132
+
133
+ // ---------------------------------------------------------------------------
134
+ // Model mapping — OpenAI model names → SDK model identifiers
135
+ // ---------------------------------------------------------------------------
136
+
137
// Opus 4.7 ships a native 1M-context variant addressed as `claude-opus-4-7[1m]`.
// Default opus aliases route to the 1M form to match the advertised context window.
// Pass `claude-opus-4-7-200k` for the standard (cheaper) 200k variant.
const MODEL_MAP = {
  'claude-opus-4': 'claude-opus-4-7[1m]',
  'claude-opus-4-6': 'claude-opus-4-6',
  'claude-opus-4-7': 'claude-opus-4-7[1m]',
  'claude-opus-4-7[1m]': 'claude-opus-4-7[1m]',
  'claude-opus-4-7-1m': 'claude-opus-4-7[1m]',
  'claude-opus-4-7-200k': 'claude-opus-4-7',
  'claude-sonnet-4': 'claude-sonnet-4-5-20250929',
  'claude-sonnet-4-5': 'claude-sonnet-4-5-20250929',
  'claude-sonnet-4-6': 'claude-sonnet-4-5-20250929', // SDK resolves 4-6 to non-existent dated version
  'claude-haiku-4': 'claude-haiku-4-5-20251001',
  'claude-haiku-4-5': 'claude-haiku-4-5-20251001',
  'opus': 'claude-opus-4-7[1m]',
  'sonnet': 'claude-sonnet-4-5-20250929',
  'haiku': 'claude-haiku-4-5-20251001',
};

/**
 * Map a client-supplied model name to the identifier passed to the SDK.
 *
 * @param {unknown} model Requested model; may carry a single `provider/`
 *   prefix (e.g. "claude-max-proxy/claude-opus-4-6").
 * @returns {string} The mapped identifier, or DEFAULT_MODEL when the name is
 *   missing, not a string, or not listed in MODEL_MAP.
 */
function resolveModel(model) {
  // The value comes straight from the request body, so it may be any JSON type.
  if (typeof model !== 'string' || model === '') return DEFAULT_MODEL;
  // Strip provider prefix (e.g., "claude-max-proxy/claude-opus-4-6" → "claude-opus-4-6")
  const stripped = model.replace(/^[^/]+\//, '');
  for (const candidate of [stripped, model]) {
    // Own-property check: names such as "constructor" or "toString" must not
    // resolve to members inherited from Object.prototype.
    if (Object.hasOwn(MODEL_MAP, candidate)) return MODEL_MAP[candidate];
  }
  return DEFAULT_MODEL;
}
163
+
164
+ // ---------------------------------------------------------------------------
165
+ // OpenAI messages → single prompt string
166
+ // ---------------------------------------------------------------------------
167
+
168
/**
 * Flatten an OpenAI `message.content` value into plain text.
 *
 * Strings pass through. Arrays are reduced part by part: string parts and
 * `text` parts contribute their text, `image_url` parts are dropped (images
 * are carried separately), anything else is JSON-encoded; empty results are
 * removed and the rest joined with newlines. Other objects are JSON-encoded
 * and remaining falsy values become the empty string.
 *
 * @param {unknown} content The `content` field of a chat message.
 * @returns {string} The textual portion of the content.
 */
function extractContent(content) {
  if (typeof content === 'string') return content;
  if (Array.isArray(content)) {
    return content
      .map((part) => {
        // Request bodies are untrusted: tolerate null/undefined array holes
        // instead of throwing on `part.type`.
        if (part == null) return '';
        if (typeof part === 'string') return part;
        if (part.type === 'text') return part.text;
        if (part.type === 'image_url') return ''; // images carried separately; drop from text
        return JSON.stringify(part);
      })
      .filter(Boolean)
      .join('\n');
  }
  if (content && typeof content === 'object') return JSON.stringify(content);
  return String(content || '');
}
184
+
185
/**
 * Convert an OpenAI `message.content` array into Anthropic image content blocks.
 * Supports both data: URLs (base64) and remote URLs.
 *
 * @param {unknown} content The `content` field of a chat message.
 * @returns {Array<object>} One `{ type: 'image', source }` block per usable
 *   `image_url` part; empty when `content` is not an array.
 */
function extractImageBlocks(content) {
  if (!Array.isArray(content)) return [];
  // media type, then any number of `;param` segments (e.g. `;charset=utf-8`),
  // then `;base64,` and the payload — which may be wrapped across lines.
  const dataUrlPattern = /^data:([^;,]+)(?:;[^;,]*)*?;base64,([\s\S]+)$/;
  const blocks = [];
  for (const part of content) {
    if (!part || part.type !== 'image_url') continue;
    const url = typeof part.image_url === 'string' ? part.image_url : part.image_url?.url;
    if (typeof url !== 'string' || url === '') continue;
    const dataMatch = dataUrlPattern.exec(url);
    if (dataMatch) {
      blocks.push({
        type: 'image',
        // Whitespace is not part of the base64 alphabet; strip line wraps.
        source: { type: 'base64', media_type: dataMatch[1], data: dataMatch[2].replace(/\s+/g, '') },
      });
    } else {
      blocks.push({ type: 'image', source: { type: 'url', url } });
    }
  }
  return blocks;
}
203
+
204
/**
 * Collect image blocks from the LAST user message only (earlier turns are not
 * scanned).
 *
 * @param {unknown} messages OpenAI-style message array from the request body.
 * @returns {Array<object>} Image blocks of the most recent user message, or
 *   an empty array when there is none.
 */
function collectImages(messages) {
  if (!Array.isArray(messages)) return [];
  for (let i = messages.length - 1; i >= 0; i--) {
    // Entries are unvalidated request data; skip null/non-object holes.
    if (messages[i]?.role === 'user') return extractImageBlocks(messages[i].content);
  }
  return [];
}
211
+
212
+ // ---------------------------------------------------------------------------
213
+ // Tool calling (Path B: prompt-embedded protocol)
214
+ // ---------------------------------------------------------------------------
215
+ // The Claude Agent SDK cannot stream OpenAI-style function-call events back to
216
+ // the caller (MCP handlers execute in-process and pollute session state; see
217
+ // README "Known Gaps"). Workaround: inject client-provided tool schemas into
218
+ // the system prompt and instruct the model to emit <tool_call>{...}</tool_call>
219
+ // tags. We parse those out and re-emit as OpenAI `tool_calls`. Tool results
220
+ // coming back from the client get wrapped in <tool_result> blocks.
221
+
222
/**
 * Tell whether a request body carries at least one client-defined tool.
 *
 * @param {object|null|undefined} body Parsed request body.
 * @returns {boolean} True when `body.tools` is a non-empty array.
 */
function hasTools(body) {
  const tools = body?.tools;
  if (!Array.isArray(tools)) return false;
  return tools.length > 0;
}
225
+
226
/**
 * Render the prompt section that teaches the model the <tool_call> protocol
 * and lists the client-defined tools.
 *
 * @param {Iterable<object>} tools OpenAI-style tool definitions; entries that
 *   are not `type: 'function'` with a `function` object are skipped.
 * @returns {string} Newline-joined instructions followed by one <tool> entry
 *   per accepted tool.
 */
function buildToolInstructions(tools) {
  const preamble = [
    'You have access to CLIENT-DEFINED tools listed below. To invoke a tool, emit one or more <tool_call> tags, each containing a strict JSON object with "name" and "arguments":',
    '',
    '<tool_call>{"name":"<tool_name>","arguments":{<args>}}</tool_call>',
    '',
    'Rules:',
    '- Do NOT wrap <tool_call> tags in markdown code fences.',
    '- When you emit <tool_call> tags, output ONLY the tags — no prose, no explanation, no other text.',
    '- You may emit multiple <tool_call> tags to request parallel calls.',
    '- Tool results will be returned as <tool_result id="..." name="...">...</tool_result> blocks. After results arrive, continue toward the final answer.',
    '- When you have the final answer and need no more tool calls, respond normally WITHOUT any <tool_call> tag.',
    '- Do NOT call any other tool (Read, Bash, Grep, etc.) — only the tools listed below.',
    '',
    'Available tools:',
  ];

  const catalog = [];
  for (const tool of tools) {
    const fn = tool?.type === 'function' ? tool.function : null;
    if (!fn) continue;
    // A tool without a schema is advertised as taking an empty object.
    const schema = JSON.stringify(fn.parameters || { type: 'object', properties: {} });
    catalog.push(
      `<tool name="${fn.name}">`,
      ...(fn.description ? [` <description>${fn.description}</description>`] : []),
      ` <parameters>${schema}</parameters>`,
      '</tool>',
    );
  }
  return [...preamble, ...catalog].join('\n');
}
252
+
253
/**
 * Decode the `arguments` of a replayed tool call.
 *
 * @param {unknown} raw JSON text, or an already-parsed object.
 * @returns {unknown} The decoded value; `{}` when `raw` is empty or is not
 *   valid JSON.
 */
function parseReplayArguments(raw) {
  // Already decoded: JSON.parse would stringify it to "[object Object]",
  // fail, and the arguments would silently be replaced with {}.
  if (raw && typeof raw === 'object') return raw;
  try {
    return JSON.parse(raw || '{}');
  } catch {
    // Deliberate best effort: malformed argument text replays as an empty object.
    return {};
  }
}

/**
 * Render a prior assistant message for inclusion in the prompt: its text
 * followed by one <tool_call> tag per function tool call.
 *
 * @param {object} msg Assistant message (`content`, optional `tool_calls`).
 * @returns {string} Newline-joined text and tags; empty string if neither exists.
 */
function formatAssistantForReplay(msg) {
  const parts = [];
  const text = extractContent(msg.content);
  if (text) parts.push(text);
  if (Array.isArray(msg.tool_calls)) {
    for (const tc of msg.tool_calls) {
      if (tc?.type !== 'function' || !tc.function) continue;
      const call = { name: tc.function.name, arguments: parseReplayArguments(tc.function.arguments) };
      parts.push(`<tool_call>${JSON.stringify(call)}</tool_call>`);
    }
  }
  return parts.join('\n');
}
268
+
269
/**
 * Wrap a `tool`-role message in a <tool_result> block for the prompt.
 *
 * @param {object} msg Tool message (`content`, optional `tool_call_id`, `name`).
 * @returns {string} The block; the id falls back to "unknown" and the name to "".
 */
function formatToolResult(msg) {
  const body = extractContent(msg.content);
  const { tool_call_id: callId, name: toolName } = msg;
  const openTag = `<tool_result id="${callId || 'unknown'}" name="${toolName || ''}">`;
  return `${openTag}\n${body}\n</tool_result>`;
}
275
+
276
/**
 * Encode parsed tool-call arguments as the JSON text OpenAI clients expect in
 * `function.arguments`.
 *
 * @param {unknown} args The `arguments` value found inside a <tool_call> tag.
 * @returns {string} JSON text; `'{}'` when `args` is null/undefined.
 */
function serializeToolArguments(args) {
  if (typeof args === 'string') {
    try {
      // Already JSON text: pass it through rather than encoding it a second
      // time, which would make clients decode a string instead of an object.
      JSON.parse(args);
      return args;
    } catch {
      // Not JSON — fall through and encode it as a JSON string.
    }
  }
  return JSON.stringify(args ?? {});
}

/**
 * Parse the model's text output for <tool_call> tags.
 *
 * @param {unknown} text Accumulated model output.
 * @returns {{toolCalls: Array<{id: string, name: string, arguments: string}>, textBefore: string}|null}
 *   The valid calls plus the trimmed text preceding the first tag, or null
 *   when no tag holds a JSON object with a string `name`.
 */
function parseToolCalls(text) {
  if (typeof text !== 'string' || !text.includes('<tool_call>')) return null;
  const re = /<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g;
  const calls = [];
  let firstIdx = -1;
  for (const m of text.matchAll(re)) {
    // Position of the first tag, valid or not: everything before it is prose.
    if (firstIdx === -1) firstIdx = m.index;
    let obj;
    try {
      obj = JSON.parse(m[1]);
    } catch {
      continue; // ignore malformed tool_call blocks
    }
    if (obj && typeof obj.name === 'string') {
      calls.push({
        id: `call_${uuidv4().replace(/-/g, '').slice(0, 20)}`,
        name: obj.name,
        arguments: serializeToolArguments(obj.arguments),
      });
    }
  }
  if (!calls.length) return null;
  return { toolCalls: calls, textBefore: text.slice(0, firstIdx).trim() };
}
303
+
304
/**
 * Detect whether the running text contains a COMPLETE
 * <tool_call>...</tool_call> pair — used to abort the SDK early once a call
 * has been emitted.
 *
 * @param {string} text Accumulated model output.
 * @returns {boolean} True when a closing tag follows an opening tag.
 */
function hasCompleteToolCall(text) {
  const OPEN_TAG = '<tool_call>';
  const haystack = String(text);
  const openAt = haystack.indexOf(OPEN_TAG);
  if (openAt === -1) return false;
  // If any opener is followed by a closer, the first opener is too.
  return haystack.indexOf('</tool_call>', openAt + OPEN_TAG.length) !== -1;
}
309
+
310
/**
 * Flatten an OpenAI message list into the single prompt string sent to the SDK.
 *
 * @param {Array<object>} messages OpenAI-style chat messages.
 * @param {object} [options]
 * @param {boolean} [options.resuming=false] When true, emit only the newest
 *   tail (trailing tool results and/or the user message before them) instead
 *   of the whole transcript.
 * @param {Array<object>|null} [options.tools=null] Client tool definitions;
 *   when non-empty, tool instructions are prepended (full transcript only).
 * @returns {string} The prompt text.
 */
function messagesToPrompt(messages, { resuming = false, tools = null } = {}) {
  if (resuming) {
    // Walk back over the trailing run of tool messages, then look at the one
    // message just before that run: only a user message contributes text.
    const trailingResults = [];
    let cursor = messages.length - 1;
    while (cursor >= 0 && messages[cursor].role === 'tool') {
      trailingResults.push(formatToolResult(messages[cursor]));
      cursor -= 1;
    }
    trailingResults.reverse(); // restore chronological order

    const endsAtUser = cursor >= 0 && messages[cursor].role === 'user';
    const userText = endsAtUser ? extractContent(messages[cursor].content) : '';

    const sections = [];
    if (trailingResults.length) {
      sections.push(`<tool_results>\n${trailingResults.join('\n')}\n</tool_results>`);
      // A bare <tool_results> block is sometimes treated as "just data" and
      // answered with nothing; a short neutral nudge keeps the turn going.
      if (!userText) {
        sections.push('Use the tool results above to continue toward the final answer. If more tool calls are needed, emit them; otherwise respond directly.');
      }
    }
    if (userText) sections.push(userText);

    const tail = sections.join('\n\n');
    return tail || extractContent(messages[messages.length - 1].content);
  }

  const transcript = [];
  // Tool instructions go first, once, as a system section.
  if (tools && tools.length) {
    transcript.push(`<system>\n${buildToolInstructions(tools)}\n</system>\n`);
  }

  // Consecutive tool messages are collected and emitted as one block.
  const pendingResults = [];
  const flushResults = () => {
    if (pendingResults.length === 0) return;
    transcript.push(`<tool_results>\n${pendingResults.join('\n')}\n</tool_results>\n`);
    pendingResults.length = 0;
  };

  for (const msg of messages) {
    const { role } = msg;
    if (role === 'tool') {
      pendingResults.push(formatToolResult(msg));
      continue;
    }
    flushResults();
    if (role === 'system') {
      transcript.push(`<system>\n${extractContent(msg.content)}\n</system>\n`);
    } else if (role === 'user') {
      transcript.push(extractContent(msg.content));
    } else if (role === 'assistant') {
      transcript.push(`<previous_response>\n${formatAssistantForReplay(msg)}\n</previous_response>\n`);
    }
    // Any other role contributes nothing.
  }
  flushResults();
  return transcript.join('\n').trim();
}
376
+
377
/**
 * Wrap a prompt + optional image blocks into the form query() expects.
 *
 * @param {string} promptText  Flattened prompt text.
 * @param {Array<object>} imageBlocks Image content blocks (may be empty).
 * @returns {string|AsyncGenerator<object>} The text itself when there are no
 *   images (fast path); otherwise an async generator yielding exactly one
 *   user message whose content is the text part followed by the images.
 */
function buildQueryPrompt(promptText, imageBlocks) {
  if (!imageBlocks.length) return promptText;

  const userMessage = {
    type: 'user',
    message: {
      role: 'user',
      content: [{ type: 'text', text: promptText || '' }, ...imageBlocks],
    },
    parent_tool_use_id: null,
  };

  return (async function* singleMessage() {
    yield userMessage;
  })();
}
395
+
396
+ // ---------------------------------------------------------------------------
397
+ // Normalize model name for OpenAI response format
398
+ // ---------------------------------------------------------------------------
399
+
400
/**
 * Collapse an SDK model identifier to the family alias reported in OpenAI
 * responses.
 *
 * @param {string|null|undefined} model Resolved model identifier.
 * @returns {string} `claude-<family>-4` for opus/sonnet/haiku names (checked
 *   in that order), the input itself for other names, or `claude-sonnet-4`
 *   when the input is empty.
 */
function normalizeModelName(model) {
  for (const family of ['opus', 'sonnet', 'haiku']) {
    if (model?.includes(family)) return `claude-${family}-4`;
  }
  return model || 'claude-sonnet-4';
}
406
+
407
+ // ---------------------------------------------------------------------------
408
+ // SSE helpers
409
+ // ---------------------------------------------------------------------------
410
+
411
/**
 * Build one OpenAI `chat.completion.chunk` payload.
 *
 * @param {string} requestId     Suffix for the `chatcmpl-` id.
 * @param {string} model         Model identifier (normalized for the response).
 * @param {*} [content]          Delta content; omitted only when `undefined`.
 * @param {string} [role]        Delta role; omitted when falsy.
 * @param {string} [finishReason] Finish reason; reported as null when falsy.
 * @returns {object} The chunk object.
 */
function makeChunk(requestId, model, content, role, finishReason) {
  const delta = {};
  if (role) delta.role = role;
  // `null` and '' are legitimate contents, so only `undefined` means "absent".
  if (content !== undefined) delta.content = content;

  const choice = { index: 0, delta, finish_reason: finishReason || null };

  return {
    id: `chatcmpl-${requestId}`,
    object: 'chat.completion.chunk',
    created: Math.floor(Date.now() / 1000),
    model: normalizeModelName(model),
    choices: [choice],
  };
}
427
+
428
/**
 * Write one server-sent `data:` event carrying `data` as JSON, unless the
 * response has already been ended.
 *
 * @param {object} res  Response-like object with `writableEnded` and `write`.
 * @param {*} data      JSON-serializable payload.
 */
function sendSSE(res, data) {
  if (res.writableEnded) return;
  const payload = JSON.stringify(data);
  res.write(`data: ${payload}\n\n`);
}
433
+
434
+ // ---------------------------------------------------------------------------
435
+ // POST /v1/chat/completions — streaming
436
+ // ---------------------------------------------------------------------------
437
+
438
/**
 * Serve a streaming `POST /v1/chat/completions` request: run the prompt
 * through the Agent SDK `query()` and relay the output as OpenAI-style SSE.
 *
 * Without client tools, the text of each assistant message is forwarded as a
 * chunk as soon as it arrives. With client tools, text is buffered, the SDK
 * run is aborted once a complete <tool_call> appears, and the buffer is
 * emitted at the end as a single chunk (tool_calls or plain text).
 *
 * @param {object} req        Express request (not read by this function).
 * @param {object} res        Express response used as the SSE stream.
 * @param {object} body       Parsed request body (`messages`, `model`, optional `tools`).
 * @param {string} requestId  Id used for `chatcmpl-<id>` and `X-Request-Id`.
 * @param {string|null} sessionKey Client session key, or null when stateless.
 * @returns {Promise<void>} Resolves after the stream has been ended.
 */
async function handleStreaming(req, res, body, requestId, sessionKey) {
  const existing = getSession(sessionKey);
  const resuming = !!existing?.sdkSessionId;
  const toolsEnabled = hasTools(body);
  const promptText = messagesToPrompt(body.messages, { resuming, tools: toolsEnabled ? body.tools : null });
  const images = collectImages(body.messages);
  const model = resolveModel(body.model);
  if (images.length) console.log(` [multimodal] ${images.length} image block(s)`);
  if (toolsEnabled) console.log(` [tools] ${body.tools.length} client tool(s) — buffering stream`);

  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  res.setHeader('X-Request-Id', requestId);
  if (sessionKey) res.setHeader('X-Session-Id', sessionKey);
  res.flushHeaders();
  res.write(':ok\n\n');

  // Reassigned at the start of every attempt: an AbortController cannot be
  // reset once aborted, so a retry needs its own.
  let abortController = new AbortController();
  let isFirst = true;
  let resolvedModel = model;
  let capturedSessionId = existing?.sdkSessionId || null;
  let clientDisconnected = false;

  res.on('close', () => {
    clientDisconnected = true;
    abortController.abort(); // aborts whichever attempt is current
  });

  if (resuming) {
    console.log(` [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
  }

  let bufferedText = ''; // only used when toolsEnabled

  const runQuery = async () => {
    // Reset per-attempt state so a 401 retry starts clean
    bufferedText = '';
    isFirst = true;
    resolvedModel = model;
    capturedSessionId = existing?.sdkSessionId || null;

    // Nobody is listening any more — don't start (or restart) an SDK run.
    if (clientDisconnected) return;

    // The inline auth-failure path aborts the controller before throwing, so
    // reusing it would hand the retry an already-aborted signal.
    abortController = new AbortController();
    // With images the prompt is a one-shot async generator; build a fresh one
    // per attempt so a retry does not receive an exhausted iterator.
    const prompt = buildQueryPrompt(promptText, images);

    for await (const message of query({
      prompt,
      options: {
        model,
        maxTurns: toolsEnabled ? 5 : 200,
        permissionMode: 'bypassPermissions',
        allowDangerouslySkipPermissions: true,
        abortController,
        ...(toolsEnabled ? { allowedTools: [] } : {}),
        ...(resuming ? { resume: existing.sdkSessionId } : {}),
        ...(sessionKey && !resuming ? { persistSession: true } : {}),
      },
    })) {
      if (clientDisconnected) break;

      const msgPreview = message.type === 'assistant'
        ? `content_keys=${Object.keys(message).join(',')}`
        : message.type === 'result'
          ? `result=${(message.result || '').slice(0, 60)}`
          : message.subtype || '';
      console.log(` [msg] type=${message.type} ${msgPreview}`);

      if (message.type === 'system' && message.subtype === 'init' && message.model) {
        resolvedModel = message.model;
      }

      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
        capturedSessionId = message.session_id;
        console.log(` [session] captured sdk session: ${capturedSessionId}`);
      }

      // Extract text from this assistant message
      let turnText = '';
      if (message.type === 'assistant' && message.message?.content) {
        const content = message.message.content;
        if (Array.isArray(content)) {
          for (const b of content) if (b.type === 'text' && b.text) turnText += b.text;
        } else if (typeof content === 'string') {
          turnText = content;
        }
      }

      // Detect auth failure surfaced inline (common on long-running proxies
      // where the SDK's cached creds expire). Throw so runWithAuthRetry
      // treats it like a real 401 exception.
      if (turnText && isAuthFailureText(turnText) && isFirst) {
        abortController.abort();
        throw new AuthFailureInResultText(turnText);
      }

      if (turnText) {
        if (toolsEnabled) {
          bufferedText += turnText;
          // Abort early once we see a complete <tool_call>...</tool_call>
          if (hasCompleteToolCall(bufferedText)) {
            console.log(' [tools] complete tool_call detected — aborting SDK');
            abortController.abort();
            break;
          }
        } else {
          sendSSE(res, makeChunk(requestId, resolvedModel, turnText, isFirst ? 'assistant' : undefined, null));
          isFirst = false;
        }
      }

      if (message.type === 'result') {
        if (message.result && isAuthFailureText(message.result) && isFirst) {
          throw new AuthFailureInResultText(message.result);
        }
        // Nothing was streamed from assistant messages: fall back to the
        // result text so the client still receives an answer.
        if (!toolsEnabled && message.result && isFirst) {
          sendSSE(res, makeChunk(requestId, resolvedModel, message.result, 'assistant', null));
          isFirst = false;
        }
        if (toolsEnabled && !bufferedText && message.result) bufferedText = message.result;
        break;
      }
    }
  };

  try {
    await runWithAuthRetry({
      attempt: runQuery,
      // Only retry if we haven't written a real chunk yet. In tools mode we
      // buffer internally so any retry is safe regardless.
      bailIfStarted: () => !toolsEnabled && !isFirst,
      onRefreshing: (err) => console.warn(`[auth] 401 on stream — refreshing (${err.message?.slice(0, 80)})`),
      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying stream`),
    });
  } catch (err) {
    // Abort from tool-call detection surfaces as an abort error — not a real failure
    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
    if (!clientDisconnected && !(toolsEnabled && isAbort)) {
      console.error('[stream] SDK error:', err.message);
      sendSSE(res, { error: { message: err.message, type: 'server_error', code: null } });
    }
  }

  if (sessionKey && capturedSessionId) {
    upsertSession(sessionKey, capturedSessionId, resolvedModel);
  }

  // Tools mode: emit the buffered response as a single chunk with either
  // tool_calls (+ finish_reason: tool_calls) or plain text (+ stop).
  if (toolsEnabled && !res.writableEnded) {
    const parsed = parseToolCalls(bufferedText);
    if (parsed) {
      console.log(` [tools] emitting ${parsed.toolCalls.length} tool_call(s)`);
      const chunk = {
        id: `chatcmpl-${requestId}`,
        object: 'chat.completion.chunk',
        created: Math.floor(Date.now() / 1000),
        model: normalizeModelName(resolvedModel),
        choices: [{
          index: 0,
          delta: {
            role: 'assistant',
            content: parsed.textBefore || null,
            tool_calls: parsed.toolCalls.map((tc, i) => ({
              index: i,
              id: tc.id,
              type: 'function',
              function: { name: tc.name, arguments: tc.arguments },
            })),
          },
          finish_reason: 'tool_calls',
        }],
      };
      sendSSE(res, chunk);
    } else {
      sendSSE(res, makeChunk(requestId, resolvedModel, bufferedText, 'assistant', null));
      sendSSE(res, makeChunk(requestId, resolvedModel, undefined, undefined, 'stop'));
    }
    res.write('data: [DONE]\n\n');
    res.end();
    return;
  }

  if (!res.writableEnded) {
    sendSSE(res, makeChunk(requestId, resolvedModel, undefined, undefined, 'stop'));
    res.write('data: [DONE]\n\n');
    res.end();
  }
}
624
+
625
+ // ---------------------------------------------------------------------------
626
+ // POST /v1/chat/completions — non-streaming
627
+ // ---------------------------------------------------------------------------
628
+
629
/**
 * Serve a non-streaming `POST /v1/chat/completions` request: run the prompt
 * through the Agent SDK `query()`, accumulate the assistant text, and reply
 * with a single OpenAI `chat.completion` JSON body.
 *
 * With client tools enabled, the SDK run is aborted as soon as a complete
 * <tool_call> appears and the reply carries `tool_calls` when any parse.
 *
 * @param {object} res        Express response.
 * @param {object} body       Parsed request body (`messages`, `model`, optional `tools`).
 * @param {string} requestId  Id used for `chatcmpl-<id>`.
 * @param {string|null} sessionKey Client session key, or null when stateless.
 * @returns {Promise<*>} Resolves once the response (200, or 500 on SDK error)
 *   has been written.
 */
async function handleNonStreaming(res, body, requestId, sessionKey) {
  const existing = getSession(sessionKey);
  const resuming = !!existing?.sdkSessionId;
  const toolsEnabled = hasTools(body);
  const promptText = messagesToPrompt(body.messages, { resuming, tools: toolsEnabled ? body.tools : null });
  const images = collectImages(body.messages);
  const model = resolveModel(body.model);
  if (images.length) console.log(` [multimodal] ${images.length} image block(s)`);
  if (toolsEnabled) console.log(` [tools] ${body.tools.length} client tool(s)`);

  let resultText = '';
  let resolvedModel = model;
  let inputTokens = 0;
  let outputTokens = 0;
  let capturedSessionId = existing?.sdkSessionId || null;

  if (resuming) {
    console.log(` [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
  }

  const runQuery = async () => {
    // Reset per-attempt state so a 401 retry starts clean
    resultText = '';
    resolvedModel = model;
    inputTokens = 0;
    outputTokens = 0;
    capturedSessionId = existing?.sdkSessionId || null;

    // One controller per attempt: the inline auth-failure path aborts it
    // before throwing, and an aborted controller cannot be reused by a retry.
    const abortController = new AbortController();
    // With images the prompt is a one-shot async generator; rebuild it so a
    // retry does not receive an exhausted iterator.
    const prompt = buildQueryPrompt(promptText, images);

    for await (const message of query({
      prompt,
      options: {
        model,
        maxTurns: toolsEnabled ? 5 : 200,
        permissionMode: 'bypassPermissions',
        allowDangerouslySkipPermissions: true,
        abortController,
        ...(toolsEnabled ? { allowedTools: [] } : {}),
        ...(resuming ? { resume: existing.sdkSessionId } : {}),
        ...(sessionKey && !resuming ? { persistSession: true } : {}),
      },
    })) {
      if (message.type === 'system' && message.subtype === 'init' && message.model) {
        resolvedModel = message.model;
      }

      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
        capturedSessionId = message.session_id;
        console.log(` [session] captured sdk session: ${capturedSessionId}`);
      }

      if (message.type === 'assistant' && message.message?.content) {
        const content = message.message.content;
        if (Array.isArray(content)) {
          for (const block of content) {
            if (block.type === 'text') resultText += block.text || '';
          }
        } else if (typeof content === 'string') {
          resultText += content;
        }
        // Detect auth failure surfaced inline (long-running proxy, cached creds)
        if (isAuthFailureText(resultText)) {
          abortController.abort();
          throw new AuthFailureInResultText(resultText);
        }
        // Abort early once we see a complete <tool_call>...</tool_call>
        if (toolsEnabled && hasCompleteToolCall(resultText)) {
          console.log(' [tools] complete tool_call detected — aborting SDK');
          abortController.abort();
          break;
        }
      }

      if (message.type === 'result') {
        if (message.result && !resultText) resultText = message.result;
        if (isAuthFailureText(resultText)) {
          throw new AuthFailureInResultText(resultText);
        }
        // The SDK result message reports token counts under `usage`; the
        // top-level fields are read only as a fallback.
        const usage = message.usage ?? {};
        inputTokens = usage.input_tokens ?? message.input_tokens ?? 0;
        outputTokens = usage.output_tokens ?? message.output_tokens ?? 0;
        break;
      }
    }
  };

  try {
    await runWithAuthRetry({
      attempt: runQuery,
      // Non-streaming never writes to res until the end — retry is always safe
      bailIfStarted: () => false,
      onRefreshing: (err) => console.warn(`[auth] 401 on sync call — refreshing (${err.message?.slice(0, 80)})`),
      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying sync call`),
    });
  } catch (err) {
    // An abort triggered by tool-call detection is expected, not a failure.
    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
    if (!(toolsEnabled && isAbort)) {
      console.error('[non-stream] SDK error:', err.message);
      return res.status(500).json({ error: { message: err.message, type: 'server_error', code: null } });
    }
  }

  if (sessionKey && capturedSessionId) {
    upsertSession(sessionKey, capturedSessionId, resolvedModel);
  }

  const responseHeaders = {};
  if (sessionKey) responseHeaders['X-Session-Id'] = sessionKey;

  // Tool-calling response shape
  if (toolsEnabled) {
    const parsed = parseToolCalls(resultText);
    if (parsed) {
      console.log(` [tools] emitting ${parsed.toolCalls.length} tool_call(s)`);
      return res.set(responseHeaders).json({
        id: `chatcmpl-${requestId}`,
        object: 'chat.completion',
        created: Math.floor(Date.now() / 1000),
        model: normalizeModelName(resolvedModel),
        choices: [{
          index: 0,
          message: {
            role: 'assistant',
            content: parsed.textBefore || null,
            tool_calls: parsed.toolCalls.map((tc) => ({
              id: tc.id,
              type: 'function',
              function: { name: tc.name, arguments: tc.arguments },
            })),
          },
          finish_reason: 'tool_calls',
        }],
        usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
      });
    }
    // No tool_call tags → fall through to normal text response
  }

  res.set(responseHeaders).json({
    id: `chatcmpl-${requestId}`,
    object: 'chat.completion',
    created: Math.floor(Date.now() / 1000),
    model: normalizeModelName(resolvedModel),
    choices: [{
      index: 0,
      message: { role: 'assistant', content: resultText },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
  });
}
780
+
781
+ // ---------------------------------------------------------------------------
782
+ // Express app
783
+ // ---------------------------------------------------------------------------
784
+
785
+ const app = express();
786
+ app.use(express.json({ limit: '10mb' }));
787
+
788
// GET / — send the dashboard page (index.html resolved against __dirname).
app.get('/', (_req, res) => {
  const dashboardPage = join(__dirname, 'index.html');
  res.sendFile(dashboardPage);
});
792
+
793
// POST /v1/chat/completions
// Chat completion endpoint: validates the payload, announces the request on
// the dashboard bus, then hands off to the streaming or non-streaming handler
// depending on `body.stream`.
app.post('/v1/chat/completions', async (req, res) => {
  const requestId = uuidv4().replace(/-/g, '').slice(0, 24);
  // req.body may be undefined when no JSON payload was parsed (this depends on
  // the Express version); fall back to an empty object so the validation
  // below answers 400 instead of throwing.
  const body = req.body ?? {};

  if (!Array.isArray(body.messages) || body.messages.length === 0) {
    return res.status(400).json({
      error: { message: 'messages is required and must be a non-empty array', type: 'invalid_request_error', code: 'invalid_messages' },
    });
  }

  // Session key: X-Session-Id header > body.session_id > null (stateless)
  const sessionKey = req.headers['x-session-id'] || body.session_id || null;
  const existing = getSession(sessionKey);
  // Resolved once; reused by the log line and the dashboard event.
  const resolvedModel = resolveModel(body.model);
  const sessionTag = sessionKey ? ` | session=${sessionKey}${existing ? ' (resume)' : ' (new)'}` : '';

  console.log(`[${new Date().toISOString()}] ${body.stream ? 'stream' : 'sync'} | model=${body.model} → ${resolvedModel} | msgs=${body.messages.length}${sessionTag}`);

  // Dashboard: request.start
  const startedAt = Date.now();
  const imageBlocks = collectImages(body.messages).length;
  dashboardBus.emitEvent({
    type: 'request.start',
    id: requestId,
    method: 'POST',
    path: '/v1/chat/completions',
    model: body.model,
    resolvedModel,
    session: sessionKey,
    stream: !!body.stream,
    tools: hasTools(body),
    images: imageBlocks,
    messages: body.messages.length,
    resuming: !!existing,
  });

  // Dashboard: request.end — emitted exactly once per request. 'finish'
  // reports the final HTTP status; a 'close' that arrives without a prior
  // 'finish' is reported as a client disconnect.
  let endEmitted = false;
  const emitEnd = (overrides = {}) => {
    if (endEmitted) return;
    endEmitted = true;
    dashboardBus.emitEvent({
      type: 'request.end',
      id: requestId,
      durationMs: Date.now() - startedAt,
      status: res.statusCode < 400 ? 'ok' : 'error',
      httpStatus: res.statusCode,
      ...overrides,
    });
  };
  res.on('finish', () => emitEnd());
  res.on('close', () => { if (!endEmitted) emitEnd({ status: 'error', error: 'client_disconnect' }); });

  // A rejection escaping an async route handler is not guaranteed to reach
  // Express' error handling, so catch it here and always terminate the response.
  try {
    if (body.stream) {
      await handleStreaming(req, res, body, requestId, sessionKey);
    } else {
      await handleNonStreaming(res, body, requestId, sessionKey);
    }
  } catch (err) {
    const message = err?.message || 'Internal server error';
    console.error(`[${new Date().toISOString()}] request ${requestId} failed:`, err);
    if (!res.headersSent) {
      // Nothing written yet: answer with the same error envelope the handlers use.
      res.status(500).json({ error: { message, type: 'server_error', code: null } });
    } else if (!res.writableEnded) {
      // Headers already went out (e.g. mid-stream): record the failure before
      // 'finish' can report the already-sent status as ok, then close the response.
      emitEnd({ status: 'error', error: message });
      res.end();
    }
  }
});
854
+
855
// GET /v1/models — fixed list of advertised model ids with their context lengths.
// `created` is stamped with the current time (in seconds) on every call.
app.get('/v1/models', (_req, res) => {
  const created = Math.floor(Date.now() / 1000);
  const catalogue = [
    ['claude-opus-4-7', 1000000],
    ['claude-opus-4-7-200k', 200000],
    ['claude-opus-4-6', 1000000],
    ['claude-sonnet-4-6', 1000000],
    ['claude-haiku-4-5', 200000],
  ];
  const data = catalogue.map(([id, context_length]) => ({
    id,
    object: 'model',
    owned_by: 'anthropic',
    created,
    context_length,
  }));
  res.json({ object: 'list', data });
});
869
+
870
// GET /sessions — describe every entry in the session store, with idle time
// and remaining TTL expressed in rounded seconds.
app.get('/sessions', (_req, res) => {
  const describe = ([key, entry]) => ({
    sessionKey: key,
    sdkSessionId: entry.sdkSessionId,
    model: entry.model,
    messageCount: entry.messageCount,
    createdAt: new Date(entry.createdAt).toISOString(),
    lastUsed: new Date(entry.lastUsed).toISOString(),
    idleSeconds: Math.round((Date.now() - entry.lastUsed) / 1000),
    // Never report a negative TTL.
    ttlRemainingSeconds: Math.max(0, Math.round((SESSION_TTL_MS - (Date.now() - entry.lastUsed)) / 1000)),
  });
  const list = [...sessions].map(describe);
  res.json({ active: list.length, sessions: list });
});
887
+
888
// GET /sessions/:key — describe one session, or answer 404 when the key is unknown.
app.get('/sessions/:key', (req, res) => {
  const { key } = req.params;
  const entry = sessions.get(key);
  if (!entry) {
    res.status(404).json({ error: 'Session not found' });
    return;
  }

  // Milliseconds since the session was last used, sampled at call time.
  const idleMs = () => Date.now() - entry.lastUsed;
  res.json({
    sessionKey: key,
    sdkSessionId: entry.sdkSessionId,
    model: entry.model,
    messageCount: entry.messageCount,
    createdAt: new Date(entry.createdAt).toISOString(),
    lastUsed: new Date(entry.lastUsed).toISOString(),
    idleSeconds: Math.round(idleMs() / 1000),
    // Never report a negative TTL.
    ttlRemainingSeconds: Math.max(0, Math.round((SESSION_TTL_MS - idleMs()) / 1000)),
  });
});
903
+
904
// DELETE /sessions/:key — remove one session. The dashboard event and the
// save only happen when the key was actually present.
app.delete('/sessions/:key', (req, res) => {
  const sessionKey = req.params.key;
  const deleted = sessions.delete(sessionKey);
  if (deleted) {
    dashboardBus.emitEvent({ type: 'session.expired', key: sessionKey, reason: 'manual' });
    saveSessions(sessions);
  }
  res.json({ deleted, sessionKey });
});
913
+
914
// DELETE /sessions — empty the session store, emit one expiry event per
// removed key, save the now-empty store, and report how many were removed.
app.delete('/sessions', (_req, res) => {
  // Snapshot keys and size first; both are gone after clear().
  const removedKeys = Array.from(sessions.keys());
  const deleted = sessions.size;
  sessions.clear();
  removedKeys.forEach((key) => {
    dashboardBus.emitEvent({ type: 'session.expired', key, reason: 'manual_all' });
  });
  saveSessions(sessions);
  res.json({ deleted });
});
923
+
924
// GET /health — liveness payload with the session count and configured TTL.
app.get('/health', (_req, res) => {
  const payload = {
    status: 'ok',
    provider: 'claude-agent-sdk',
    activeSessions: sessions.size,
    sessionTtlMs: SESSION_TTL_MS,
    timestamp: new Date().toISOString(),
  };
  res.json(payload);
});
934
+
935
// GET /auth/status
// Returns the result of getAuthStatus(). Unless ?quick=1 (or ?quick=true) is
// passed, and only when that status reports ok + loggedIn, it also runs
// forceRefresh() and includes the probe outcome. `verified` is null whenever
// no probe was run.
app.get('/auth/status', async (req, res) => {
  const skipProbe = ['1', 'true'].includes(req.query.quick);
  const status = await getAuthStatus();
  const shouldProbe = !skipProbe && status.ok && status.loggedIn;

  if (!shouldProbe) {
    res.json({ ...status, verified: null, timestamp: new Date().toISOString() });
    return;
  }

  const probe = await forceRefresh();
  res.json({
    ...status,
    verified: Boolean(probe.ok),
    probeMs: probe.durationMs,
    probeError: probe.error,
    timestamp: new Date().toISOString(),
  });
});
953
+
954
// POST /auth/refresh
// Runs forceRefresh(), publishes the outcome on the dashboard bus, and answers
// 200 when the probe succeeded or 502 otherwise. Intended for cron / launchd.
app.post('/auth/refresh', async (_req, res) => {
  const probe = await forceRefresh();
  const { ok, durationMs, error } = probe;
  dashboardBus.emitEvent({ type: 'auth.refresh', ok, durationMs, error });

  const httpStatus = ok ? 200 : 502;
  res.status(httpStatus).json({ ...probe, timestamp: new Date().toISOString() });
});
964
+
965
+ // ---------------------------------------------------------------------------
966
+ // Dashboard — live event stream + snapshots
967
+ // ---------------------------------------------------------------------------
968
+
969
// GET /events — Server-Sent Events feed of dashboard bus events.
// Each bus event is written as one `data:` frame; a comment frame is sent
// every 15 s as a heartbeat. Both are torn down when the request closes.
app.get('/events', (req, res) => {
  const sseHeaders = [
    ['Content-Type', 'text/event-stream'],
    ['Cache-Control', 'no-cache, no-transform'],
    ['Connection', 'keep-alive'],
    ['X-Accel-Buffering', 'no'],
  ];
  for (const [name, value] of sseHeaders) {
    res.setHeader(name, value);
  }
  res.flushHeaders();
  res.write(':ok\n\n'); // comment to open the stream

  const forwardEvent = (ev) => {
    if (res.writableEnded) return;
    res.write(`data: ${JSON.stringify(ev)}\n\n`);
  };
  const sendHeartbeat = () => {
    if (res.writableEnded) return;
    res.write(':heartbeat\n\n');
  };

  dashboardBus.on('event', forwardEvent);
  const heartbeatTimer = setInterval(sendHeartbeat, 15_000);

  req.on('close', () => {
    clearInterval(heartbeatTimer);
    dashboardBus.off('event', forwardEvent);
  });
});
989
+
990
// Cached build metadata — populated lazily by the first loadBuildMeta() call.
let BUILD_META = null;

/**
 * Return build/runtime metadata, reading package.json on the first call and
 * serving the cached object afterwards.
 *
 * If package.json cannot be read or parsed, a fallback object is cached
 * instead: name 'mobygate', version 'unknown', contextWindow null. The
 * `platform` and `node` fields are reported identically on both paths.
 *
 * @returns {Promise<{name: string, version: string, platform: string, node: string, contextWindow: (number|null)}>}
 */
async function loadBuildMeta() {
  if (BUILD_META) return BUILD_META;

  // Runtime-derived fields do not depend on package.json, so both the normal
  // and the fallback result use the same "<platform>-<arch>" format.
  const platform = `${process.platform}-${process.arch}`;
  const node = process.version;

  try {
    const { readFile } = await import('fs/promises');
    const pkg = JSON.parse(await readFile(join(__dirname, 'package.json'), 'utf8'));
    BUILD_META = {
      name: pkg.name,
      version: pkg.version,
      platform,
      node,
      contextWindow: DEFAULT_MODEL.includes('[1m]') ? 1_000_000 : 200_000,
    };
  } catch {
    // Best-effort: metadata is informational, so degrade rather than fail.
    BUILD_META = { name: 'mobygate', version: 'unknown', platform, node, contextWindow: null };
  }
  return BUILD_META;
}
1009
+
1010
// GET /dashboard/recent — ring-buffer snapshot for initial page load.
// ?limit=N selects how many recent events to return: default 100, capped at
// 500. Missing, non-numeric, or negative values fall back to the default
// rather than passing NaN or a negative count to the bus.
app.get('/dashboard/recent', async (req, res) => {
  const requested = Number.parseInt(req.query.limit, 10);
  const limit = Number.isInteger(requested) && requested >= 0
    ? Math.min(500, requested)
    : 100;

  res.json({
    recent: dashboardBus.getRecent({ limit }),
    stats: dashboardBus.getStats(),
    activeSessions: sessions.size,
    port: PORT,
    defaultModel: DEFAULT_MODEL,
    build: await loadBuildMeta(),
  });
});
1022
+
1023
// GET /dashboard/sessions — per-session detail for the dashboard, ordered
// from least idle to most idle, with all timings taken from one `now` sample.
app.get('/dashboard/sessions', (_req, res) => {
  const now = Date.now();
  const wholeSeconds = (ms) => Math.floor(ms / 1000);

  const rows = [...sessions].map(([key, entry]) => {
    const idleMs = now - entry.lastUsed;
    return {
      key,
      sdkSessionId: entry.sdkSessionId,
      model: entry.model,
      messageCount: entry.messageCount,
      createdAt: new Date(entry.createdAt).toISOString(),
      lastUsedAt: new Date(entry.lastUsed).toISOString(),
      idleSec: wholeSeconds(idleMs),
      // Never report a negative TTL.
      ttlRemainingSec: Math.max(0, wholeSeconds(SESSION_TTL_MS - idleMs)),
    };
  });

  // Most recently used first
  rows.sort((left, right) => left.idleSec - right.idleSec);
  res.json({ sessions: rows, ttlMs: SESSION_TTL_MS });
});
1043
+
1044
// GET /dashboard/logs — tail the server log file.
// ?lines=N selects how many trailing lines to return: default 200, capped at
// 2000. Missing, non-numeric, or negative values fall back to the default.
// Responds with an empty list and a note when the log file cannot be stat'ed,
// and with 500 when reading it fails.
app.get('/dashboard/logs', async (req, res) => {
  const requested = Number.parseInt(req.query.lines, 10);
  const lines = Number.isInteger(requested) && requested >= 0
    ? Math.min(2000, requested)
    : 200;
  const logPath = join(LOGS_DIR, 'server.log');
  try {
    const { readFile, stat } = await import('fs/promises');
    const exists = await stat(logPath).catch(() => null);
    if (!exists) return res.json({ path: logPath, lines: [], note: 'log file does not exist yet' });
    // Read the whole file (tolerable for a dev proxy log; rotate if > 10 MB).
    const raw = await readFile(logPath, 'utf8');
    const split = raw.split(/\r?\n/);
    // A file that ends with a newline produces a trailing '' element. Drop
    // only that one, so a final line without a newline terminator is kept.
    if (split[split.length - 1] === '') split.pop();
    // slice(-0) would return everything, so handle a zero-line request explicitly.
    const tail = lines > 0 ? split.slice(-lines) : [];
    res.json({ path: logPath, lines: tail, totalLines: split.length, sizeBytes: exists.size });
  } catch (e) {
    res.status(500).json({ error: e.message });
  }
});
1061
+
1062
+ // ---------------------------------------------------------------------------
1063
+ // Start
1064
+ // ---------------------------------------------------------------------------
1065
+
1066
// Bind the HTTP server. Once listening, print the banner and a short
// configuration summary, then announce the boot on the dashboard bus.
app.listen(PORT, async () => {
  const ttlMin = Math.round(SESSION_TTL_MS / 60000);
  const { version } = await loadBuildMeta();
  console.log(banner({ version }));

  const summary = [
    ` port ${PORT}`,
    ` model ${DEFAULT_MODEL}`,
    ` session TTL ${ttlMin} min`,
    ` dashboard http://localhost:${PORT}`,
    '',
  ];
  for (const line of summary) {
    console.log(line);
  }

  dashboardBus.emitEvent({ type: 'server.boot', port: PORT, defaultModel: DEFAULT_MODEL });
});