mobygate 0.8.4 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +544 -0
- package/bin/mobygate.js +214 -0
- package/dashboard.css +1 -0
- package/index.html +1 -15
- package/inspector.html +200 -3
- package/lib/anthropic.js +6 -1
- package/lib/captures-index.js +524 -0
- package/lib/inference-runner.js +753 -0
- package/lib/openai-translation.js +146 -0
- package/lib/quiet.js +249 -0
- package/lib/request-capture.js +24 -0
- package/package.json +4 -1
- package/server.js +389 -1151
package/server.js
CHANGED
|
@@ -70,16 +70,14 @@ import {
|
|
|
70
70
|
getCurrentVersion,
|
|
71
71
|
} from './lib/updater.js';
|
|
72
72
|
import {
|
|
73
|
-
anthropicMessagesToPrompt,
|
|
74
73
|
collectAnthropicImages,
|
|
75
|
-
buildAnthropicResponse,
|
|
76
|
-
makeStreamTranslator,
|
|
77
74
|
hasAnthropicTools,
|
|
78
|
-
mapStopReason,
|
|
79
|
-
extractSdkUsage,
|
|
80
75
|
} from './lib/anthropic.js';
|
|
76
|
+
import { hasTools, collectImages } from './lib/openai-translation.js';
|
|
77
|
+
import { runInference, openaiSurface, anthropicSurface } from './lib/inference-runner.js';
|
|
81
78
|
import { resolveSessionKey } from './lib/session-derive.js';
|
|
82
79
|
import { captureRequest, captureResponse, isCaptureEnabled, CAPTURE_DIR_PATH } from './lib/request-capture.js';
|
|
80
|
+
import { scrubAnthropicBody, quietDiagnose } from './lib/quiet.js';
|
|
83
81
|
|
|
84
82
|
const __filename = fileURLToPath(import.meta.url);
|
|
85
83
|
const __dirname = dirname(__filename);
|
|
@@ -89,8 +87,33 @@ const PORT = parseInt(process.env.PORT || '3456', 10);
|
|
|
89
87
|
// want to share the proxy on a network can set bind: 0.0.0.0 (or a specific
|
|
90
88
|
// interface) in ~/.mobygate/config.yaml, but should add auth in front of it.
|
|
91
89
|
const BIND = process.env.BIND || '127.0.0.1';
|
|
92
|
-
const DEFAULT_MODEL = process.env.DEFAULT_MODEL || 'claude-opus-4-
|
|
93
|
-
|
|
90
|
+
const DEFAULT_MODEL = process.env.DEFAULT_MODEL || 'claude-opus-4-8[1m]';
|
|
91
|
+
// SESSION_TTL_MS: how long mobygate holds onto an idle SDK session before
|
|
92
|
+
// expiring it from its in-memory + on-disk session store. v0.8.5 raises
|
|
93
|
+
// the default from 1h → 4h based on real-world usage data: most multi-
|
|
94
|
+
// channel users (Discord agents serving 20+ channels) revisit channels
|
|
95
|
+
// every few hours, and a 1h TTL forced a fresh `query()` (full prompt
|
|
96
|
+
// re-send) every time. With 4h, mobygate retains the SDK session ID for
|
|
97
|
+
// four hours, so the next request resumes via session-id rather than
|
|
98
|
+
// reissuing the entire prompt.
|
|
99
|
+
//
|
|
100
|
+
// Caveat — this only solves SDK-side session continuity. Anthropic's
|
|
101
|
+
// wire-side prompt cache (5 min default, 1h with the
|
|
102
|
+
// `extended-cache-ttl-2025-04-11` beta) is unaffected; the SDK doesn't
|
|
103
|
+
// currently expose that beta to callers, so cache-creation tax on idle
|
|
104
|
+
// channels still applies. The TTL bump is a partial mitigation, not a
|
|
105
|
+
// fix.
|
|
106
|
+
//
|
|
107
|
+
// Override: SESSION_TTL_MS=14400000 (env, in milliseconds)
|
|
108
|
+
// or MOBY_SESSION_TTL_HOURS=4 (more readable, also accepted)
|
|
109
|
+
/**
 * Resolve the idle-session TTL, in milliseconds, from environment variables.
 *
 * Precedence:
 *   1. SESSION_TTL_MS          — integer milliseconds
 *   2. MOBY_SESSION_TTL_HOURS  — (possibly fractional) hours, must be > 0
 *   3. 4h default (was 1h pre-v0.8.5)
 *
 * An override that does not parse to a usable number is ignored and the next
 * source is tried, so a typo in the environment can never yield a NaN or
 * negative TTL.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read.
 * @returns {number} TTL in milliseconds.
 */
function resolveSessionTtlMs(env = process.env) {
  const DEFAULT_TTL_MS = 4 * 60 * 60 * 1000;

  if (env.SESSION_TTL_MS) {
    const ms = Number.parseInt(env.SESSION_TTL_MS, 10);
    // 0 stays accepted (it was accepted before); NaN and negatives are rejected.
    if (Number.isFinite(ms) && ms >= 0) return ms;
  }

  if (env.MOBY_SESSION_TTL_HOURS) {
    const hours = Number.parseFloat(env.MOBY_SESSION_TTL_HOURS);
    // NaN fails this comparison, so unparsable values fall through too.
    if (hours > 0) return Math.round(hours * 60 * 60 * 1000);
  }

  return DEFAULT_TTL_MS;
}

const SESSION_TTL_MS = resolveSessionTtlMs();
|
|
94
117
|
|
|
95
118
|
// ---------------------------------------------------------------------------
|
|
96
119
|
// Session store — maps client keys → SDK session IDs (persisted to disk)
|
|
@@ -180,7 +203,13 @@ for (const sig of ['SIGTERM', 'SIGINT', 'SIGHUP']) {
|
|
|
180
203
|
// falling back to opus or returning a zero-billed response. Fixed in
|
|
181
204
|
// v0.8.2 by routing 4-6 through directly.
|
|
182
205
|
const MODEL_MAP = {
|
|
183
|
-
|
|
206
|
+
// Latest opus → 4-8 (1M, Max-included — verified live 2026-05-29).
|
|
207
|
+
// 4-7 entries kept so explicit 4-7 requests still resolve.
|
|
208
|
+
'claude-opus-4': 'claude-opus-4-8[1m]',
|
|
209
|
+
'claude-opus-4-8': 'claude-opus-4-8[1m]',
|
|
210
|
+
'claude-opus-4-8[1m]': 'claude-opus-4-8[1m]',
|
|
211
|
+
'claude-opus-4-8-1m': 'claude-opus-4-8[1m]',
|
|
212
|
+
'claude-opus-4-8-200k': 'claude-opus-4-8',
|
|
184
213
|
'claude-opus-4-6': 'claude-opus-4-6',
|
|
185
214
|
'claude-opus-4-7': 'claude-opus-4-7[1m]',
|
|
186
215
|
'claude-opus-4-7[1m]': 'claude-opus-4-7[1m]',
|
|
@@ -199,7 +228,16 @@ const MODEL_MAP = {
|
|
|
199
228
|
'claude-sonnet-4-6-200k': 'claude-sonnet-4-6', // explicit 200k alias (redundant, kept for clarity)
|
|
200
229
|
'claude-haiku-4': 'claude-haiku-4-5-20251001',
|
|
201
230
|
'claude-haiku-4-5': 'claude-haiku-4-5-20251001',
|
|
202
|
-
|
|
231
|
+
// Fable 5 — distinct recent model family (parallel to Opus 4.x).
|
|
232
|
+
// 1M variant Max-included (verified live 2026-05-29). Additive: opus
|
|
233
|
+
// stays the default; fable resolves only when explicitly requested.
|
|
234
|
+
'claude-fable-5': 'claude-fable-5[1m]',
|
|
235
|
+
'claude-fable-5[1m]': 'claude-fable-5[1m]',
|
|
236
|
+
'claude-fable-5-1m': 'claude-fable-5[1m]',
|
|
237
|
+
'claude-fable-5-200k': 'claude-fable-5',
|
|
238
|
+
'fable': 'claude-fable-5[1m]',
|
|
239
|
+
'fable-200k': 'claude-fable-5',
|
|
240
|
+
'opus': 'claude-opus-4-8[1m]', // latest opus, 1M Max-included
|
|
203
241
|
'sonnet': 'claude-sonnet-4-6', // 200k default; use 'sonnet-1m' for explicit 1M
|
|
204
242
|
'sonnet-1m': 'claude-sonnet-4-6[1m]', // alias for 'sonnet' + explicit 1M opt-in
|
|
205
243
|
'haiku': 'claude-haiku-4-5-20251001',
|
|
@@ -212,1101 +250,6 @@ function resolveModel(model) {
|
|
|
212
250
|
return MODEL_MAP[stripped] || MODEL_MAP[model] || DEFAULT_MODEL;
|
|
213
251
|
}
|
|
214
252
|
|
|
215
|
-
// ---------------------------------------------------------------------------
|
|
216
|
-
// OpenAI messages → single prompt string
|
|
217
|
-
// ---------------------------------------------------------------------------
|
|
218
|
-
|
|
219
|
-
/**
 * Flatten an OpenAI-style message `content` value into plain text.
 *
 * - string            → returned unchanged
 * - array of parts    → text parts and bare strings are joined with '\n';
 *                       `image_url` parts contribute nothing (images are
 *                       carried separately); any other part is JSON-encoded
 * - other object      → JSON-encoded
 * - anything else     → String(value), with falsy values becoming ''
 *
 * @param {unknown} content - The `content` field of a chat message.
 * @returns {string} Text representation of the content.
 */
function extractContent(content) {
  if (typeof content === 'string') return content;
  if (Array.isArray(content)) {
    return content
      .map((part) => {
        // Tolerate null/undefined entries instead of throwing on `part.type`.
        if (part == null) return '';
        if (typeof part === 'string') return part;
        if (part.type === 'text') return part.text;
        if (part.type === 'image_url') return ''; // images carried separately; drop from text
        return JSON.stringify(part);
      })
      .filter(Boolean) // drop empty contributions so they add no blank lines
      .join('\n');
  }
  if (content && typeof content === 'object') return JSON.stringify(content);
  return String(content || '');
}
|
|
235
|
-
|
|
236
|
-
/**
 * Convert an OpenAI message.content array into Anthropic image content blocks.
 *
 * Each `image_url` part becomes one block:
 *   - base64 `data:` URL → `{ type: 'image', source: { type: 'base64', media_type, data } }`
 *   - any other URL      → `{ type: 'image', source: { type: 'url', url } }`
 *
 * Parts that are not `image_url`, or whose URL is missing or not a string,
 * are skipped.
 *
 * @param {unknown} content - The `content` field of a chat message.
 * @returns {Array<object>} Image blocks in input order; [] when content is not an array.
 */
function extractImageBlocks(content) {
  if (!Array.isArray(content)) return [];

  // Base64 data URL. Optional parameters between the media type and the
  // `;base64` marker (e.g. `;charset=utf-8`) are skipped, the payload may
  // contain line breaks, and scheme/marker matching is case-insensitive.
  const dataUrlPattern = /^data:([^;,]+)(?:;[^;,]*)*?;base64,([\s\S]+)$/i;

  const blocks = [];
  for (const part of content) {
    if (!part || part.type !== 'image_url') continue;
    // `image_url` may be the URL itself or an object wrapping it in `.url`.
    const url = typeof part.image_url === 'string' ? part.image_url : part.image_url?.url;
    if (typeof url !== 'string' || url === '') continue;

    const dataMatch = dataUrlPattern.exec(url);
    if (dataMatch) {
      blocks.push({
        type: 'image',
        source: { type: 'base64', media_type: dataMatch[1], data: dataMatch[2] },
      });
    } else {
      blocks.push({ type: 'image', source: { type: 'url', url } });
    }
  }
  return blocks;
}
|
|
254
|
-
|
|
255
|
-
/**
 * Collect image blocks from the LAST user message only (per the original
 * note here: OpenAI only attaches images to the latest turn).
 *
 * @param {Array<object>} messages - Chat messages, oldest first.
 * @returns {Array<object>} Image blocks from the most recent `user` message;
 *   [] when there is no user message or `messages` is not an array.
 */
function collectImages(messages) {
  if (!Array.isArray(messages)) return [];
  for (let i = messages.length - 1; i >= 0; i--) {
    // Optional chaining tolerates null/undefined entries in the history.
    if (messages[i]?.role === 'user') return extractImageBlocks(messages[i].content);
  }
  return [];
}
|
|
262
|
-
|
|
263
|
-
// ---------------------------------------------------------------------------
|
|
264
|
-
// Tool calling (Phase 1: native MCP tools — no more <tool_call> text hack)
|
|
265
|
-
// ---------------------------------------------------------------------------
|
|
266
|
-
// Client-provided OpenAI tools are registered with the SDK as in-process MCP
|
|
267
|
-
// tools (see lib/tool-bridge.js). The model emits **native** tool_use content
|
|
268
|
-
// blocks in its assistant messages; we abort the SDK on the first one and
|
|
269
|
-
// return OpenAI tool_calls to the client. When the client replies with tool
|
|
270
|
-
// results, we send them back as Anthropic tool_result content blocks inside
|
|
271
|
-
// a single SDKUserMessage — round-tripping cleanly through the SDK session.
|
|
272
|
-
|
|
273
|
-
/**
 * Report whether a request body carries at least one client tool.
 *
 * @param {object | null | undefined} body - Parsed request body.
 * @returns {boolean} true only when `body.tools` is a non-empty array.
 */
function hasTools(body) {
  const tools = body?.tools;
  if (!Array.isArray(tools)) return false;
  return tools.length > 0;
}
|
|
276
|
-
|
|
277
|
-
/**
 * Build the prompt text from the OpenAI messages array.
 *
 * Returns `{ promptText }` on success, or `{ promptText: '', error }` when a
 * resumed request carries nothing new to send. Tool results are spliced in
 * as text produced by `toolMessagesToText` (see lib/tool-bridge.js for the
 * rationale).
 *
 * Resuming vs fresh:
 *   - Resuming: only the new tail is sent — trailing `tool` messages plus
 *     the most recent `user` text, if any. The backwards walk stops at the
 *     first message that is neither.
 *   - Fresh: the visible history is serialized with <system> and
 *     <previous_response> wrappers; user text goes in verbatim. Messages
 *     with any other role are ignored. No tool-instruction injection.
 *
 * @param {Array<object>} messages - Chat messages, oldest first.
 * @param {{ resuming?: boolean }} [options]
 * @returns {{ promptText: string, error?: string }}
 */
function messagesToPrompt(messages, { resuming = false } = {}) {
  // Treat a missing/invalid history as empty instead of throwing on iteration.
  const history = Array.isArray(messages) ? messages : [];

  if (resuming) {
    // Walk backwards from the end, collecting trailing tool messages and
    // the most recent user text.
    const trailingToolMessages = [];
    let userText = '';
    for (let i = history.length - 1; i >= 0; i--) {
      const msg = history[i];
      if (msg?.role === 'tool') {
        trailingToolMessages.unshift(msg);
        continue;
      }
      if (msg?.role === 'user') userText = extractContent(msg.content);
      break; // user message consumed, or some other role reached: stop either way
    }

    const toolResultsText = toolMessagesToText(trailingToolMessages);
    if (!userText && !toolResultsText) {
      // An assistant-terminated history must not be replayed as the new user
      // prompt (the model would "respond to its own reply"); report it clearly.
      const lastRole = history[history.length - 1]?.role || 'unknown';
      return {
        promptText: '',
        error: `Resume mode requires the request to end with a user message or tool result. Last message has role "${lastRole}".`,
      };
    }

    const tail = [];
    if (toolResultsText) tail.push(toolResultsText);
    if (userText) tail.push(userText);
    return { promptText: tail.join('\n\n') };
  }

  // Fresh request: serialize visible history as XML-wrapped text.
  const parts = [];
  for (const msg of history) {
    switch (msg?.role) {
      case 'system':
        parts.push(`<system>\n${extractContent(msg.content)}\n</system>\n`);
        break;
      case 'user':
        parts.push(extractContent(msg.content));
        break;
      case 'assistant': {
        // Best-effort replay: only the text is kept; tool_calls are dropped.
        const text = extractContent(msg.content);
        if (text) parts.push(`<previous_response>\n${text}\n</previous_response>\n`);
        break;
      }
      case 'tool': {
        // Tool message on a fresh turn: spliced in as text.
        const text = toolMessagesToText([msg]);
        if (text) parts.push(text);
        break;
      }
      default:
        // Unknown roles and null entries contribute nothing.
        break;
    }
  }
  return {
    promptText: parts.join('\n').trim(),
  };
}
|
|
360
|
-
|
|
361
|
-
/**
 * Wrap promptText + optional image blocks into the form query() expects.
 *
 * @param {string} promptText - Text portion of the prompt.
 * @param {Array<object>} [imageBlocks=[]] - Image content blocks, if any.
 * @returns {string | AsyncGenerator<object>} `promptText` itself when there
 *   are no images (fast path); otherwise an async generator that yields one
 *   user message whose content is the text block followed by the image
 *   blocks. The generator is single-use — build a fresh one per attempt.
 */
function buildQueryPrompt(promptText, imageBlocks = []) {
  // A missing or null image list is treated as "no images", not a crash.
  if (!imageBlocks?.length) return promptText;

  const content = [
    { type: 'text', text: promptText || '' },
    ...imageBlocks,
  ];

  async function* gen() {
    yield {
      type: 'user',
      message: { role: 'user', content },
      parent_tool_use_id: null,
    };
  }
  return gen();
}
|
|
382
|
-
|
|
383
|
-
// ---------------------------------------------------------------------------
|
|
384
|
-
// Normalize model name for OpenAI response format
|
|
385
|
-
// ---------------------------------------------------------------------------
|
|
386
|
-
|
|
387
|
-
/**
 * Collapse a concrete model id to the family name reported in OpenAI-format
 * responses. The first family keyword found (opus, sonnet, haiku — checked
 * in that order) wins; an unrecognized id is returned unchanged and a
 * missing/empty id becomes 'claude-sonnet-4'.
 *
 * @param {string | null | undefined} model - Resolved model id.
 * @returns {string} Normalized model name.
 */
function normalizeModelName(model) {
  const families = [
    ['opus', 'claude-opus-4'],
    ['sonnet', 'claude-sonnet-4'],
    ['haiku', 'claude-haiku-4'],
  ];
  for (const [keyword, familyName] of families) {
    if (model?.includes(keyword)) return familyName;
  }
  return model || 'claude-sonnet-4';
}
|
|
393
|
-
|
|
394
|
-
// ---------------------------------------------------------------------------
|
|
395
|
-
// SSE helpers
|
|
396
|
-
// ---------------------------------------------------------------------------
|
|
397
|
-
|
|
398
|
-
/**
 * Build one OpenAI `chat.completion.chunk` payload with a single choice.
 *
 * @param {string} requestId - Used to form the `chatcmpl-…` id.
 * @param {string} model - Model id; reported via normalizeModelName().
 * @param {string | undefined} content - Delta text; omitted from the delta when undefined.
 * @param {string | undefined} role - Delta role; omitted from the delta when falsy.
 * @param {string | undefined} finishReason - Finish reason; null when falsy.
 * @returns {object} The chunk object, ready to be serialized.
 */
function makeChunk(requestId, model, content, role, finishReason) {
  const delta = {};
  if (role) delta.role = role;
  if (content !== undefined) delta.content = content;

  const choice = {
    index: 0,
    delta,
    finish_reason: finishReason || null,
  };

  return {
    id: `chatcmpl-${requestId}`,
    object: 'chat.completion.chunk',
    created: Math.floor(Date.now() / 1000),
    model: normalizeModelName(model),
    choices: [choice],
  };
}
|
|
414
|
-
|
|
415
|
-
/**
 * Write one server-sent-event `data:` frame carrying `data` as JSON.
 * Does nothing once the response has been ended.
 *
 * @param {{ writableEnded: boolean, write: (chunk: string) => unknown }} res - Response stream.
 * @param {unknown} data - JSON-serializable payload.
 */
function sendSSE(res, data) {
  if (res.writableEnded) return;
  const frame = `data: ${JSON.stringify(data)}\n\n`;
  res.write(frame);
}
|
|
420
|
-
|
|
421
|
-
// ---------------------------------------------------------------------------
|
|
422
|
-
// POST /v1/chat/completions — streaming
|
|
423
|
-
// ---------------------------------------------------------------------------
|
|
424
|
-
|
|
425
|
-
/**
 * Handle a streaming POST /v1/chat/completions request.
 *
 * Flow:
 *   1. Build the prompt (resume tail or full history); reply 400 if a resumed
 *      request has nothing new to send.
 *   2. Open the SSE response and run the SDK query under runWithAuthRetry.
 *   3. Non-tools mode: forward each assistant text turn as a chunk as it
 *      arrives. Tools mode: buffer text and abort the SDK on the first
 *      native tool_use block, then emit one chunk with tool_calls (or the
 *      buffered text followed by a stop chunk).
 *   4. Persist the SDK session id for `sessionKey` and record the capture.
 *
 * @param {object} req - Incoming request (unused here).
 * @param {object} res - Response used for the SSE stream.
 * @param {object} body - Parsed OpenAI-format request body.
 * @param {string} requestId - Id used in chunk ids, headers and capture.
 * @param {string | null | undefined} sessionKey - Client session key, if any.
 * @returns {Promise<unknown>} Resolves once the stream has been finished.
 */
async function handleStreaming(req, res, body, requestId, sessionKey) {
  const existing = getSession(sessionKey);
  const resuming = !!existing?.sdkSessionId;
  const toolsEnabled = hasTools(body);
  const { promptText, error: promptError } = messagesToPrompt(body.messages, { resuming });
  if (promptError) {
    return res.status(400).json({
      error: { message: promptError, type: 'invalid_request_error', code: 'invalid_resume_messages' },
    });
  }
  const images = collectImages(body.messages);
  // NOTE: `prompt` is built inside runQuery (not here) because
  // buildQueryPrompt returns a single-use async iterator for multimodal
  // requests. A 401 makes runWithAuthRetry invoke runQuery a second time; an
  // iterator built here would already be exhausted on that second attempt.
  const model = resolveModel(body.model);
  // In-process MCP server exposing client tools to the SDK; null when tools
  // are not enabled (or, per the original note, all tools are malformed).
  const clientToolsServer = toolsEnabled ? buildClientToolsServer(body.tools) : null;
  // System-prompt append describing the available client tools.
  const toolsGuidance = clientToolsServer ? buildToolUsageGuidance(body.tools) : null;
  if (images.length) console.log(` [multimodal] ${images.length} image block(s)`);
  if (toolsEnabled) console.log(` [tools] ${body.tools.length} client tool(s) registered as MCP`);

  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  res.setHeader('X-Request-Id', requestId);
  if (sessionKey) res.setHeader('X-Session-Id', sessionKey);
  res.flushHeaders();
  res.write(':ok\n\n');

  const abortController = new AbortController();
  let isFirst = true; // true until the first content chunk has been written
  let resolvedModel = model;
  let capturedSessionId = existing?.sdkSessionId || null;
  let clientDisconnected = false;
  let inputTokens = 0;
  let outputTokens = 0;
  let cacheReadTokens = 0;
  let cacheCreateTokens = 0;

  res.on('close', () => {
    clientDisconnected = true;
    abortController.abort();
  });

  if (resuming) {
    console.log(` [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
  }

  // Tools mode buffers text and collects native tool_use blocks so that text
  // preceding a tool_use is returned together with the tool_calls.
  let bufferedText = '';
  let collectedToolCalls = []; // [{id, name, arguments}] from extractToolUses()

  const runQuery = async () => {
    // Reset per-attempt state so a 401 retry starts clean.
    bufferedText = '';
    collectedToolCalls = [];
    isFirst = true;
    resolvedModel = model;
    capturedSessionId = existing?.sdkSessionId || null;

    // Built lazily on each attempt — see the NOTE above.
    const prompt = buildQueryPrompt(promptText, images);
    for await (const message of query({
      prompt,
      options: {
        model,
        maxTurns: toolsEnabled ? 5 : 200,
        permissionMode: 'bypassPermissions',
        allowDangerouslySkipPermissions: true,
        abortController,
        // Tools mode: register client tools as an in-process MCP server and
        // allow only those.
        ...(clientToolsServer
          ? {
              mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
              allowedTools: [`${MCP_TOOL_PREFIX}*`],
              systemPrompt: { type: 'preset', preset: 'claude_code', append: toolsGuidance },
            }
          : toolsEnabled
            // Tools were requested but none were valid — disable all tools.
            ? { allowedTools: [] }
            : {}),
        ...(resuming ? { resume: existing.sdkSessionId } : {}),
        ...(sessionKey && !resuming ? { persistSession: true } : {}),
      },
    })) {
      if (clientDisconnected) break;

      const msgPreview = message.type === 'assistant'
        ? `content_keys=${Object.keys(message).join(',')}`
        : message.type === 'result'
          ? `result=${(message.result || '').slice(0, 60)}`
          : message.subtype || '';
      console.log(` [msg] type=${message.type} ${msgPreview}`);

      if (message.type === 'system' && message.subtype === 'init' && message.model) {
        resolvedModel = message.model;
      }

      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
        capturedSessionId = message.session_id;
        console.log(` [session] captured sdk session: ${capturedSessionId}`);
      }

      // Extract text from this assistant message.
      let turnText = '';
      if (message.type === 'assistant' && message.message?.content) {
        const content = message.message.content;
        if (Array.isArray(content)) {
          for (const b of content) if (b.type === 'text' && b.text) turnText += b.text;
        } else if (typeof content === 'string') {
          turnText = content;
        }
      }

      // Auth failure surfaced inline as text: throw so runWithAuthRetry
      // treats it like a real 401. Only before any chunk has been written.
      if (turnText && isAuthFailureText(turnText) && isFirst) {
        abortController.abort();
        throw new AuthFailureInResultText(turnText);
      }

      // Tools mode: abort the SDK the moment a native tool_use block is
      // seen — the tool is executed client-side, not by the SDK.
      if (toolsEnabled && message.type === 'assistant' && hasToolUse(message)) {
        const calls = extractToolUses(message);
        if (calls.length) {
          collectedToolCalls.push(...calls);
          if (turnText) bufferedText += turnText;
          console.log(` [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
          abortController.abort();
          break;
        }
      }

      if (turnText) {
        if (toolsEnabled) {
          // Buffer: the text may precede a tool_use, or end up as the final
          // response when the model calls no tools.
          bufferedText += turnText;
        } else {
          sendSSE(res, makeChunk(requestId, resolvedModel, turnText, isFirst ? 'assistant' : undefined, null));
          isFirst = false;
        }
      }

      if (message.type === 'result') {
        if (message.result && isAuthFailureText(message.result) && isFirst) {
          throw new AuthFailureInResultText(message.result);
        }
        // Fall back to the result text when no assistant text was streamed.
        if (!toolsEnabled && message.result && isFirst) {
          sendSSE(res, makeChunk(requestId, resolvedModel, message.result, 'assistant', null));
          isFirst = false;
        }
        if (toolsEnabled && !bufferedText && message.result) bufferedText = message.result;
        const usage = extractSdkUsage(message);
        inputTokens = usage.input_tokens;
        outputTokens = usage.output_tokens;
        cacheReadTokens = usage.cache_read_input_tokens;
        cacheCreateTokens = usage.cache_creation_input_tokens;
        console.log(` [model-billed] requested=${resolvedModel} modelUsage=${JSON.stringify(usage.modelUsage || '(none)')}`);
        break;
      }
    }
  };

  try {
    await runWithAuthRetry({
      attempt: runQuery,
      // Only retry if no real chunk has been written yet. Tools mode buffers
      // internally, so a retry is safe there regardless.
      bailIfStarted: () => !toolsEnabled && !isFirst,
      onRefreshing: (err) => console.warn(`[auth] 401 on stream — refreshing (${err.message?.slice(0, 80)})`),
      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying stream`),
    });
  } catch (err) {
    // An abort triggered by tool-call detection surfaces as an abort error —
    // not a real failure.
    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
    if (!clientDisconnected && !(toolsEnabled && isAbort)) {
      console.error('[stream] SDK error:', err?.message);
      sendSSE(res, { error: { message: err?.message, type: 'server_error', code: null } });
    }
  }

  if (sessionKey && capturedSessionId) {
    upsertSession(sessionKey, capturedSessionId, resolvedModel);
  }

  const usage = {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    cache_read_input_tokens: cacheReadTokens,
    cache_creation_input_tokens: cacheCreateTokens,
  };
  // Both modes report a client disconnect the same way. (The tools branch
  // previously always recorded 'ok'.)
  // NOTE(review): SDK errors are also recorded as 'ok'; confirm whether
  // captureResponse supports a dedicated error status.
  const status = clientDisconnected ? 'client_disconnect' : 'ok';

  // Tools mode: emit the buffered response as a single chunk with either
  // tool_calls (+ finish_reason: tool_calls) or plain text (+ stop).
  if (toolsEnabled && !res.writableEnded) {
    if (collectedToolCalls.length > 0) {
      console.log(` [tools] emitting ${collectedToolCalls.length} tool_call(s)`);
      const chunk = {
        id: `chatcmpl-${requestId}`,
        object: 'chat.completion.chunk',
        created: Math.floor(Date.now() / 1000),
        model: normalizeModelName(resolvedModel),
        choices: [{
          index: 0,
          delta: {
            role: 'assistant',
            content: bufferedText.trim() || null,
            tool_calls: collectedToolCalls.map((tc, i) => ({
              index: i,
              id: tc.id,
              type: 'function',
              function: { name: tc.name, arguments: tc.arguments },
            })),
          },
          finish_reason: 'tool_calls',
        }],
      };
      sendSSE(res, chunk);
    } else {
      sendSSE(res, makeChunk(requestId, resolvedModel, bufferedText, 'assistant', null));
      sendSSE(res, makeChunk(requestId, resolvedModel, undefined, undefined, 'stop'));
    }
    res.write('data: [DONE]\n\n');
    res.end();
    captureResponse({
      requestId,
      usage,
      status,
      stopReason: collectedToolCalls.length > 0 ? 'tool_use' : 'end_turn',
      model: resolvedModel,
    });
    return;
  }

  if (!res.writableEnded) {
    sendSSE(res, makeChunk(requestId, resolvedModel, undefined, undefined, 'stop'));
    res.write('data: [DONE]\n\n');
    res.end();
  }

  captureResponse({
    requestId,
    usage,
    status,
    stopReason: 'end_turn',
    model: resolvedModel,
  });
}
|
|
686
|
-
|
|
687
|
-
// ---------------------------------------------------------------------------
|
|
688
|
-
// POST /v1/chat/completions — non-streaming
|
|
689
|
-
// ---------------------------------------------------------------------------
|
|
690
|
-
|
|
691
|
-
async function handleNonStreaming(res, body, requestId, sessionKey) {
|
|
692
|
-
const existing = getSession(sessionKey);
|
|
693
|
-
const resuming = !!existing?.sdkSessionId;
|
|
694
|
-
const toolsEnabled = hasTools(body);
|
|
695
|
-
const { promptText, error: promptError } = messagesToPrompt(body.messages, { resuming });
|
|
696
|
-
if (promptError) {
|
|
697
|
-
return res.status(400).json({
|
|
698
|
-
error: { message: promptError, type: 'invalid_request_error', code: 'invalid_resume_messages' },
|
|
699
|
-
});
|
|
700
|
-
}
|
|
701
|
-
const images = collectImages(body.messages);
|
|
702
|
-
// NOTE: `prompt` is built inside runQuery (not here) when images are
|
|
703
|
-
// present, because buildQueryPrompt returns a single-use async iterator
|
|
704
|
-
// for multimodal requests. If we built it here and the SDK call hit a
|
|
705
|
-
// 401, runWithAuthRetry would invoke runQuery a second time with the
|
|
706
|
-
// same exhausted iterator → SDK gets an empty user message → silent
|
|
707
|
-
// empty response. Lazy construction inside runQuery rebuilds the
|
|
708
|
-
// iterator per attempt.
|
|
709
|
-
const model = resolveModel(body.model);
|
|
710
|
-
const clientToolsServer = toolsEnabled ? buildClientToolsServer(body.tools) : null;
|
|
711
|
-
const toolsGuidance = clientToolsServer ? buildToolUsageGuidance(body.tools) : null;
|
|
712
|
-
if (images.length) console.log(` [multimodal] ${images.length} image block(s)`);
|
|
713
|
-
if (toolsEnabled) console.log(` [tools] ${body.tools.length} client tool(s) registered as MCP`);
|
|
714
|
-
|
|
715
|
-
let resultText = '';
|
|
716
|
-
let collectedToolCalls = [];
|
|
717
|
-
let resolvedModel = model;
|
|
718
|
-
let inputTokens = 0;
|
|
719
|
-
let outputTokens = 0;
|
|
720
|
-
let cacheReadTokens = 0;
|
|
721
|
-
let cacheCreateTokens = 0;
|
|
722
|
-
let stopReason = 'end_turn';
|
|
723
|
-
let capturedSessionId = existing?.sdkSessionId || null;
|
|
724
|
-
const abortController = new AbortController();
|
|
725
|
-
|
|
726
|
-
if (resuming) {
|
|
727
|
-
console.log(` [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
|
|
728
|
-
}
|
|
729
|
-
|
|
730
|
-
const runQuery = async () => {
|
|
731
|
-
// Reset per-attempt state so a 401 retry starts clean
|
|
732
|
-
resultText = '';
|
|
733
|
-
collectedToolCalls = [];
|
|
734
|
-
resolvedModel = model;
|
|
735
|
-
inputTokens = 0;
|
|
736
|
-
outputTokens = 0;
|
|
737
|
-
capturedSessionId = existing?.sdkSessionId || null;
|
|
738
|
-
|
|
739
|
-
// Build the prompt lazily on each attempt — multimodal returns a
|
|
740
|
-
// single-use async iterator. Keeps 401 auth-retries safe.
|
|
741
|
-
const prompt = buildQueryPrompt(promptText, images);
|
|
742
|
-
for await (const message of query({
|
|
743
|
-
prompt,
|
|
744
|
-
options: {
|
|
745
|
-
model,
|
|
746
|
-
maxTurns: toolsEnabled ? 5 : 200,
|
|
747
|
-
permissionMode: 'bypassPermissions',
|
|
748
|
-
allowDangerouslySkipPermissions: true,
|
|
749
|
-
abortController,
|
|
750
|
-
...(clientToolsServer
|
|
751
|
-
? {
|
|
752
|
-
mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
|
|
753
|
-
allowedTools: [`${MCP_TOOL_PREFIX}*`],
|
|
754
|
-
systemPrompt: { type: 'preset', preset: 'claude_code', append: toolsGuidance },
|
|
755
|
-
}
|
|
756
|
-
: toolsEnabled
|
|
757
|
-
? { allowedTools: [] }
|
|
758
|
-
: {}),
|
|
759
|
-
...(resuming ? { resume: existing.sdkSessionId } : {}),
|
|
760
|
-
...(sessionKey && !resuming ? { persistSession: true } : {}),
|
|
761
|
-
},
|
|
762
|
-
})) {
|
|
763
|
-
if (message.type === 'system' && message.subtype === 'init' && message.model) {
|
|
764
|
-
resolvedModel = message.model;
|
|
765
|
-
}
|
|
766
|
-
|
|
767
|
-
if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
|
|
768
|
-
capturedSessionId = message.session_id;
|
|
769
|
-
console.log(` [session] captured sdk session: ${capturedSessionId}`);
|
|
770
|
-
}
|
|
771
|
-
|
|
772
|
-
if (message.type === 'assistant' && message.message?.content) {
|
|
773
|
-
const content = message.message.content;
|
|
774
|
-
if (Array.isArray(content)) {
|
|
775
|
-
for (const block of content) {
|
|
776
|
-
if (block.type === 'text') resultText += block.text || '';
|
|
777
|
-
}
|
|
778
|
-
} else if (typeof content === 'string') {
|
|
779
|
-
resultText += content;
|
|
780
|
-
}
|
|
781
|
-
// Detect auth failure surfaced inline (long-running proxy, cached creds)
|
|
782
|
-
if (isAuthFailureText(resultText)) {
|
|
783
|
-
abortController.abort();
|
|
784
|
-
throw new AuthFailureInResultText(resultText);
|
|
785
|
-
}
|
|
786
|
-
// Native tool_use detection — abort the moment a tool_use lands.
|
|
787
|
-
if (toolsEnabled && hasToolUse(message)) {
|
|
788
|
-
const calls = extractToolUses(message);
|
|
789
|
-
if (calls.length) {
|
|
790
|
-
collectedToolCalls.push(...calls);
|
|
791
|
-
console.log(` [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
|
|
792
|
-
abortController.abort();
|
|
793
|
-
break;
|
|
794
|
-
}
|
|
795
|
-
}
|
|
796
|
-
}
|
|
797
|
-
|
|
798
|
-
if (message.type === 'result') {
|
|
799
|
-
if (message.result && !resultText) resultText = message.result;
|
|
800
|
-
if (isAuthFailureText(resultText)) {
|
|
801
|
-
throw new AuthFailureInResultText(resultText);
|
|
802
|
-
}
|
|
803
|
-
const usage = extractSdkUsage(message);
|
|
804
|
-
inputTokens = usage.input_tokens;
|
|
805
|
-
outputTokens = usage.output_tokens;
|
|
806
|
-
cacheReadTokens = usage.cache_read_input_tokens;
|
|
807
|
-
cacheCreateTokens = usage.cache_creation_input_tokens;
|
|
808
|
-
console.log(` [model-billed] requested=${resolvedModel} modelUsage=${JSON.stringify(usage.modelUsage || '(none)')}`);
|
|
809
|
-
if (message.subtype) stopReason = message.subtype;
|
|
810
|
-
break;
|
|
811
|
-
}
|
|
812
|
-
}
|
|
813
|
-
};
|
|
814
|
-
|
|
815
|
-
try {
|
|
816
|
-
await runWithAuthRetry({
|
|
817
|
-
attempt: runQuery,
|
|
818
|
-
// Non-streaming never writes to res until the end — retry is always safe
|
|
819
|
-
bailIfStarted: () => false,
|
|
820
|
-
onRefreshing: (err) => console.warn(`[auth] 401 on sync call — refreshing (${err.message?.slice(0, 80)})`),
|
|
821
|
-
onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying sync call`),
|
|
822
|
-
});
|
|
823
|
-
} catch (err) {
|
|
824
|
-
const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
|
|
825
|
-
if (!(toolsEnabled && isAbort)) {
|
|
826
|
-
console.error('[non-stream] SDK error:', err.message);
|
|
827
|
-
return res.status(500).json({ error: { message: err.message, type: 'server_error', code: null } });
|
|
828
|
-
}
|
|
829
|
-
}
|
|
830
|
-
|
|
831
|
-
if (sessionKey && capturedSessionId) {
|
|
832
|
-
upsertSession(sessionKey, capturedSessionId, resolvedModel);
|
|
833
|
-
}
|
|
834
|
-
|
|
835
|
-
const responseHeaders = {};
|
|
836
|
-
if (sessionKey) responseHeaders['X-Session-Id'] = sessionKey;
|
|
837
|
-
|
|
838
|
-
// Tool-calling response shape
|
|
839
|
-
if (toolsEnabled && collectedToolCalls.length > 0) {
|
|
840
|
-
console.log(` [tools] emitting ${collectedToolCalls.length} tool_call(s)`);
|
|
841
|
-
return res.set(responseHeaders).json({
|
|
842
|
-
id: `chatcmpl-${requestId}`,
|
|
843
|
-
object: 'chat.completion',
|
|
844
|
-
created: Math.floor(Date.now() / 1000),
|
|
845
|
-
model: normalizeModelName(resolvedModel),
|
|
846
|
-
choices: [{
|
|
847
|
-
index: 0,
|
|
848
|
-
message: {
|
|
849
|
-
role: 'assistant',
|
|
850
|
-
content: resultText.trim() || null,
|
|
851
|
-
tool_calls: collectedToolCalls.map((tc) => ({
|
|
852
|
-
id: tc.id,
|
|
853
|
-
type: 'function',
|
|
854
|
-
function: { name: tc.name, arguments: tc.arguments },
|
|
855
|
-
})),
|
|
856
|
-
},
|
|
857
|
-
finish_reason: 'tool_calls',
|
|
858
|
-
}],
|
|
859
|
-
usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
|
|
860
|
-
});
|
|
861
|
-
// No tool_use blocks → fall through to normal text response
|
|
862
|
-
}
|
|
863
|
-
|
|
864
|
-
res.set(responseHeaders).json({
|
|
865
|
-
id: `chatcmpl-${requestId}`,
|
|
866
|
-
object: 'chat.completion',
|
|
867
|
-
created: Math.floor(Date.now() / 1000),
|
|
868
|
-
model: normalizeModelName(resolvedModel),
|
|
869
|
-
choices: [{
|
|
870
|
-
index: 0,
|
|
871
|
-
message: { role: 'assistant', content: resultText },
|
|
872
|
-
finish_reason: 'stop',
|
|
873
|
-
}],
|
|
874
|
-
usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
|
|
875
|
-
});
|
|
876
|
-
|
|
877
|
-
captureResponse({
|
|
878
|
-
requestId,
|
|
879
|
-
usage: { input_tokens: inputTokens, output_tokens: outputTokens, cache_read_input_tokens: cacheReadTokens, cache_creation_input_tokens: cacheCreateTokens },
|
|
880
|
-
status: 'ok',
|
|
881
|
-
stopReason,
|
|
882
|
-
model: resolvedModel,
|
|
883
|
-
});
|
|
884
|
-
}
|
|
885
|
-
|
|
886
|
-
// ---------------------------------------------------------------------------
|
|
887
|
-
// POST /v1/messages — Anthropic-native surface (non-streaming + streaming)
|
|
888
|
-
// ---------------------------------------------------------------------------
|
|
889
|
-
// The dual-surface architecture: Hermes uses /v1/chat/completions
|
|
890
|
-
// (OpenAI shape), OpenClaw uses /v1/messages (Anthropic shape). Both
|
|
891
|
-
// translate to the SAME underlying SDK query() — the surfaces are pure
|
|
892
|
-
// translators over a single inference engine.
|
|
893
|
-
//
|
|
894
|
-
// Tool calling: reuses Phase 1's native MCP path from lib/tool-bridge.js.
|
|
895
|
-
// No prompt-injected tool definitions, no <tool_call> text parsing.
|
|
896
|
-
// Inbound tool_results still spliced as text on resume (see anthropic.js
|
|
897
|
-
// docstring for why — Phase 1 limitation, not lifted here).
|
|
898
|
-
|
|
899
|
-
/**
 * Handle a non-streaming POST /v1/messages request (Anthropic shape).
 *
 * Translates the Anthropic body into an SDK `query()` call, accumulates the
 * assistant text and any native tool_use blocks, and replies with a single
 * JSON document built by `buildAnthropicResponse`. Nothing is written to
 * `res` until the SDK run has finished, so an auth retry is always allowed.
 *
 * @param {object} res - Express response.
 * @param {object} body - Parsed Anthropic request body (`messages`, `model`, optional `tools`).
 * @param {string} requestId - Id used for the response and for capture records.
 * @param {string|null|undefined} sessionKey - Proxy session key; when set, the
 *   SDK session id is stored via `upsertSession` and echoed as `X-Session-Id`.
 * @returns {Promise<*>} Resolves once the response has been sent (the 400/500
 *   paths return the value of `res.json(...)`).
 */
async function handleAnthropicNonStreaming(res, body, requestId, sessionKey) {
  const existing = getSession(sessionKey);
  const resuming = !!existing?.sdkSessionId;
  const toolsEnabled = hasAnthropicTools(body);
  const { promptText, error: promptError } = anthropicMessagesToPrompt(body, { resuming });
  if (promptError) {
    return res.status(400).json({
      type: 'error',
      error: { type: 'invalid_request_error', message: promptError },
    });
  }
  const images = collectAnthropicImages(body.messages || []);
  // `prompt` is built lazily inside runQuery because the multimodal path
  // returns a single-use async iterator that a 401-retry would find exhausted.
  const model = resolveModel(body.model);
  // Translate Anthropic tool defs → the OpenAI shape buildClientToolsServer
  // expects: `{name, input_schema}` becomes `function: {name, parameters}`.
  const toolsForBridge = toolsEnabled
    ? body.tools.map((t) => ({
        type: 'function',
        function: { name: t.name, description: t.description || '', parameters: t.input_schema || {} },
      }))
    : null;
  const clientToolsServer = toolsForBridge ? buildClientToolsServer(toolsForBridge) : null;
  const toolsGuidance = clientToolsServer ? buildToolUsageGuidance(toolsForBridge) : null;

  if (images.length) console.log(` [multimodal] ${images.length} image block(s)`);
  if (toolsEnabled) console.log(` [tools] ${body.tools.length} client tool(s) registered as MCP`);

  let resultText = '';
  let collectedToolCalls = [];
  let resolvedModel = model;
  let inputTokens = 0;
  let outputTokens = 0;
  let cacheReadTokens = 0;
  let cacheCreateTokens = 0;
  let capturedSessionId = existing?.sdkSessionId || null;
  let stopReason = 'end_turn';

  if (resuming) {
    console.log(` [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
  }

  const runQuery = async () => {
    // A fresh controller per attempt: the inline auth-failure path below
    // aborts the controller before throwing, and a retry that reused the
    // same (already aborted) controller would be cancelled immediately.
    const abortController = new AbortController();

    // Reset per-attempt state so a 401 retry starts clean.
    resultText = '';
    collectedToolCalls = [];
    resolvedModel = model;
    inputTokens = 0;
    outputTokens = 0;
    cacheReadTokens = 0;
    cacheCreateTokens = 0;
    capturedSessionId = existing?.sdkSessionId || null;
    stopReason = 'end_turn';

    // Build the prompt lazily on each attempt — multimodal returns a
    // single-use async iterator. Keeps 401 auth-retries safe.
    const prompt = buildQueryPrompt(promptText, images);
    for await (const message of query({
      prompt,
      options: {
        model,
        maxTurns: toolsEnabled ? 5 : 200,
        permissionMode: 'bypassPermissions',
        allowDangerouslySkipPermissions: true,
        abortController,
        ...(clientToolsServer
          ? {
              mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
              allowedTools: [`${MCP_TOOL_PREFIX}*`],
              systemPrompt: { type: 'preset', preset: 'claude_code', append: toolsGuidance },
            }
          : toolsEnabled
            ? { allowedTools: [] }
            : {}),
        ...(resuming ? { resume: existing.sdkSessionId } : {}),
        ...(sessionKey && !resuming ? { persistSession: true } : {}),
      },
    })) {
      if (message.type === 'system' && message.subtype === 'init' && message.model) {
        resolvedModel = message.model;
      }

      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
        capturedSessionId = message.session_id;
        console.log(` [session] captured sdk session: ${capturedSessionId}`);
      }

      if (message.type === 'assistant' && message.message?.content) {
        const content = message.message.content;
        if (Array.isArray(content)) {
          for (const block of content) {
            if (block.type === 'text') resultText += block.text || '';
          }
        } else if (typeof content === 'string') {
          resultText += content;
        }
        // Auth failure surfaced inline as text: stop the SDK and let
        // runWithAuthRetry decide whether to refresh and retry.
        if (isAuthFailureText(resultText)) {
          abortController.abort();
          throw new AuthFailureInResultText(resultText);
        }
        // Native tool_use detection — abort the moment a tool_use lands so
        // the client (not the SDK) executes the tool.
        if (toolsEnabled && hasToolUse(message)) {
          const calls = extractToolUses(message);
          if (calls.length) {
            collectedToolCalls.push(...calls);
            stopReason = 'tool_use';
            console.log(` [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
            abortController.abort();
            break;
          }
        }
      }

      if (message.type === 'result') {
        // Fall back to the final result text only when no assistant text arrived.
        if (message.result && !resultText) resultText = message.result;
        if (isAuthFailureText(resultText)) {
          throw new AuthFailureInResultText(resultText);
        }
        const usage = extractSdkUsage(message);
        inputTokens = usage.input_tokens;
        outputTokens = usage.output_tokens;
        cacheReadTokens = usage.cache_read_input_tokens;
        cacheCreateTokens = usage.cache_creation_input_tokens;
        console.log(` [model-billed] requested=${resolvedModel} modelUsage=${JSON.stringify(usage.modelUsage || '(none)')}`);
        stopReason = mapStopReason(message);
        break;
      }
    }
  };

  try {
    await runWithAuthRetry({
      attempt: runQuery,
      // Non-streaming never writes to res until the end — retry is always safe.
      bailIfStarted: () => false,
      onRefreshing: (err) => console.warn(`[auth] 401 on /v1/messages — refreshing (${err.message?.slice(0, 80)})`),
      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying /v1/messages`),
    });
  } catch (err) {
    // An abort while tools are enabled is the expected result of the
    // tool_use short-circuit above, not a failure.
    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
    if (!(toolsEnabled && isAbort)) {
      console.error('[/v1/messages] SDK error:', err?.message);
      return res.status(500).json({
        type: 'error',
        error: { type: 'api_error', message: err?.message },
      });
    }
  }

  if (sessionKey && capturedSessionId) {
    upsertSession(sessionKey, capturedSessionId, resolvedModel);
  }

  if (sessionKey) res.setHeader('X-Session-Id', sessionKey);

  res.json(buildAnthropicResponse({
    rawText: resultText.trim(),
    toolUses: collectedToolCalls,
    model: resolvedModel,
    usage: { input_tokens: inputTokens, output_tokens: outputTokens },
    requestId,
    stopReason,
  }));

  captureResponse({
    requestId,
    usage: { input_tokens: inputTokens, output_tokens: outputTokens, cache_read_input_tokens: cacheReadTokens, cache_creation_input_tokens: cacheCreateTokens },
    status: 'ok',
    stopReason,
    model: resolvedModel,
  });
}
|
|
1071
|
-
|
|
1072
|
-
/**
 * Handle a streaming POST /v1/messages request (Anthropic SSE shape).
 *
 * Runs the SDK `query()` and forwards assistant text / tool_use blocks to the
 * client through the stream translator (`tx`). SSE headers are flushed before
 * the SDK call starts; an auth retry is only attempted while the translator
 * reports `hasStarted === false`.
 *
 * @param {object} req - Express request (not read by this handler).
 * @param {object} res - Express response; becomes the SSE stream.
 * @param {object} body - Parsed Anthropic request body (`messages`, `model`, optional `tools`).
 * @param {string} requestId - Id used for the stream and for capture records.
 * @param {string|null|undefined} sessionKey - Proxy session key; when set, the
 *   SDK session id is stored via `upsertSession` and echoed as `X-Session-Id`.
 * @returns {Promise<*>} Resolves once the stream has been finished or errored.
 */
async function handleAnthropicStreaming(req, res, body, requestId, sessionKey) {
  const existing = getSession(sessionKey);
  const resuming = !!existing?.sdkSessionId;
  const toolsEnabled = hasAnthropicTools(body);
  const { promptText, error: promptError } = anthropicMessagesToPrompt(body, { resuming });
  if (promptError) {
    return res.status(400).json({
      type: 'error',
      error: { type: 'invalid_request_error', message: promptError },
    });
  }
  const images = collectAnthropicImages(body.messages || []);
  // `prompt` is built lazily inside runQuery because the multimodal path
  // returns a single-use async iterator that a 401-retry would find exhausted.
  const model = resolveModel(body.model);
  // Anthropic `{name, input_schema}` → OpenAI-style `function: {name, parameters}`.
  const toolsForBridge = toolsEnabled
    ? body.tools.map((t) => ({
        type: 'function',
        function: { name: t.name, description: t.description || '', parameters: t.input_schema || {} },
      }))
    : null;
  const clientToolsServer = toolsForBridge ? buildClientToolsServer(toolsForBridge) : null;
  const toolsGuidance = clientToolsServer ? buildToolUsageGuidance(toolsForBridge) : null;

  if (images.length) console.log(` [multimodal] ${images.length} image block(s)`);
  if (toolsEnabled) console.log(` [tools] ${body.tools.length} client tool(s) registered as MCP`);

  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  res.setHeader('X-Request-Id', requestId);
  if (sessionKey) res.setHeader('X-Session-Id', sessionKey);
  res.flushHeaders();

  const tx = makeStreamTranslator({ res, requestId, model });
  // `let`: replaced in runQuery when a previous attempt aborted it (see there).
  let abortController = new AbortController();
  let resolvedModel = model;
  let capturedSessionId = existing?.sdkSessionId || null;
  let inputTokens = 0;
  let outputTokens = 0;
  let cacheReadTokens = 0;
  let cacheCreateTokens = 0;
  let stopReason = 'end_turn';
  let clientDisconnected = false;
  let textEmittedSoFar = ''; // dedup against same-message reflow from SDK
  let toolUseEmitted = false;

  // Stream only the part of `text` that has not been emitted yet. Assistant
  // messages may resend the whole accumulated text; when `text` does not
  // extend what was already sent, it is forwarded in full.
  const emitTextDelta = (text) => {
    const delta = text.startsWith(textEmittedSoFar)
      ? text.slice(textEmittedSoFar.length)
      : text;
    if (delta) {
      tx.pushTextDelta(delta);
      textEmittedSoFar += delta;
    }
  };

  res.on('close', () => {
    clientDisconnected = true;
    // Reads the current binding, so it cancels whichever attempt is running.
    abortController.abort();
  });

  if (resuming) {
    console.log(` [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
  }

  const runQuery = async () => {
    // The auth-failure path aborts the controller before throwing. A retry
    // must not inherit that aborted controller or it is cancelled at once.
    // If the client has gone away we deliberately keep the aborted one.
    if (abortController.signal.aborted && !clientDisconnected) {
      abortController = new AbortController();
    }

    // Reset per-attempt state in case of 401-retry. Note: tx is reused
    // across retries; retry is refused once the translator has started
    // (see bailIfStarted below).
    resolvedModel = model;
    capturedSessionId = existing?.sdkSessionId || null;
    inputTokens = 0;
    outputTokens = 0;
    cacheReadTokens = 0;
    cacheCreateTokens = 0;
    stopReason = 'end_turn';
    textEmittedSoFar = '';
    toolUseEmitted = false;

    // Build the prompt lazily on each attempt — multimodal returns a
    // single-use async iterator. Keeps 401 auth-retries safe.
    const prompt = buildQueryPrompt(promptText, images);
    for await (const message of query({
      prompt,
      options: {
        model,
        maxTurns: toolsEnabled ? 5 : 200,
        permissionMode: 'bypassPermissions',
        allowDangerouslySkipPermissions: true,
        abortController,
        ...(clientToolsServer
          ? {
              mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
              allowedTools: [`${MCP_TOOL_PREFIX}*`],
              systemPrompt: { type: 'preset', preset: 'claude_code', append: toolsGuidance },
            }
          : toolsEnabled
            ? { allowedTools: [] }
            : {}),
        ...(resuming ? { resume: existing.sdkSessionId } : {}),
        ...(sessionKey && !resuming ? { persistSession: true } : {}),
      },
    })) {
      if (clientDisconnected) break;

      if (message.type === 'system' && message.subtype === 'init' && message.model) {
        resolvedModel = message.model;
        tx.start(resolvedModel, 0);
      }

      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
        capturedSessionId = message.session_id;
        console.log(` [session] captured sdk session: ${capturedSessionId}`);
      }

      if (message.type === 'assistant' && message.message?.content) {
        const content = message.message.content;

        // Concatenated text of this message's text blocks ('' for non-arrays).
        let combined = '';
        if (Array.isArray(content)) {
          for (const b of content) if (b?.type === 'text' && b.text) combined += b.text;
        }

        // Auth-failure short-circuit: throw so runWithAuthRetry handles it.
        // Only done before the translator has started (otherwise the SSE
        // stream is already committed and cannot be undone).
        if (combined && isAuthFailureText(combined) && !tx.hasStarted) {
          abortController.abort();
          throw new AuthFailureInResultText(combined);
        }

        // Tool_use detection: emit tool_use blocks structurally and abort.
        if (toolsEnabled && hasToolUse(message)) {
          const calls = extractToolUses(message);
          if (calls.length) {
            // Emit any text from this same message *before* the tool_use,
            // block by block, to preserve the original ordering.
            if (Array.isArray(content)) {
              for (const b of content) {
                if (b?.type === 'text' && b.text) emitTextDelta(b.text);
              }
            }
            for (const tu of calls) tx.pushToolUse(tu);
            toolUseEmitted = true;
            stopReason = 'tool_use';
            console.log(` [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
            abortController.abort();
            break;
          }
        }

        // Plain text-only assistant message: stream the delta.
        if (Array.isArray(content)) {
          if (combined) emitTextDelta(combined);
        } else if (typeof content === 'string' && content) {
          emitTextDelta(content);
        }
      }

      if (message.type === 'result') {
        // Check for an auth failure BEFORE writing the result text, so a
        // retryable failure is never pushed to the client as model output.
        if (isAuthFailureText(message.result || '') && !tx.hasStarted) {
          throw new AuthFailureInResultText(message.result);
        }
        if (message.result && !textEmittedSoFar && !toolUseEmitted) {
          // Some SDK paths only deliver text via the final result message
          // (no streaming assistant messages). Emit it as a single delta.
          tx.pushTextDelta(message.result);
        }
        const usage = extractSdkUsage(message);
        inputTokens = usage.input_tokens;
        outputTokens = usage.output_tokens;
        cacheReadTokens = usage.cache_read_input_tokens;
        cacheCreateTokens = usage.cache_creation_input_tokens;
        console.log(` [model-billed] requested=${resolvedModel} modelUsage=${JSON.stringify(usage.modelUsage || '(none)')}`);
        if (!toolUseEmitted) stopReason = mapStopReason(message);
        break;
      }
    }
  };

  try {
    await runWithAuthRetry({
      attempt: runQuery,
      // Once the translator has started, the SSE stream is committed —
      // a retry would fragment it.
      bailIfStarted: () => tx.hasStarted,
      onRefreshing: (err) => console.warn(`[auth] 401 on /v1/messages stream — refreshing (${err.message?.slice(0, 80)})`),
      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying /v1/messages stream`),
    });
  } catch (err) {
    // Aborts caused by a client disconnect or by the tool_use short-circuit
    // are expected; everything else is reported on the stream.
    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
    if (!clientDisconnected && !(toolsEnabled && isAbort)) {
      console.error('[/v1/messages stream] SDK error:', err?.message);
      tx.error(err);
      return;
    }
  }

  if (sessionKey && capturedSessionId) {
    upsertSession(sessionKey, capturedSessionId, resolvedModel);
  }

  tx.finish({ stopReason, usage: { output_tokens: outputTokens } });

  captureResponse({
    requestId,
    usage: { input_tokens: inputTokens, output_tokens: outputTokens, cache_read_input_tokens: cacheReadTokens, cache_creation_input_tokens: cacheCreateTokens },
    status: 'ok',
    stopReason,
    model: resolvedModel,
  });
}
|
|
1309
|
-
|
|
1310
253
|
// ---------------------------------------------------------------------------
|
|
1311
254
|
// Express app
|
|
1312
255
|
// ---------------------------------------------------------------------------
|
|
@@ -1370,6 +313,36 @@ function requireLocalOrigin(req, res, next) {
|
|
|
1370
313
|
next();
|
|
1371
314
|
}
|
|
1372
315
|
|
|
316
|
+
/**
 * Convert a session-store entry into a JSON-friendly summary.
 *
 * Two output shapes are produced from the same data:
 *  - `dashboard: true`  → `key`, `lastUsedAt`, `idleSec`, `ttlRemainingSec`
 *    (seconds are floored);
 *  - default            → `sessionKey`, `lastUsed`, `idleSeconds`,
 *    `ttlRemainingSeconds` (seconds are rounded).
 * Both include `sdkSessionId`, `model`, `messageCount` and an ISO `createdAt`.
 *
 * @param {string} key - Session key the entry is stored under.
 * @param {object} entry - Entry with `sdkSessionId`, `model`, `messageCount`,
 *   `createdAt` and `lastUsed` (the latter two are passed to `new Date(...)`).
 * @param {{dashboard?: boolean}} [options] - Selects the output shape.
 * @returns {object} The serialized session.
 */
function serializeSession(key, entry, { dashboard = false } = {}) {
  const idleMs = Date.now() - entry.lastUsed;
  // Never report a negative TTL for an entry that is already past expiry.
  const ttlRemainingMs = Math.max(0, SESSION_TTL_MS - idleMs);

  // Fields common to both shapes, in their original serialization order.
  const shared = {
    sdkSessionId: entry.sdkSessionId,
    model: entry.model,
    messageCount: entry.messageCount,
    createdAt: new Date(entry.createdAt).toISOString(),
  };
  const lastUsedIso = new Date(entry.lastUsed).toISOString();

  // The dashboard shape truncates to whole seconds; the API shape rounds.
  const toSeconds = dashboard ? Math.floor : Math.round;
  const idle = toSeconds(idleMs / 1000);
  const ttlRemaining = toSeconds(ttlRemainingMs / 1000);

  if (!dashboard) {
    return {
      sessionKey: key,
      ...shared,
      lastUsed: lastUsedIso,
      idleSeconds: idle,
      ttlRemainingSeconds: ttlRemaining,
    };
  }

  return {
    key,
    ...shared,
    lastUsedAt: lastUsedIso,
    idleSec: idle,
    ttlRemainingSec: ttlRemaining,
  };
}
|
|
345
|
+
|
|
1373
346
|
// GET / — serve dashboard. No-cache headers so browsers always re-fetch
|
|
1374
347
|
// after a mobygate upgrade; otherwise they keep serving the old index.html
|
|
1375
348
|
// from cache and users see a stale dashboard long after the service updated.
|
|
@@ -1402,6 +375,19 @@ app.get('/', async (_req, res) => {
|
|
|
1402
375
|
}
|
|
1403
376
|
});
|
|
1404
377
|
|
|
378
|
+
// GET /dashboard.css — serve the dashboard stylesheet from the package
// directory. Sent with no-cache headers so the browser re-fetches it on
// every load; any failure to read the file is answered with a plain-text 404.
app.get('/dashboard.css', async (_req, res) => {
  const cssPath = join(__dirname, 'dashboard.css');
  const noCacheHeaders = [
    ['Cache-Control', 'no-cache, no-store, must-revalidate'],
    ['Pragma', 'no-cache'],
    ['Expires', '0'],
  ];
  for (const [name, value] of noCacheHeaders) {
    res.setHeader(name, value);
  }

  try {
    const fsPromises = await import('fs/promises');
    const stylesheet = await fsPromises.readFile(cssPath, 'utf8');
    res.type('css').send(stylesheet);
  } catch {
    res.status(404).type('text').send(`dashboard.css not found at ${cssPath}`);
  }
});
|
|
390
|
+
|
|
1405
391
|
// /inspector — session inspector UI for browsing captures.
|
|
1406
392
|
// Backed by /dashboard/captures and /dashboard/captures/:filename.
|
|
1407
393
|
app.get('/inspector', async (_req, res) => {
|
|
@@ -1415,6 +401,19 @@ app.get('/inspector', async (_req, res) => {
|
|
|
1415
401
|
}
|
|
1416
402
|
});
|
|
1417
403
|
|
|
404
|
+
// GET on the POST-only inference endpoints — RFC 9110: answer 405 with an
// Allow header so probes (e.g. Hermes onboarding) can detect the endpoint
// exists. Returning 404 on GET makes them think the endpoint is missing
// entirely.

/**
 * Build an Express handler that rejects the request with 405.
 *
 * @param {string} allow - Value for the `Allow` response header; also
 *   interpolated into the error message.
 * @returns {(req: object, res: object) => void} Handler that sets `Allow`
 *   and sends an OpenAI-style JSON error body.
 */
function methodNotAllowed(allow) {
  return function rejectWithAllow(_req, res) {
    const payload = {
      error: {
        message: `Method Not Allowed. Use ${allow}.`,
        type: 'invalid_request_error',
        code: 'method_not_allowed',
      },
    };
    res.set('Allow', allow);
    res.status(405).json(payload);
  };
}
|
|
413
|
+
// Each POST-only inference route answers GET with 405 + `Allow: POST`.
// A separate handler instance is registered per route, in this order.
for (const postOnlyRoute of ['/v1/chat/completions', '/v1/messages', '/quiet/v1/messages']) {
  app.get(postOnlyRoute, methodNotAllowed('POST'));
}
|
|
416
|
+
|
|
1418
417
|
// POST /v1/chat/completions
|
|
1419
418
|
app.post('/v1/chat/completions', async (req, res) => {
|
|
1420
419
|
const requestId = uuidv4().replace(/-/g, '').slice(0, 24);
|
|
@@ -1483,11 +482,14 @@ app.post('/v1/chat/completions', async (req, res) => {
|
|
|
1483
482
|
res.on('finish', () => emitEnd());
|
|
1484
483
|
res.on('close', () => { if (!endEmitted) emitEnd({ status: 'error', error: 'client_disconnect' }); });
|
|
1485
484
|
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
485
|
+
await runInference(
|
|
486
|
+
{ req, res, body, requestId, sessionKey },
|
|
487
|
+
openaiSurface,
|
|
488
|
+
{
|
|
489
|
+
mode: body.stream ? 'stream' : 'json',
|
|
490
|
+
deps: { getSession, upsertSession, resolveModel },
|
|
491
|
+
},
|
|
492
|
+
);
|
|
1491
493
|
});
|
|
1492
494
|
|
|
1493
495
|
// POST /v1/messages — Anthropic-native surface (for OpenClaw etc.).
|
|
@@ -1556,11 +558,104 @@ app.post('/v1/messages', async (req, res) => {
|
|
|
1556
558
|
res.on('finish', () => emitEnd());
|
|
1557
559
|
res.on('close', () => { if (!endEmitted) emitEnd({ status: 'error', error: 'client_disconnect' }); });
|
|
1558
560
|
|
|
1559
|
-
|
|
1560
|
-
|
|
561
|
+
await runInference(
|
|
562
|
+
{ req, res, body, requestId, sessionKey },
|
|
563
|
+
anthropicSurface,
|
|
564
|
+
{
|
|
565
|
+
mode: body.stream ? 'stream' : 'json',
|
|
566
|
+
deps: { getSession, upsertSession, resolveModel },
|
|
567
|
+
},
|
|
568
|
+
);
|
|
569
|
+
});
|
|
570
|
+
|
|
571
|
+
// POST /quiet/v1/messages — Anthropic-shaped endpoint that differs from
// /v1/messages in two ways:
//   1. The request body is scrubbed of known third-party agent identifiers
//      (openclaw, hermes, mobius, etc.) before the SDK forwards it.
//   2. The SDK receives an explicit string systemPrompt, which disables the
//      claude_code preset that otherwise injects "I am Claude Code…" framing.
//      NOTE(review): that systemPrompt handling is not visible in this
//      handler — presumably it happens inside runInference; confirm there.
//
// Use case: clients that don't want their identity to leak into Anthropic's
// detection heuristics (e.g. "found 'openclaw' in package.json → flag account
// for extra-usage billing"). The scrub list is configurable at
// ~/.mobygate/quiet-words.txt.
app.post('/quiet/v1/messages', async (req, res) => {
  const requestId = uuidv4().replace(/-/g, '').slice(0, 24);
  const body = req.body;

  // Reject anything that does not carry a non-empty messages array.
  const incoming = body?.messages;
  if (!Array.isArray(incoming) || incoming.length === 0) {
    return res.status(400).json({
      type: 'error',
      error: { type: 'invalid_request_error', message: 'messages is required and must be a non-empty array' },
    });
  }

  // Diagnose first (so the log can report what was stripped), then scrub the
  // body in place. Everything below — capture, session derivation, prompt
  // building — only ever sees the scrubbed content.
  const scrubReport = quietDiagnose(body);
  scrubAnthropicBody(body);

  const resolved = resolveSessionKey({
    headerKey: req.headers['x-session-id'],
    bodyKey: body.session_id,
    body,
  });
  const sessionKey = resolved.key;
  const sessionKeySource = resolved.source;
  const priorSession = getSession(sessionKey);

  // Suffix for the request log line; empty when there is no session key.
  let sessionTag = '';
  if (sessionKey) {
    const autoMark = sessionKeySource === 'auto' ? ' (auto)' : '';
    const stateMark = priorSession ? ' (resume)' : ' (new)';
    sessionTag = ` | session=${sessionKey}${autoMark}${stateMark}`;
  }

  const modeLabel = body.stream ? 'stream' : 'sync';
  console.log(`[${new Date().toISOString()}] anthropic-quiet ${modeLabel} | model=${body.model} → ${resolveModel(body.model)} | msgs=${body.messages.length}${sessionTag}`);

  if (scrubReport.matches > 0) {
    const breakdown = scrubReport.words
      .map(({ word, count }) => `${word}×${count}`)
      .join(' ');
    console.log(` [quiet] scrubbed ${scrubReport.matches} occurrence(s): ${breakdown}`);
  } else {
    console.log(` [quiet] payload was already clean (no matches)`);
  }

  captureRequest({ path: '/quiet/v1/messages', body, requestId, sessionKey, sessionKeySource });

  const startedAt = Date.now();
  const imageCount = collectAnthropicImages(body.messages || []).length;
  dashboardBus.emitEvent({
    type: 'request.start',
    id: requestId,
    method: 'POST',
    path: '/quiet/v1/messages',
    model: body.model,
    resolvedModel: resolveModel(body.model),
    session: sessionKey,
    stream: !!body.stream,
    tools: hasAnthropicTools(body),
    images: imageCount,
    messages: body.messages.length,
    resuming: !!priorSession,
    quietScrubs: scrubReport.matches,
  });

  // Emit request.end exactly once: on a normal 'finish', or on 'close' when
  // the client went away before the response finished.
  let endEmitted = false;
  const reportEnd = (overrides = {}) => {
    if (endEmitted) return;
    endEmitted = true;
    dashboardBus.emitEvent({
      type: 'request.end',
      id: requestId,
      durationMs: Date.now() - startedAt,
      status: res.statusCode < 400 ? 'ok' : 'error',
      httpStatus: res.statusCode,
      ...overrides,
    });
  };
  res.on('finish', () => {
    reportEnd();
  });
  res.on('close', () => {
    if (endEmitted) return;
    reportEnd({ status: 'error', error: 'client_disconnect' });
  });

  const context = { req, res, body, requestId, sessionKey };
  const options = {
    mode: body.stream ? 'stream' : 'json',
    deps: { getSession, upsertSession, resolveModel },
  };
  await runInference(context, anthropicSurface, options);
});
|
|
1565
660
|
|
|
1566
661
|
// GET /v1/models
|
|
@@ -1582,37 +677,19 @@ app.get('/v1/models', (_req, res) => {
|
|
|
1582
677
|
});
|
|
1583
678
|
|
|
1584
679
|
// GET /sessions — list active sessions
|
|
1585
|
-
app.get('/sessions', (_req, res) => {
|
|
680
|
+
app.get('/sessions', requireLocalOrigin, (_req, res) => {
|
|
1586
681
|
const list = [];
|
|
1587
682
|
for (const [key, entry] of sessions) {
|
|
1588
|
-
list.push(
|
|
1589
|
-
sessionKey: key,
|
|
1590
|
-
sdkSessionId: entry.sdkSessionId,
|
|
1591
|
-
model: entry.model,
|
|
1592
|
-
messageCount: entry.messageCount,
|
|
1593
|
-
createdAt: new Date(entry.createdAt).toISOString(),
|
|
1594
|
-
lastUsed: new Date(entry.lastUsed).toISOString(),
|
|
1595
|
-
idleSeconds: Math.round((Date.now() - entry.lastUsed) / 1000),
|
|
1596
|
-
ttlRemainingSeconds: Math.max(0, Math.round((SESSION_TTL_MS - (Date.now() - entry.lastUsed)) / 1000)),
|
|
1597
|
-
});
|
|
683
|
+
list.push(serializeSession(key, entry));
|
|
1598
684
|
}
|
|
1599
685
|
res.json({ active: list.length, sessions: list });
|
|
1600
686
|
});
|
|
1601
687
|
|
|
1602
688
|
// GET /sessions/:key — get specific session
|
|
1603
|
-
app.get('/sessions/:key', (req, res) => {
|
|
689
|
+
app.get('/sessions/:key', requireLocalOrigin, (req, res) => {
|
|
1604
690
|
const entry = sessions.get(req.params.key);
|
|
1605
691
|
if (!entry) return res.status(404).json({ error: 'Session not found' });
|
|
1606
|
-
res.json(
|
|
1607
|
-
sessionKey: req.params.key,
|
|
1608
|
-
sdkSessionId: entry.sdkSessionId,
|
|
1609
|
-
model: entry.model,
|
|
1610
|
-
messageCount: entry.messageCount,
|
|
1611
|
-
createdAt: new Date(entry.createdAt).toISOString(),
|
|
1612
|
-
lastUsed: new Date(entry.lastUsed).toISOString(),
|
|
1613
|
-
idleSeconds: Math.round((Date.now() - entry.lastUsed) / 1000),
|
|
1614
|
-
ttlRemainingSeconds: Math.max(0, Math.round((SESSION_TTL_MS - (Date.now() - entry.lastUsed)) / 1000)),
|
|
1615
|
-
});
|
|
692
|
+
res.json(serializeSession(req.params.key, entry));
|
|
1616
693
|
});
|
|
1617
694
|
|
|
1618
695
|
// DELETE /sessions/:key — clear a session
|
|
@@ -1649,7 +726,7 @@ app.get('/health', (_req, res) => {
|
|
|
1649
726
|
// GET /auth/status
|
|
1650
727
|
// Reports CLI-side auth state plus (optionally) a real probe against Anthropic.
|
|
1651
728
|
// Pass ?quick=1 to skip the probe (reads keychain only — cheap).
|
|
1652
|
-
app.get('/auth/status', async (req, res) => {
|
|
729
|
+
app.get('/auth/status', requireLocalOrigin, async (req, res) => {
|
|
1653
730
|
const quick = req.query.quick === '1' || req.query.quick === 'true';
|
|
1654
731
|
const status = await getAuthStatus();
|
|
1655
732
|
if (!quick && status.ok && status.loggedIn) {
|
|
@@ -1722,7 +799,7 @@ async function loadBuildMeta() {
|
|
|
1722
799
|
}
|
|
1723
800
|
|
|
1724
801
|
// GET /dashboard/recent — ring-buffer snapshot for initial page load
|
|
1725
|
-
app.get('/dashboard/recent', async (req, res) => {
|
|
802
|
+
app.get('/dashboard/recent', requireLocalOrigin, async (req, res) => {
|
|
1726
803
|
const limit = Math.min(500, parseInt(req.query.limit || '100', 10));
|
|
1727
804
|
res.json({
|
|
1728
805
|
recent: dashboardBus.getRecent({ limit }),
|
|
@@ -1735,20 +812,10 @@ app.get('/dashboard/recent', async (req, res) => {
|
|
|
1735
812
|
});
|
|
1736
813
|
|
|
1737
814
|
// GET /dashboard/sessions — active session detail for the dashboard
|
|
1738
|
-
app.get('/dashboard/sessions', (_req, res) => {
|
|
1739
|
-
const now = Date.now();
|
|
815
|
+
app.get('/dashboard/sessions', requireLocalOrigin, (_req, res) => {
|
|
1740
816
|
const list = [];
|
|
1741
817
|
for (const [key, entry] of sessions) {
|
|
1742
|
-
list.push({
|
|
1743
|
-
key,
|
|
1744
|
-
sdkSessionId: entry.sdkSessionId,
|
|
1745
|
-
model: entry.model,
|
|
1746
|
-
messageCount: entry.messageCount,
|
|
1747
|
-
createdAt: new Date(entry.createdAt).toISOString(),
|
|
1748
|
-
lastUsedAt: new Date(entry.lastUsed).toISOString(),
|
|
1749
|
-
idleSec: Math.floor((now - entry.lastUsed) / 1000),
|
|
1750
|
-
ttlRemainingSec: Math.max(0, Math.floor((SESSION_TTL_MS - (now - entry.lastUsed)) / 1000)),
|
|
1751
|
-
});
|
|
818
|
+
list.push(serializeSession(key, entry, { dashboard: true }));
|
|
1752
819
|
}
|
|
1753
820
|
// Most recently used first
|
|
1754
821
|
list.sort((a, b) => a.idleSec - b.idleSec);
|
|
@@ -1925,6 +992,176 @@ app.post('/dashboard/captures-toggle', requireLocalOrigin, async (req, res) => {
|
|
|
1925
992
|
}
|
|
1926
993
|
});
|
|
1927
994
|
|
|
995
|
+
// GET /dashboard/session-costs — per-session cost breakdown (v0.8.5)
//
// Aggregates the [model-billed] log lines emitted by each handler's SDK
// result step, grouped by session key. Each entry of `sessions` in the
// response carries:
//   - cost_usd      total $ across all turns of this session
//   - turns         number of completed (non-tool-use-aborted) turns, counted
//                   as [model-billed] lines attributed to the session
//   - per_turn_usd  average cost per turn (low = cache working)
//   - bucket        singleton (1 turn) / short (2-3) / medium (4-10) / warm (>10)
//   - models        per-model breakdown (opus vs sonnet vs haiku)
//   - first_user    first user message (for human-readable identification)
//   - model, path, msgs, last_seen   from the newest capture summary found
// The top level adds total_cost_usd, total_turns, session_count and a
// per-bucket { sessions, cost, pct_of_total } summary.
//
// This view exists because an audit found 38.9% of total spend going to
// "singleton" sessions — channels that fire once, idle past the wire-cache
// TTL, then pay cache_creation tax on the next turn. The dashboard tab built
// off this endpoint lets users spot bleeding channels in real time and decide
// which to keep warm via cron pings.
//
// NOTE(review): only `session=auto_…` keys are recognised in the log, and a
// billed line is attributed to the most recent such key seen above it, so
// interleaved concurrent requests or explicit (non-auto) session keys may be
// misattributed — confirm whether that is acceptable.
app.get('/dashboard/session-costs', requireLocalOrigin, async (_req, res) => {
  // Per-session figures are rounded to 4 decimals, totals to 2.
  const round4 = (value) => Math.round(value * 10000) / 10000;
  const round2 = (value) => Math.round(value * 100) / 100;

  // Classify a session by how many billed turns it has.
  const bucketFor = (turns) => {
    if (turns === 1) return 'singleton';
    if (turns <= 3) return 'short';
    if (turns <= 10) return 'medium';
    return 'warm';
  };

  // Return a short, whitespace-collapsed snippet of the first meaningful user
  // message among the first five messages of a captured body, or null.
  const firstUserSnippet = (captured) => {
    for (const msg of (captured.messages || []).slice(0, 5)) {
      if (msg?.role !== 'user') continue;
      const content = msg.content;
      let text = '';
      if (Array.isArray(content)) {
        const textBlock = content.find(
          (blk) => blk?.type === 'text' && typeof blk.text === 'string' && blk.text,
        );
        if (textBlock) text = textBlock.text;
      } else if (typeof content === 'string') {
        text = content;
      }
      // Skip "OpenClaw runtime context" boilerplate
      if (text && !text.startsWith('OpenClaw runtime context')) {
        return text.slice(0, 80).replace(/\s+/g, ' ');
      }
    }
    return null;
  };

  try {
    const { readFile, readdir } = await import('fs/promises');
    const { existsSync } = await import('fs');
    const { homedir } = await import('os');

    const logPath = join(LOGS_DIR, 'server.log');
    const captureDir = process.env.MOBYGATE_CAPTURE_DIR
      || join(process.env.MOBYGATE_HOME || join(homedir(), '.mobygate'), 'captures');

    // Step 1: parse [model-billed] lines from server.log, associating
    // each with the most recently observed session= line above it.
    // costBySession: sk -> { turns, cost_usd, models: Map(model -> {turns,
    // cost_usd, in_uncached, cache_read, cache_create, out}) }
    const costBySession = new Map();
    let lastSession = null;

    if (existsSync(logPath)) {
      const raw = await readFile(logPath, 'utf8');
      for (const line of raw.split(/\r?\n/)) {
        const sessMatch = line.match(/session=(auto_\w+)/);
        if (sessMatch) lastSession = sessMatch[1];

        const billed = line.match(/\[model-billed\] requested=\S+ modelUsage=(\{.+\})/);
        if (!billed || !lastSession) continue;

        let usageByModel;
        try {
          usageByModel = JSON.parse(billed[1]);
        } catch {
          continue; // truncated or malformed log line — ignore it
        }

        let rec = costBySession.get(lastSession);
        if (!rec) {
          rec = { turns: 0, cost_usd: 0, models: new Map() };
          costBySession.set(lastSession, rec);
        }
        rec.turns += 1;

        for (const [model, data] of Object.entries(usageByModel)) {
          // `data` may be null in a malformed payload; treat it as zero usage
          // instead of failing the whole endpoint.
          const cost = data?.costUSD || 0;
          rec.cost_usd += cost;

          let perModel = rec.models.get(model);
          if (!perModel) {
            perModel = { turns: 0, cost_usd: 0, in_uncached: 0, cache_read: 0, cache_create: 0, out: 0 };
            rec.models.set(model, perModel);
          }
          perModel.turns += 1;
          perModel.cost_usd += cost;
          perModel.in_uncached += data?.inputTokens || 0;
          perModel.cache_read += data?.cacheReadInputTokens || 0;
          perModel.cache_create += data?.cacheCreationInputTokens || 0;
          perModel.out += data?.outputTokens || 0;
        }
      }
    }

    // Step 2: enrich with capture metadata (first user message, model,
    // path, msg count) for each session key that has cost data. Captures are
    // walked newest-first and the walk stops as soon as every such session
    // has metadata, so only as many files as needed are read.
    const metaBySession = new Map();
    if (costBySession.size > 0 && existsSync(captureDir)) {
      const captureFiles = (await readdir(captureDir))
        .filter((name) => name.endsWith('.json'))
        .sort()
        .reverse(); // newest first

      for (const file of captureFiles) {
        if (metaBySession.size === costBySession.size) break;

        // A missing or unreadable summary reads as '' and is skipped below.
        const summaryPath = join(captureDir, file.replace(/\.json$/, '.summary.txt'));
        const summary = await readFile(summaryPath, 'utf8').catch(() => '');

        const sk = summary.match(/^session_key:\s+(auto_\w+)/m)?.[1];
        if (!sk || !costBySession.has(sk) || metaBySession.has(sk)) continue;

        const modelMatch = summary.match(/^model:\s+(\S+)/m);
        const pathMatch = summary.match(/^path:\s+(\S+)/m);
        const msgsMatch = summary.match(/^messages:\s+(\d+)/m);

        // Best effort: an unreadable or unparsable capture just leaves
        // first_user empty.
        let firstUser = null;
        try {
          firstUser = firstUserSnippet(JSON.parse(await readFile(join(captureDir, file), 'utf8')));
        } catch {
          firstUser = null;
        }

        metaBySession.set(sk, {
          model: modelMatch ? modelMatch[1] : null,
          path: pathMatch ? pathMatch[1] : null,
          msgs: msgsMatch ? parseInt(msgsMatch[1], 10) : null,
          // First 19 characters of the capture filename — presumably its
          // timestamp prefix; verify against the capture naming scheme.
          lastSeenIso: file.slice(0, 19),
          firstUser,
        });
      }
    }

    // Step 3: combine and sort (most expensive session first)
    const rows = [];
    for (const [sk, rec] of costBySession) {
      const meta = metaBySession.get(sk) || {};
      rows.push({
        session_key: sk,
        turns: rec.turns,
        cost_usd: round4(rec.cost_usd),
        per_turn_usd: round4(rec.cost_usd / Math.max(rec.turns, 1)),
        bucket: bucketFor(rec.turns),
        model: meta.model || null,
        path: meta.path || null,
        msgs: meta.msgs ?? null,
        last_seen: meta.lastSeenIso || null,
        first_user: meta.firstUser || null,
        models: Object.fromEntries(
          [...rec.models].map(([model, d]) => [model, {
            turns: d.turns,
            cost_usd: round4(d.cost_usd),
            in_uncached: d.in_uncached,
            cache_read: d.cache_read,
            cache_create: d.cache_create,
            out: d.out,
          }]),
        ),
      });
    }
    rows.sort((a, b) => b.cost_usd - a.cost_usd);

    // Step 4: aggregate stats
    const totalCost = rows.reduce((sum, row) => sum + row.cost_usd, 0);
    const totalTurns = rows.reduce((sum, row) => sum + row.turns, 0);
    const buckets = {
      singleton: { sessions: 0, cost: 0 },
      short: { sessions: 0, cost: 0 },
      medium: { sessions: 0, cost: 0 },
      warm: { sessions: 0, cost: 0 },
    };
    for (const row of rows) {
      buckets[row.bucket].sessions += 1;
      buckets[row.bucket].cost += row.cost_usd;
    }
    for (const stats of Object.values(buckets)) {
      stats.cost = round2(stats.cost);
      stats.pct_of_total = totalCost > 0 ? Math.round((stats.cost / totalCost) * 1000) / 10 : 0;
    }

    res.json({
      generatedAt: new Date().toISOString(),
      total_cost_usd: round2(totalCost),
      total_turns: totalTurns,
      session_count: rows.length,
      buckets,
      sessions: rows,
    });
  } catch (e) {
    res.status(500).json({ error: e.message });
  }
});
|
|
1164
|
+
|
|
1928
1165
|
// ---------------------------------------------------------------------------
|
|
1929
1166
|
// Updater — dashboard-driven "update available → update now" flow
|
|
1930
1167
|
// ---------------------------------------------------------------------------
|
|
@@ -1932,7 +1169,7 @@ app.post('/dashboard/captures-toggle', requireLocalOrigin, async (req, res) => {
|
|
|
1932
1169
|
// GET /update/check — is there a newer mobygate on npm?
|
|
1933
1170
|
// Response: { current, latest, updateAvailable, installMode, canApply, cached, error }
|
|
1934
1171
|
// Safe to poll: the npm registry call is cached for 15 min in-process.
|
|
1935
|
-
app.get('/update/check', async (req, res) => {
|
|
1172
|
+
app.get('/update/check', requireLocalOrigin, async (req, res) => {
|
|
1936
1173
|
try {
|
|
1937
1174
|
const force = req.query.force === '1' || req.query.force === 'true';
|
|
1938
1175
|
const info = await getUpdateCheck({ force });
|
|
@@ -1964,7 +1201,7 @@ app.post('/update/apply', requireLocalOrigin, (_req, res) => {
|
|
|
1964
1201
|
// The dashboard polls this during apply. `running` is determined by
|
|
1965
1202
|
// PID liveness, so even if our process is the one getting restarted,
|
|
1966
1203
|
// the new one answers correctly.
|
|
1967
|
-
app.get('/update/status', (req, res) => {
|
|
1204
|
+
app.get('/update/status', requireLocalOrigin, (req, res) => {
|
|
1968
1205
|
const state = readUpdateState();
|
|
1969
1206
|
let running = false;
|
|
1970
1207
|
if (state.pid) {
|
|
@@ -1987,11 +1224,12 @@ app.get('/update/status', (req, res) => {
|
|
|
1987
1224
|
|
|
1988
1225
|
app.listen(PORT, BIND, async () => {
|
|
1989
1226
|
const ttlMin = Math.round(SESSION_TTL_MS / 60000);
|
|
1227
|
+
const ttlHours = (SESSION_TTL_MS / 3600000).toFixed(1);
|
|
1990
1228
|
const meta = await loadBuildMeta();
|
|
1991
1229
|
console.log(banner({ version: meta.version }));
|
|
1992
1230
|
console.log(` bind ${BIND}:${PORT}${BIND === '127.0.0.1' ? ' (loopback only)' : ' (⚠ network-reachable — add auth)'}`);
|
|
1993
1231
|
console.log(` model ${DEFAULT_MODEL}`);
|
|
1994
|
-
console.log(` session TTL ${ttlMin} min`);
|
|
1232
|
+
console.log(` session TTL ${ttlMin} min (${ttlHours}h)`);
|
|
1995
1233
|
console.log(` dashboard http://localhost:${PORT}`);
|
|
1996
1234
|
if (isCaptureEnabled()) {
|
|
1997
1235
|
console.log(` capture ON → ${CAPTURE_DIR_PATH.replace(process.env.HOME || '', '~')}`);
|