mobygate 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +86 -0
- package/bin/mobygate.js +27 -8
- package/lib/anthropic.js +379 -0
- package/lib/updater.js +28 -8
- package/package.json +1 -1
- package/server.js +441 -0
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,92 @@ All notable changes to mobygate are documented here. Format loosely follows
|
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/); version numbers are
|
|
5
5
|
[Semantic Versioning](https://semver.org/).
|
|
6
6
|
|
|
7
|
+
## [0.7.0] — 2026-04-24
|
|
8
|
+
|
|
9
|
+
Phase 2: native Anthropic Messages surface.
|
|
10
|
+
|
|
11
|
+
Mobygate is now a **dual-surface gateway** — the existing OpenAI-compat
|
|
12
|
+
endpoint at `/v1/chat/completions` keeps working unchanged for Hermes
|
|
13
|
+
and other OpenAI-shaped clients, and a new `POST /v1/messages` endpoint
|
|
14
|
+
speaks native Anthropic Messages wire format for OpenClaw and any other
|
|
15
|
+
Anthropic-shaped client. Both surfaces translate to the same underlying
|
|
16
|
+
`query()` call on the Claude Agent SDK.
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
|
|
20
|
+
- **`POST /v1/messages`** — non-streaming + streaming. Accepts the
|
|
21
|
+
Anthropic Messages request shape (model, messages, system, tools,
|
|
22
|
+
max_tokens, stream, etc.) with native content blocks (`text`, `image`,
|
|
23
|
+
`tool_use`, `tool_result`) and returns native Anthropic responses.
|
|
24
|
+
- **Native Anthropic SSE streaming** — emits `message_start` →
|
|
25
|
+
`content_block_start/delta/stop` (per block, with sequential index) →
|
|
26
|
+
`message_delta` (stop_reason, usage) → `message_stop`. Tool calls
|
|
27
|
+
stream as `content_block_start` with `content_block: {type: 'tool_use'}`
|
|
28
|
+
followed by `content_block_delta` with `delta: {type: 'input_json_delta'}`.
|
|
29
|
+
- **Image passthrough** on `/v1/messages` — base64 data URLs and HTTP
|
|
30
|
+
URLs both flow through to the SDK as Anthropic image content blocks.
|
|
31
|
+
- New module: `lib/anthropic.js` — request translator, response builder,
|
|
32
|
+
streaming SSE translator, stop-reason mapper.
|
|
33
|
+
|
|
34
|
+
### Changed
|
|
35
|
+
|
|
36
|
+
- **Tool calling on `/v1/messages` reuses Phase 1's native MCP path**
|
|
37
|
+
(from `lib/tool-bridge.js`). No prompt-injected `<tool_call>` text
|
|
38
|
+
protocol on the new surface — the model emits genuine `tool_use`
|
|
39
|
+
content blocks via SDK MCP registration, and we surface them
|
|
40
|
+
structurally. (Earlier WIP work attempted to revert the Phase 1
|
|
41
|
+
tool architecture for this surface; that's been undone in favor of
|
|
42
|
+
reusing the proven path that ships in Hermes today.)
|
|
43
|
+
|
|
44
|
+
### Known limitation (carried from Phase 1)
|
|
45
|
+
|
|
46
|
+
- Inbound `tool_result` blocks on a resumed turn are still spliced as
|
|
47
|
+
`<tool_results>` text into the next prompt, rather than passed
|
|
48
|
+
through as native Anthropic `tool_result` content blocks. Reason:
|
|
49
|
+
aborting the SDK on a `tool_use` prevents the assistant turn from
|
|
50
|
+
being persisted in session state — on resume, native tool_result
|
|
51
|
+
blocks have nothing to bind to and the model re-calls the tool.
|
|
52
|
+
Text-form works because the resumed model has the prior turn in
|
|
53
|
+
conversational context. A future refactor will keep the SDK
|
|
54
|
+
iterator alive across HTTP request boundaries to lift this.
|
|
55
|
+
|
|
56
|
+
### Not in scope (deferred to a later release)
|
|
57
|
+
|
|
58
|
+
- Streaming retrofit on the `/v1/chat/completions` endpoint (currently
|
|
59
|
+
buffers tool-mode responses). Mentioned as a Phase 2 candidate; held
|
|
60
|
+
for a focused pass.
|
|
61
|
+
- `cache_control` passthrough — Anthropic's prompt caching is a billing
|
|
62
|
+
feature on API keys, not OAuth Max. We don't pass these headers
|
|
63
|
+
through; nothing to gain on this billing tier.
|
|
64
|
+
|
|
65
|
+
### OpenClaw migration
|
|
66
|
+
|
|
67
|
+
After this release, register a second provider entry pointing at the
|
|
68
|
+
new endpoint (`api: "anthropic-messages"`, `baseUrl: "http://localhost:3456"`,
|
|
69
|
+
endpoint resolved as `:baseUrl/v1/messages`). The existing
|
|
70
|
+
`claude-max-proxy/*` provider stays registered for clients that want
|
|
71
|
+
the OpenAI-compat surface (Hermes).
|
|
72
|
+
|
|
73
|
+
## [0.6.2] — 2026-04-24
|
|
74
|
+
|
|
75
|
+
### Fixed
|
|
76
|
+
|
|
77
|
+
- **Update on Windows failed with `EBUSY` after the npm-spawn fix**
|
|
78
|
+
in v0.6.1 because the running mobygate Node process holds open
|
|
79
|
+
file handles inside its own install dir
|
|
80
|
+
(`...\AppData\Roaming\npm\node_modules\mobygate`). When `npm install -g`
|
|
81
|
+
tries to atomically rename the directory, Windows refuses — POSIX
|
|
82
|
+
systems can replace open files, Windows can't.
|
|
83
|
+
- **Fix:** stop the service *before* running npm install, then start
|
|
84
|
+
on the new build. Affects both `mobygate update` (CLI) and the
|
|
85
|
+
dashboard `/update/apply` endpoint. The detached update child
|
|
86
|
+
survives because we spawn it with `detached: true` +
|
|
87
|
+
`windowsHide: true`, putting it in its own console group
|
|
88
|
+
independent of the parent that gets killed.
|
|
89
|
+
- POSIX systems now also stop-then-start (instead of unload-load /
|
|
90
|
+
restart), purely for symmetry and a cleaner log progression. No
|
|
91
|
+
behavior change there.
|
|
92
|
+
|
|
7
93
|
## [0.6.1] — 2026-04-24
|
|
8
94
|
|
|
9
95
|
### Fixed
|
package/bin/mobygate.js
CHANGED
|
@@ -581,6 +581,25 @@ async function cmdUpdate() {
|
|
|
581
581
|
}
|
|
582
582
|
print('');
|
|
583
583
|
|
|
584
|
+
// ---- Stop the service FIRST on Windows, otherwise running Node holds
|
|
585
|
+
// open file handles inside the install dir and `npm install -g` fails
|
|
586
|
+
// with EBUSY when it tries to rename the directory. On macOS/Linux we
|
|
587
|
+
// can replace open files freely, but stopping early there too is harmless
|
|
588
|
+
// and gives a cleaner restart sequence — so we do it everywhere.
|
|
589
|
+
let stoppedForUpdate = false;
|
|
590
|
+
if (IS_WIN) {
|
|
591
|
+
info('Stopping service so npm install can replace files...');
|
|
592
|
+
stopWindowsTask(WIN_LABELS.server);
|
|
593
|
+
stoppedForUpdate = true;
|
|
594
|
+
} else if (IS_MAC) {
|
|
595
|
+
const p = plistPathForLabel(SERVER_LABEL);
|
|
596
|
+
launchctlUnload(p);
|
|
597
|
+
stoppedForUpdate = true;
|
|
598
|
+
} else if (IS_LINUX) {
|
|
599
|
+
stopLinuxUnit(LINUX_UNITS.server);
|
|
600
|
+
stoppedForUpdate = true;
|
|
601
|
+
}
|
|
602
|
+
|
|
584
603
|
// ---- Perform the upgrade
|
|
585
604
|
if (mode === 'npm') {
|
|
586
605
|
info(`Running \`npm install -g mobygate@latest\`...`);
|
|
@@ -599,19 +618,19 @@ async function cmdUpdate() {
|
|
|
599
618
|
return die(`Install mode is "${mode}" — can't auto-update. Reinstall via npm or git.`);
|
|
600
619
|
}
|
|
601
620
|
|
|
602
|
-
// ----
|
|
621
|
+
// ---- Bring the service back up on the new code
|
|
603
622
|
section('Restart');
|
|
604
|
-
info('
|
|
623
|
+
info('Starting service on the new build...');
|
|
605
624
|
if (IS_MAC) {
|
|
606
625
|
const p = plistPathForLabel(SERVER_LABEL);
|
|
607
|
-
|
|
608
|
-
ok(`
|
|
626
|
+
launchctlLoad(p);
|
|
627
|
+
ok(`Loaded ${SERVER_LABEL}`);
|
|
609
628
|
} else if (IS_WIN) {
|
|
610
|
-
|
|
611
|
-
ok(`
|
|
629
|
+
startWindowsTask(WIN_LABELS.server);
|
|
630
|
+
ok(`Started ${WIN_LABELS.server}`);
|
|
612
631
|
} else if (IS_LINUX) {
|
|
613
|
-
|
|
614
|
-
ok(`
|
|
632
|
+
startLinuxUnit(LINUX_UNITS.server);
|
|
633
|
+
ok(`Started ${LINUX_UNITS.server}`);
|
|
615
634
|
}
|
|
616
635
|
print('');
|
|
617
636
|
info(`Tip: if the install-layout changed (new service file, new paths), run \`mobygate init\` to re-install the service definitions.`);
|
package/lib/anthropic.js
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Messages translation layer.
|
|
3
|
+
*
|
|
4
|
+
* Translates between the native Anthropic Messages wire format
|
|
5
|
+
* (POST /v1/messages) and the Claude Agent SDK's `query()` shape used
|
|
6
|
+
* internally by mobygate. The SDK is the single source of truth for
|
|
7
|
+
* inference; this module just bridges request and response shapes so
|
|
8
|
+
* Anthropic-shaped clients (OpenClaw, etc.) can use native blocks
|
|
9
|
+
* (`text` / `image` / `tool_use` / `tool_result`) over the wire.
|
|
10
|
+
*
|
|
11
|
+
* Tool calling reuses the Phase 1 native-MCP path from lib/tool-bridge.js
|
|
12
|
+
* — client-defined tools are registered with the SDK as in-process MCP
|
|
13
|
+
* tools (Zod schemas converted from JSON Schema), the model emits real
|
|
14
|
+
* `tool_use` content blocks in its assistant stream, and we surface
|
|
15
|
+
* those structurally instead of regex-parsing them out of text. NO
|
|
16
|
+
* `<tool_call>` text protocol on this surface.
|
|
17
|
+
*
|
|
18
|
+
* Inbound `tool_result` blocks (when the client returns tool outputs in
|
|
19
|
+
* a follow-up turn) are still spliced as text on the resumed prompt.
|
|
20
|
+
* Same Phase 1 limitation: aborting the SDK on a tool_use prevents the
|
|
21
|
+
* assistant turn from being persisted in session state, so a native
|
|
22
|
+
* tool_result has nothing to bind to on resume. A future refactor that
|
|
23
|
+
* keeps the SDK iterator alive across HTTP request boundaries will lift
|
|
24
|
+
* this; until then, text-form works because the resumed model has the
|
|
25
|
+
* prior turn in conversational context.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
29
|
+
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Content extraction — read individual block types out of an Anthropic message
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
/**
 * Extract the plain text carried by an Anthropic `content` value.
 *
 * @param {*} content - A bare string, or an array of content blocks.
 * @returns {string} The string itself; for an array, the `text` of every
 *   `text` block concatenated with no separator; '' for anything else.
 */
export function anthropicTextOf(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  let combined = '';
  for (const block of content) {
    // Non-text blocks (images, tool_use, tool_result, null entries) carry no prose.
    if (block?.type !== 'text') continue;
    combined += block.text || '';
  }
  return combined;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Pick the image blocks out of an Anthropic `content` array.
 *
 * @param {*} content - A message's `content`; only arrays can hold images.
 * @returns {Array<{type: 'image', source: *}>} One normalized `{type, source}`
 *   entry per `image` block that has a truthy `source`; any other fields on
 *   the original block are not copied.
 */
export function anthropicImagesOf(content) {
  const images = [];
  if (!Array.isArray(content)) return images;

  for (const block of content) {
    if (block?.type === 'image' && block.source) {
      images.push({ type: 'image', source: block.source });
    }
  }
  return images;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Pick the `tool_result` blocks out of an Anthropic `content` array.
 *
 * @param {*} content - A message's `content`.
 * @returns {Array<object>} The original `tool_result` block objects, in
 *   order; an empty array when `content` is not an array.
 */
export function anthropicToolResultsOf(content) {
  if (!Array.isArray(content)) return [];

  const results = [];
  for (const block of content) {
    if (block?.type === 'tool_result') results.push(block);
  }
  return results;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Normalize the Anthropic `system` field to a single string.
 *
 * @param {*} system - A string, an array of content blocks, or absent.
 * @returns {string} The string as-is; for an array, the `text` of each
 *   `text` block joined with newlines; '' for every other value.
 */
export function buildAnthropicSystemString(system) {
  if (typeof system === 'string') return system;
  if (!Array.isArray(system)) return '';

  const lines = [];
  for (const block of system) {
    if (block?.type === 'text') lines.push(block.text || '');
  }
  return lines.join('\n');
}
|
|
66
|
+
|
|
67
|
+
/**
 * Report whether a request body declares at least one client tool.
 *
 * @param {*} body - The parsed request body (may be null/undefined).
 * @returns {boolean} True only when `body.tools` is a non-empty array.
 */
export function hasAnthropicTools(body) {
  const tools = body?.tools;
  if (!Array.isArray(tools)) return false;
  return tools.length > 0;
}
|
|
70
|
+
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// Tool-result text wrapping (inbound side, Phase 1 limitation persists)
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
/**
 * Flatten the `content` of a `tool_result` block into plain text for replay
 * inside the prompt.
 *
 * @param {*} content - A string, an array of content blocks, a single block
 *   or other object, or any scalar.
 * @returns {string} The string as-is; for an array, each block rendered and
 *   joined with newlines (blocks that render to an empty value are dropped);
 *   for a bare object, the same per-block rendering applied to it; '' for
 *   null/undefined; `String(content)` for remaining scalars.
 */
function stringifyToolResultBody(content) {
  // Render one block: text verbatim, images as a placeholder (binary data is
  // not replayed as text), anything else as its JSON form.
  const renderBlock = (block) => {
    if (block?.type === 'text') return block.text || '';
    if (block?.type === 'image') return '[image content omitted in tool_result text replay]';
    return JSON.stringify(block);
  };

  if (content == null) return '';
  if (typeof content === 'string') return content;
  if (Array.isArray(content)) {
    return content
      .map((block) => renderBlock(block))
      .filter(Boolean)
      .join('\n');
  }
  if (typeof content === 'object') {
    // A bare object used to fall through to String(content) and reach the
    // model as "[object Object]". Render it like a single block instead.
    try {
      return renderBlock(content) ?? '';
    } catch {
      // Not serializable (e.g. a cycle): keep the previous String() behavior
      // rather than throwing out of prompt construction.
      return String(content);
    }
  }
  return String(content);
}
|
|
89
|
+
|
|
90
|
+
/**
 * Render one inbound `tool_result` block as a `<tool_result>` text element.
 *
 * @param {object} block - A `tool_result` content block.
 * @returns {string} `<tool_result id="…">` (plus `is_error="true"` when the
 *   block is flagged as an error) wrapping the stringified body on its own
 *   lines. A missing `tool_use_id` is rendered as "unknown".
 */
function formatToolResultBlock(block) {
  const attrs = [`id="${block.tool_use_id || 'unknown'}"`];
  if (block.is_error) attrs.push('is_error="true"');

  const body = stringifyToolResultBody(block.content);
  return `<tool_result ${attrs.join(' ')}>\n${body}\n</tool_result>`;
}
|
|
96
|
+
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
// Request translation: Anthropic Messages → SDK prompt string
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
// IMPORTANT: this returns just a string. Tool definitions are NOT injected
|
|
101
|
+
// into the prompt — the caller registers them with the SDK as MCP tools
|
|
102
|
+
// (see lib/tool-bridge.js #buildClientToolsServer). This is a deliberate
|
|
103
|
+
// reversal of OpenClaw's earlier WIP, which fell back to the legacy
|
|
104
|
+
// `<tool_call>` text protocol; the native MCP path proven in Phase 1
|
|
105
|
+
// works fine and we don't need to maintain two tool implementations.
|
|
106
|
+
|
|
107
|
+
/**
 * Turn an Anthropic Messages request body into the prompt string handed to
 * the SDK. Tool definitions are never written into the prompt.
 *
 * @param {object} body - Request body; reads `body.messages` and `body.system`.
 * @param {object} [options]
 * @param {boolean} [options.resuming=false] - When true, only the last
 *   message is rendered (and only if it is a `user` message); otherwise the
 *   whole visible history is serialized.
 * @returns {string} The prompt text. May be '' (e.g. resuming with no
 *   trailing user message).
 */
export function anthropicMessagesToPrompt(body, { resuming = false } = {}) {
  const history = body.messages || [];
  const systemText = buildAnthropicSystemString(body.system);

  // Both modes wrap rendered tool results in the same envelope.
  const wrapToolResults = (rendered) => `<tool_results>\n${rendered.join('\n')}\n</tool_results>`;

  if (resuming) {
    // Only the new tail is sent: tool results first, then any fresh text.
    const tail = history[history.length - 1];
    if (tail?.role !== 'user') return '';

    const results = anthropicToolResultsOf(tail.content);
    const freshText = anthropicTextOf(tail.content);
    const segments = [];
    if (results.length > 0) {
      segments.push(wrapToolResults(results.map((block) => formatToolResultBlock(block))));
    }
    if (freshText) segments.push(freshText);
    return segments.join('\n\n');
  }

  // Fresh request: system prompt first, then every turn in order. Only the
  // text of assistant turns is replayed; their tool_use blocks are dropped.
  const segments = systemText ? [`<system>\n${systemText}\n</system>\n`] : [];

  // Tool results accumulate across consecutive user messages and are written
  // out as one group just before the next text or assistant turn.
  const pendingResults = [];
  const flushPending = () => {
    if (pendingResults.length === 0) return;
    segments.push(`${wrapToolResults(pendingResults)}\n`);
    pendingResults.length = 0;
  };

  for (const turn of history) {
    switch (turn.role) {
      case 'user': {
        for (const block of anthropicToolResultsOf(turn.content)) {
          pendingResults.push(formatToolResultBlock(block));
        }
        const text = anthropicTextOf(turn.content);
        if (text) {
          flushPending();
          segments.push(text);
        }
        break;
      }
      case 'assistant': {
        flushPending();
        const text = anthropicTextOf(turn.content);
        if (text) segments.push(`<previous_response>\n${text}\n</previous_response>\n`);
        break;
      }
      default:
        // Other roles contribute nothing to the prompt.
        break;
    }
  }

  flushPending();
  return segments.join('\n').trim();
}
|
|
159
|
+
|
|
160
|
+
/**
 * Collect the image blocks to attach to the current prompt.
 *
 * Scans the conversation from newest to oldest and returns the images of the
 * most recent `user` message that contains at least one image block. A newer
 * user message without images (plain text, tool results only) is skipped, so
 * the result is not necessarily taken from the very last user turn. Images on
 * any earlier turn than the one found are ignored, as are non-user messages.
 *
 * @param {Array<object>} messages - The request's `messages` array.
 * @returns {Array<{type: 'image', source: *}>} The matching image blocks, or
 *   an empty array when no user message carries an image.
 */
export function collectAnthropicImages(messages) {
  let idx = messages.length;
  while (idx-- > 0) {
    const turn = messages[idx];
    if (turn.role !== 'user') continue;

    const found = anthropicImagesOf(turn.content);
    if (found.length > 0) return found;
  }
  return [];
}
|
|
175
|
+
|
|
176
|
+
// ---------------------------------------------------------------------------
|
|
177
|
+
// Stop reason mapping
|
|
178
|
+
// ---------------------------------------------------------------------------
|
|
179
|
+
|
|
180
|
+
/**
 * Map an SDK result message to an Anthropic `stop_reason`.
 *
 * @param {object} [sdkResult] - The SDK result; reads `stop_reason` and `subtype`.
 * @returns {'tool_use'|'max_tokens'|'stop_sequence'|'end_turn'} `stop_reason`
 *   wins when it is one of the recognized values; otherwise a subtype of
 *   `error_max_turns` is reported as `max_tokens`; everything else (including
 *   a missing result) is `end_turn`.
 */
export function mapStopReason(sdkResult) {
  if (!sdkResult) return 'end_turn';

  switch (sdkResult.stop_reason) {
    case 'tool_use':
      return 'tool_use';
    case 'max_tokens':
    case 'max_output_tokens':
      return 'max_tokens';
    case 'stop_sequence':
      return 'stop_sequence';
    default:
      return sdkResult.subtype === 'error_max_turns' ? 'max_tokens' : 'end_turn';
  }
}
|
|
189
|
+
|
|
190
|
+
// ---------------------------------------------------------------------------
|
|
191
|
+
// Non-streaming response builder
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
// Takes already-collected text + native tool_use blocks (from
|
|
194
|
+
// extractToolUses in tool-bridge.js) — does NOT parse anything from text.
|
|
195
|
+
// The handler in server.js does the SDK iteration and hands us the result.
|
|
196
|
+
|
|
197
|
+
/**
 * Build a non-streaming Anthropic Messages response object from
 * already-collected model output. Nothing is parsed out of `rawText`.
 *
 * @param {object} params
 * @param {string} [params.rawText=''] - Assistant text; emitted as one `text` block when non-empty.
 * @param {Array<{id: string, name: string, arguments: (string|object)}>} [params.toolUses=[]]
 *   Tool calls; `arguments` is normally a JSON string and is converted to the
 *   `input` object of a `tool_use` block.
 * @param {string} [params.model] - Model name to report; defaults to 'claude-opus-4'.
 * @param {{input_tokens?: number, output_tokens?: number}} [params.usage] - Token counts; missing values become 0.
 * @param {string} [params.requestId] - Source for the message id (first 24 chars); a UUID is generated when absent.
 * @param {string} [params.stopReason] - Explicit stop reason; when absent it is
 *   'tool_use' if any tool calls are present, else 'end_turn'.
 * @returns {object} An Anthropic `message` object.
 */
export function buildAnthropicResponse({
  rawText = '',
  toolUses = [],
  model,
  usage,
  requestId,
  stopReason,
}) {
  const id = `msg_${(requestId || uuidv4().replace(/-/g, '')).slice(0, 24)}`;

  const content = [];
  if (rawText) content.push({ type: 'text', text: rawText });
  for (const tu of toolUses) {
    content.push({
      type: 'tool_use',
      id: tu.id,
      name: tu.name,
      input: parseToolInput(tu.arguments),
    });
  }
  // An empty content array is avoided: when nothing was produced, emit a
  // single empty text block so clients always find at least one block.
  if (content.length === 0) content.push({ type: 'text', text: '' });

  return {
    id,
    type: 'message',
    role: 'assistant',
    model: model || 'claude-opus-4',
    content,
    stop_reason: stopReason || (toolUses.length ? 'tool_use' : 'end_turn'),
    stop_sequence: null,
    usage: {
      input_tokens: usage?.input_tokens || 0,
      output_tokens: usage?.output_tokens || 0,
    },
  };
}

/**
 * Convert a tool call's `arguments` into the object used as `tool_use.input`.
 *
 * Always returns a plain object: an already-parsed object is passed through,
 * a JSON string is parsed, and anything that does not yield a plain object
 * (malformed JSON, or JSON `null` / scalars / arrays) degrades to `{}`.
 *
 * @param {*} rawArguments - The `arguments` value of a collected tool call.
 * @returns {object} The tool input object.
 */
function parseToolInput(rawArguments) {
  const isPlainObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  if (isPlainObject(rawArguments)) return rawArguments;
  if (typeof rawArguments !== 'string' || rawArguments === '') return {};

  try {
    const parsed = JSON.parse(rawArguments);
    return isPlainObject(parsed) ? parsed : {};
  } catch {
    // Malformed JSON: deliberately best-effort, fall back to an empty input
    // rather than failing the whole response.
    return {};
  }
}
|
|
235
|
+
|
|
236
|
+
// ---------------------------------------------------------------------------
|
|
237
|
+
// Streaming SSE translator
|
|
238
|
+
// ---------------------------------------------------------------------------
|
|
239
|
+
// Emits Anthropic-shaped events on an Express res. The caller drives it
|
|
240
|
+
// from the SDK iteration loop:
|
|
241
|
+
//
|
|
242
|
+
// const tx = makeStreamTranslator({ res, requestId, model });
|
|
243
|
+
// tx.start(resolvedModel, inputTokens);
|
|
244
|
+
// for await (const message of query(...)) {
|
|
245
|
+
// // text deltas:
|
|
246
|
+
// for (const block of message.message?.content || []) {
|
|
247
|
+
// if (block.type === 'text') tx.pushTextDelta(block.text);
|
|
248
|
+
// }
|
|
249
|
+
// // native tool_use:
|
|
250
|
+
// if (hasToolUse(message)) {
|
|
251
|
+
// for (const tu of extractToolUses(message)) tx.pushToolUse(tu);
|
|
252
|
+
// tx.finish({ stopReason: 'tool_use', usage: ... });
|
|
253
|
+
// break;
|
|
254
|
+
// }
|
|
255
|
+
// }
|
|
256
|
+
// tx.finish({ stopReason: 'end_turn', usage: ... });
|
|
257
|
+
|
|
258
|
+
/**
 * Create a translator that writes Anthropic-shaped SSE events to `res`.
 *
 * Event order produced by the returned methods: `message_start`, then
 * per-block `content_block_start` / `content_block_delta` /
 * `content_block_stop` with a sequential index, then `message_delta` and
 * `message_stop` (or a single `error` event). Nothing is written once
 * `res.writableEnded` is true.
 *
 * @param {object} params
 * @param {object} params.res - Response to write to (`write`, `end`, `writableEnded`).
 * @param {string} [params.requestId] - Source for the message id (first 24 chars); a UUID is generated when absent.
 * @param {string} [params.model] - Model name reported when `start` gets no resolved model.
 * @returns {{start: Function, pushTextDelta: Function, pushToolUse: Function,
 *   finish: Function, error: Function, hasStarted: boolean}} The translator.
 */
export function makeStreamTranslator({ res, requestId, model }) {
  const messageId = `msg_${(requestId || uuidv4().replace(/-/g, '')).slice(0, 24)}`;
  const state = {
    started: false,
    finished: false,
    textOpen: false,
    index: -1, // index of the most recently opened content block
  };

  // Every payload carries its event name as the leading `type` field.
  function emit(type, fields) {
    if (res.writableEnded) return;
    res.write(`event: ${type}\n`);
    res.write(`data: ${JSON.stringify({ type, ...fields })}\n\n`);
  }

  /** Send `message_start` once; later calls are ignored. */
  function start(resolvedModel, inputTokens = 0) {
    if (state.started) return;
    state.started = true;
    emit('message_start', {
      message: {
        id: messageId,
        type: 'message',
        role: 'assistant',
        model: resolvedModel || model,
        content: [],
        stop_reason: null,
        stop_sequence: null,
        usage: { input_tokens: inputTokens, output_tokens: 0 },
      },
    });
  }

  // Lazily open the stream with the configured model and zero input tokens.
  function ensureStarted() {
    if (!state.started) start(model, 0);
  }

  function openTextBlock() {
    if (state.textOpen) return;
    state.index += 1;
    state.textOpen = true;
    emit('content_block_start', {
      index: state.index,
      content_block: { type: 'text', text: '' },
    });
  }

  function closeTextBlock() {
    if (!state.textOpen) return;
    emit('content_block_stop', { index: state.index });
    state.textOpen = false;
  }

  /** Append text to the current text block, opening one if needed. */
  function pushTextDelta(text) {
    if (!text || state.finished) return;
    ensureStarted();
    openTextBlock();
    emit('content_block_delta', {
      index: state.index,
      delta: { type: 'text_delta', text },
    });
  }

  /**
   * Emit one complete tool_use block: start, a single `input_json_delta`
   * carrying the whole argument string, then stop. Any open text block is
   * closed first. `tu` is `{id, name, arguments}` with `arguments` a JSON string.
   */
  function pushToolUse(tu) {
    if (state.finished) return;
    ensureStarted();
    closeTextBlock();
    state.index += 1;
    const index = state.index;
    emit('content_block_start', {
      index,
      content_block: { type: 'tool_use', id: tu.id, name: tu.name, input: {} },
    });
    emit('content_block_delta', {
      index,
      delta: { type: 'input_json_delta', partial_json: tu.arguments || '{}' },
    });
    emit('content_block_stop', { index });
  }

  /** Close the stream normally; only the first call has any effect. */
  function finish({ stopReason = 'end_turn', usage = {} } = {}) {
    if (state.finished) return;
    state.finished = true;
    ensureStarted();
    closeTextBlock();
    emit('message_delta', {
      delta: { stop_reason: stopReason, stop_sequence: null },
      usage: { output_tokens: usage.output_tokens || 0 },
    });
    emit('message_stop', {});
    if (!res.writableEnded) res.end();
  }

  /** Report a failure as an `error` event and end the response. */
  function error(err) {
    if (state.finished || res.writableEnded) return;
    state.finished = true;
    emit('error', {
      error: { type: 'api_error', message: err?.message || String(err) },
    });
    if (!res.writableEnded) res.end();
  }

  return {
    start,
    pushTextDelta,
    pushToolUse,
    finish,
    error,
    get hasStarted() { return state.started; },
  };
}
|
package/lib/updater.js
CHANGED
|
@@ -160,6 +160,16 @@ function writeUpdateState(patch) {
|
|
|
160
160
|
* Build the shell command that performs update + restart. Returned as a
|
|
161
161
|
* single string we can hand to `sh -c` / `cmd /c`. Written as a string
|
|
162
162
|
* (not an array) because we want shell redirection for log capture.
|
|
163
|
+
*
|
|
164
|
+
* Order of operations matters on Windows: the running mobygate process
|
|
165
|
+
* holds open file handles inside its own install dir (`...\node_modules\mobygate`),
|
|
166
|
+
* so `npm install -g` fails with EBUSY when it tries to rename the dir.
|
|
167
|
+
* Fix: stop the service FIRST (which kills our parent), then install,
|
|
168
|
+
* then start. The detached child survives because we spawn with
|
|
169
|
+
* `detached: true` + `windowsHide: true`, putting it in its own console
|
|
170
|
+
* group independent of the parent. POSIX systems can replace open files
|
|
171
|
+
* freely, but stopping early there too is harmless and gives a cleaner
|
|
172
|
+
* "off → install → on" sequence in the log.
|
|
163
173
|
*/
|
|
164
174
|
function buildUpdateCommand({ mode, repoRoot, logPath }) {
|
|
165
175
|
if (IS_WIN) {
|
|
@@ -167,6 +177,11 @@ function buildUpdateCommand({ mode, repoRoot, logPath }) {
|
|
|
167
177
|
// line so failures short-circuit via `||`.
|
|
168
178
|
const steps = [];
|
|
169
179
|
steps.push(`echo [mobygate-update] start at %DATE% %TIME%`);
|
|
180
|
+
// Stop FIRST so npm can replace files without EBUSY. /End stops the task
|
|
181
|
+
// even if the process is mid-request; the SDK session map writes are
|
|
182
|
+
// synchronous and the SIGTERM handler flushes before exit.
|
|
183
|
+
steps.push(`echo [mobygate-update] stopping service`);
|
|
184
|
+
steps.push(`schtasks /End /TN "${WIN_SERVER_TASK}"`);
|
|
170
185
|
if (mode === 'npm') {
|
|
171
186
|
steps.push(`npm install -g mobygate@latest`);
|
|
172
187
|
} else if (mode === 'git') {
|
|
@@ -175,15 +190,22 @@ function buildUpdateCommand({ mode, repoRoot, logPath }) {
|
|
|
175
190
|
steps.push(`npm install`);
|
|
176
191
|
}
|
|
177
192
|
steps.push(`echo [mobygate-update] restarting service`);
|
|
178
|
-
steps.push(`schtasks /
|
|
179
|
-
steps.push(`schtasks /Run /TN "${WIN_SERVER_TASK}"`);
|
|
193
|
+
steps.push(`schtasks /Run /TN "${WIN_SERVER_TASK}"`);
|
|
180
194
|
steps.push(`echo [mobygate-update] done`);
|
|
181
195
|
// Join with && so any failure stops the chain. Final redirect to log.
|
|
182
196
|
const inner = steps.map((s) => `(${s})`).join(' && ');
|
|
183
197
|
return { shell: 'cmd', cmd: `${inner} >> "${logPath}" 2>&1` };
|
|
184
198
|
}
|
|
185
|
-
// POSIX: sh -c, bail-on-first-failure via set -e
|
|
199
|
+
// POSIX: sh -c, bail-on-first-failure via set -e. Stop service first
|
|
200
|
+
// for the same reason — symmetry, cleaner restart, no harm.
|
|
186
201
|
const parts = [`set -e`, `echo "[mobygate-update] start $(date)"`];
|
|
202
|
+
parts.push(`echo "[mobygate-update] stopping service"`);
|
|
203
|
+
if (IS_MAC) {
|
|
204
|
+
const plist = join(process.env.HOME || '~', 'Library', 'LaunchAgents', `${SERVER_LABEL}.plist`);
|
|
205
|
+
parts.push(`launchctl unload "${plist}" 2>/dev/null || true`);
|
|
206
|
+
} else if (IS_LINUX) {
|
|
207
|
+
parts.push(`systemctl --user stop ${LINUX_SERVER_UNIT} 2>/dev/null || true`);
|
|
208
|
+
}
|
|
187
209
|
if (mode === 'npm') {
|
|
188
210
|
parts.push(`npm install -g mobygate@latest`);
|
|
189
211
|
} else if (mode === 'git') {
|
|
@@ -191,14 +213,12 @@ function buildUpdateCommand({ mode, repoRoot, logPath }) {
|
|
|
191
213
|
parts.push(`git pull --ff-only`);
|
|
192
214
|
parts.push(`npm install`);
|
|
193
215
|
}
|
|
194
|
-
parts.push(`echo "[mobygate-update]
|
|
216
|
+
parts.push(`echo "[mobygate-update] starting service on new build"`);
|
|
195
217
|
if (IS_MAC) {
|
|
196
218
|
const plist = join(process.env.HOME || '~', 'Library', 'LaunchAgents', `${SERVER_LABEL}.plist`);
|
|
197
|
-
|
|
198
|
-
parts.push(`launchctl unload "${plist}" 2>/dev/null || true`);
|
|
199
|
-
parts.push(`launchctl load "${plist}"`);
|
|
219
|
+
parts.push(`launchctl load "${plist}"`);
|
|
200
220
|
} else if (IS_LINUX) {
|
|
201
|
-
parts.push(`systemctl --user
|
|
221
|
+
parts.push(`systemctl --user start ${LINUX_SERVER_UNIT}`);
|
|
202
222
|
}
|
|
203
223
|
parts.push(`echo "[mobygate-update] done"`);
|
|
204
224
|
const script = parts.join('\n');
|
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -68,6 +68,14 @@ import {
|
|
|
68
68
|
readUpdateLogTail,
|
|
69
69
|
getCurrentVersion,
|
|
70
70
|
} from './lib/updater.js';
|
|
71
|
+
import {
|
|
72
|
+
anthropicMessagesToPrompt,
|
|
73
|
+
collectAnthropicImages,
|
|
74
|
+
buildAnthropicResponse,
|
|
75
|
+
makeStreamTranslator,
|
|
76
|
+
hasAnthropicTools,
|
|
77
|
+
mapStopReason,
|
|
78
|
+
} from './lib/anthropic.js';
|
|
71
79
|
|
|
72
80
|
const __filename = fileURLToPath(import.meta.url);
|
|
73
81
|
const __dirname = dirname(__filename);
|
|
@@ -765,6 +773,376 @@ async function handleNonStreaming(res, body, requestId, sessionKey) {
|
|
|
765
773
|
});
|
|
766
774
|
}
|
|
767
775
|
|
|
776
|
+
// ---------------------------------------------------------------------------
|
|
777
|
+
// POST /v1/messages — Anthropic-native surface (non-streaming + streaming)
|
|
778
|
+
// ---------------------------------------------------------------------------
|
|
779
|
+
// The dual-surface architecture: Hermes uses /v1/chat/completions
|
|
780
|
+
// (OpenAI shape), OpenClaw uses /v1/messages (Anthropic shape). Both
|
|
781
|
+
// translate to the SAME underlying SDK query() — the surfaces are pure
|
|
782
|
+
// translators over a single inference engine.
|
|
783
|
+
//
|
|
784
|
+
// Tool calling: reuses Phase 1's native MCP path from lib/tool-bridge.js.
|
|
785
|
+
// No prompt-injected tool definitions, no <tool_call> text parsing.
|
|
786
|
+
// Inbound tool_results still spliced as text on resume (see anthropic.js
|
|
787
|
+
// docstring for why — Phase 1 limitation, not lifted here).
|
|
788
|
+
|
|
789
|
+
/**
 * Serve a non-streaming `POST /v1/messages` request (Anthropic Messages shape).
 *
 * Translates the request body into an SDK `query()` call, drains the message
 * stream, and replies once with the object built by `buildAnthropicResponse`.
 * Nothing is written to `res` until the query has finished, which is why the
 * auth-retry wrapper is always allowed to retry (`bailIfStarted` returns false).
 *
 * @param {object} res - Express response; receives either the final JSON body
 *   or a 500 Anthropic-style error object.
 * @param {object} body - Parsed request body (`model`, `messages`, optional `tools`, ...).
 * @param {string} requestId - Request identifier forwarded to the response builder.
 * @param {string|null} sessionKey - Client session key; when set, the SDK session
 *   id is stored via `upsertSession` and echoed back as `X-Session-Id`.
 * @returns {Promise<void>}
 */
async function handleAnthropicNonStreaming(res, body, requestId, sessionKey) {
  const existing = getSession(sessionKey);
  const resuming = !!existing?.sdkSessionId;
  const toolsEnabled = hasAnthropicTools(body);
  const promptText = anthropicMessagesToPrompt(body, { resuming });
  const images = collectAnthropicImages(body.messages || []);
  const prompt = buildQueryPrompt(promptText, images);
  const model = resolveModel(body.model);

  // Translate Anthropic tool defs → the OpenAI shape that buildClientToolsServer
  // expects. The wrapper shape difference is just `function:{name, parameters}`
  // vs `{name, input_schema}`.
  const toolsForBridge = toolsEnabled
    ? body.tools.map((t) => ({
        type: 'function',
        function: { name: t.name, description: t.description || '', parameters: t.input_schema || {} },
      }))
    : null;
  const clientToolsServer = toolsForBridge ? buildClientToolsServer(toolsForBridge) : null;

  if (images.length) console.log(` [multimodal] ${images.length} image block(s)`);
  if (toolsEnabled) console.log(` [tools] ${body.tools.length} client tool(s) registered as MCP`);

  let resultText = '';
  let collectedToolCalls = [];
  let resolvedModel = model;
  let inputTokens = 0;
  let outputTokens = 0;
  let capturedSessionId = existing?.sdkSessionId || null;
  let stopReason = 'end_turn';
  const abortController = new AbortController();

  if (resuming) {
    console.log(` [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
  }

  const runQuery = async () => {
    // Reset per-attempt state so a retry after an auth refresh starts clean.
    resultText = '';
    collectedToolCalls = [];
    resolvedModel = model;
    inputTokens = 0;
    outputTokens = 0;
    capturedSessionId = existing?.sdkSessionId || null;
    stopReason = 'end_turn';

    for await (const message of query({
      prompt,
      options: {
        model,
        maxTurns: toolsEnabled ? 5 : 200,
        permissionMode: 'bypassPermissions',
        allowDangerouslySkipPermissions: true,
        abortController,
        ...(clientToolsServer
          ? {
              mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
              allowedTools: [`${MCP_TOOL_PREFIX}*`],
            }
          : toolsEnabled
            ? { allowedTools: [] }
            : {}),
        ...(resuming ? { resume: existing.sdkSessionId } : {}),
        ...(sessionKey && !resuming ? { persistSession: true } : {}),
      },
    })) {
      if (message.type === 'system' && message.subtype === 'init' && message.model) {
        resolvedModel = message.model;
      }

      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
        capturedSessionId = message.session_id;
        console.log(` [session] captured sdk session: ${capturedSessionId}`);
      }

      if (message.type === 'assistant' && message.message?.content) {
        const content = message.message.content;
        if (Array.isArray(content)) {
          for (const block of content) {
            if (block.type === 'text') resultText += block.text || '';
          }
        } else if (typeof content === 'string') {
          resultText += content;
        }
        if (isAuthFailureText(resultText)) {
          abortController.abort();
          throw new AuthFailureInResultText(resultText);
        }
        if (toolsEnabled && hasToolUse(message)) {
          const calls = extractToolUses(message);
          if (calls.length) {
            // Hand the tool calls back to the client instead of letting the
            // SDK continue the turn on its own.
            collectedToolCalls.push(...calls);
            stopReason = 'tool_use';
            console.log(` [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
            abortController.abort();
            break;
          }
        }
      }

      if (message.type === 'result') {
        if (message.result && !resultText) resultText = message.result;
        if (isAuthFailureText(resultText)) {
          throw new AuthFailureInResultText(resultText);
        }
        // The Agent SDK's documented result message nests token counts under
        // `usage`; the flat top-level fields are kept only as a fallback so
        // any producer that used them keeps working.
        inputTokens = (message.usage?.input_tokens ?? message.input_tokens) || 0;
        outputTokens = (message.usage?.output_tokens ?? message.output_tokens) || 0;
        stopReason = mapStopReason(message);
        break;
      }
    }
  };

  try {
    await runWithAuthRetry({
      attempt: runQuery,
      bailIfStarted: () => false,
      onRefreshing: (err) => console.warn(`[auth] 401 on /v1/messages — refreshing (${err.message?.slice(0, 80)})`),
      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying /v1/messages`),
    });
  } catch (err) {
    // An abort while tools are enabled is the expected result of the
    // tool_use short-circuit above, not a failure.
    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
    if (!(toolsEnabled && isAbort)) {
      const errMessage = err?.message ?? String(err);
      console.error('[/v1/messages] SDK error:', errMessage);
      return res.status(500).json({
        type: 'error',
        error: { type: 'api_error', message: errMessage },
      });
    }
  }

  if (sessionKey && capturedSessionId) {
    upsertSession(sessionKey, capturedSessionId, resolvedModel);
  }

  if (sessionKey) res.setHeader('X-Session-Id', sessionKey);

  res.json(buildAnthropicResponse({
    rawText: resultText.trim(),
    toolUses: collectedToolCalls,
    model: resolvedModel,
    usage: { input_tokens: inputTokens, output_tokens: outputTokens },
    requestId,
    stopReason,
  }));
}
|
|
934
|
+
|
|
935
|
+
/**
 * Serve a streaming `POST /v1/messages` request as Anthropic-style SSE.
 *
 * Sets the SSE headers, flushes them, then drives an SDK `query()` and feeds
 * its messages to the stream translator returned by `makeStreamTranslator`
 * (`start`, `pushTextDelta`, `pushToolUse`, `error`, `finish`).
 *
 * @param {object} req - Express request (currently unused; kept for the
 *   caller's signature).
 * @param {object} res - Express response used as the SSE sink.
 * @param {object} body - Parsed request body (`model`, `messages`, optional `tools`, ...).
 * @param {string} requestId - Request identifier; sent as `X-Request-Id`.
 * @param {string|null} sessionKey - Client session key; when set, the SDK session
 *   id is stored via `upsertSession` and echoed back as `X-Session-Id`.
 * @returns {Promise<void>}
 */
async function handleAnthropicStreaming(req, res, body, requestId, sessionKey) {
  const existing = getSession(sessionKey);
  const resuming = !!existing?.sdkSessionId;
  const toolsEnabled = hasAnthropicTools(body);
  const promptText = anthropicMessagesToPrompt(body, { resuming });
  const images = collectAnthropicImages(body.messages || []);
  const prompt = buildQueryPrompt(promptText, images);
  const model = resolveModel(body.model);
  const toolsForBridge = toolsEnabled
    ? body.tools.map((t) => ({
        type: 'function',
        function: { name: t.name, description: t.description || '', parameters: t.input_schema || {} },
      }))
    : null;
  const clientToolsServer = toolsForBridge ? buildClientToolsServer(toolsForBridge) : null;

  if (images.length) console.log(` [multimodal] ${images.length} image block(s)`);
  if (toolsEnabled) console.log(` [tools] ${body.tools.length} client tool(s) registered as MCP`);

  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  res.setHeader('X-Request-Id', requestId);
  if (sessionKey) res.setHeader('X-Session-Id', sessionKey);
  res.flushHeaders();

  const tx = makeStreamTranslator({ res, requestId, model });
  const abortController = new AbortController();
  let resolvedModel = model;
  let capturedSessionId = existing?.sdkSessionId || null;
  let outputTokens = 0;
  let stopReason = 'end_turn';
  let clientDisconnected = false;
  let textEmittedSoFar = ''; // dedup against same-message reflow from SDK
  let toolUseEmitted = false;

  res.on('close', () => {
    clientDisconnected = true;
    abortController.abort();
  });

  if (resuming) {
    console.log(` [session] resuming: ${sessionKey} → sdk=${existing.sdkSessionId} (msgs=${existing.messageCount})`);
  }

  // Stream only the part of `text` that has not been emitted yet. Messages
  // that resend the whole accumulated text yield just the new suffix; text
  // that does not extend what was already sent is emitted in full.
  const emitNewText = (text) => {
    const delta = text.startsWith(textEmittedSoFar)
      ? text.slice(textEmittedSoFar.length)
      : text;
    if (delta) {
      tx.pushTextDelta(delta);
      textEmittedSoFar += delta;
    }
  };

  const runQuery = async () => {
    // Reset per-attempt state in case of a 401 retry. `tx` is reused across
    // attempts, so retries are only allowed while the translator has not
    // started (see bailIfStarted below).
    resolvedModel = model;
    capturedSessionId = existing?.sdkSessionId || null;
    outputTokens = 0;
    stopReason = 'end_turn';
    textEmittedSoFar = '';
    toolUseEmitted = false;

    for await (const message of query({
      prompt,
      options: {
        model,
        maxTurns: toolsEnabled ? 5 : 200,
        permissionMode: 'bypassPermissions',
        allowDangerouslySkipPermissions: true,
        abortController,
        ...(clientToolsServer
          ? {
              mcpServers: { [MCP_SERVER_NAME]: clientToolsServer },
              allowedTools: [`${MCP_TOOL_PREFIX}*`],
            }
          : toolsEnabled
            ? { allowedTools: [] }
            : {}),
        ...(resuming ? { resume: existing.sdkSessionId } : {}),
        ...(sessionKey && !resuming ? { persistSession: true } : {}),
      },
    })) {
      if (clientDisconnected) break;

      if (message.type === 'system' && message.subtype === 'init' && message.model) {
        resolvedModel = message.model;
        tx.start(resolvedModel, 0);
      }

      if (message.type === 'assistant' && message.session_id && !capturedSessionId) {
        capturedSessionId = message.session_id;
        console.log(` [session] captured sdk session: ${capturedSessionId}`);
      }

      if (message.type === 'assistant' && message.message?.content) {
        const content = message.message.content;
        const blocks = Array.isArray(content) ? content : null;

        let combined = '';
        if (blocks) {
          for (const b of blocks) if (b?.type === 'text' && b.text) combined += b.text;
        }

        // Auth-failure short-circuit: throw so runWithAuthRetry handles it.
        // Only done before the translator has started; once output is on the
        // wire the stream cannot be rewound.
        if (combined && isAuthFailureText(combined) && !tx.hasStarted) {
          abortController.abort();
          throw new AuthFailureInResultText(combined);
        }

        // Tool_use detection: emit the message's text first (preserving the
        // text → tool_use ordering), then the tool_use blocks, then abort so
        // the client executes the tools.
        if (toolsEnabled && hasToolUse(message)) {
          const calls = extractToolUses(message);
          if (calls.length) {
            if (blocks) {
              for (const b of blocks) {
                if (b?.type === 'text' && b.text) emitNewText(b.text);
              }
            }
            for (const tu of calls) tx.pushToolUse(tu);
            toolUseEmitted = true;
            stopReason = 'tool_use';
            console.log(` [tools] ${calls.length} native tool_use block(s) — aborting SDK`);
            abortController.abort();
            break;
          }
        }

        // Plain text-only assistant message: stream the delta.
        if (blocks) {
          emitNewText(combined);
        } else if (typeof content === 'string') {
          emitNewText(content);
        }
      }

      if (message.type === 'result') {
        // Check for an auth failure BEFORE emitting the result text: pushing
        // the text first would put the failure message on the wire and make
        // a clean retry impossible.
        if (isAuthFailureText(message.result || '') && !tx.hasStarted) {
          throw new AuthFailureInResultText(message.result);
        }
        if (message.result && !textEmittedSoFar && !toolUseEmitted) {
          // Some SDK paths only deliver text via the final result message
          // (no streaming assistant messages). Emit it as a single delta.
          tx.pushTextDelta(message.result);
        }
        // The Agent SDK's documented result message nests token counts under
        // `usage`; the flat top-level field is kept only as a fallback.
        outputTokens = (message.usage?.output_tokens ?? message.output_tokens) || 0;
        if (!toolUseEmitted) stopReason = mapStopReason(message);
        break;
      }
    }
  };

  try {
    await runWithAuthRetry({
      attempt: runQuery,
      // Once the translator has started, the SSE stream is committed and a
      // retry would fragment it.
      bailIfStarted: () => tx.hasStarted,
      onRefreshing: (err) => console.warn(`[auth] 401 on /v1/messages stream — refreshing (${err.message?.slice(0, 80)})`),
      onRetry: (r) => console.log(`[auth] refreshed in ${r.durationMs}ms — retrying /v1/messages stream`),
    });
  } catch (err) {
    // Aborts caused by a client disconnect or by the tool_use short-circuit
    // are expected; anything else is reported through the translator.
    const isAbort = err?.name === 'AbortError' || /aborted/i.test(err?.message || '');
    if (!clientDisconnected && !(toolsEnabled && isAbort)) {
      console.error('[/v1/messages stream] SDK error:', err?.message ?? String(err));
      tx.error(err);
      return;
    }
  }

  if (sessionKey && capturedSessionId) {
    upsertSession(sessionKey, capturedSessionId, resolvedModel);
  }

  tx.finish({ stopReason, usage: { output_tokens: outputTokens } });
}
|
|
1145
|
+
|
|
768
1146
|
// ---------------------------------------------------------------------------
|
|
769
1147
|
// Express app
|
|
770
1148
|
// ---------------------------------------------------------------------------
|
|
@@ -866,6 +1244,69 @@ app.post('/v1/chat/completions', async (req, res) => {
|
|
|
866
1244
|
}
|
|
867
1245
|
});
|
|
868
1246
|
|
|
1247
|
+
// POST /v1/messages — Anthropic-native surface (for OpenClaw etc.).
|
|
1248
|
+
// Same dispatch shape as /v1/chat/completions, different translator pair.
|
|
1249
|
+
// Both endpoints terminate at the same SDK query() under the hood; this
|
|
1250
|
+
// route exists so Anthropic-shaped clients get native blocks (text /
|
|
1251
|
+
// image / tool_use / tool_result) without going through OpenAI shape.
|
|
1252
|
+
// Route: POST /v1/messages.
// Validates the body, logs and publishes dashboard start/end events, then
// dispatches to the streaming or non-streaming Anthropic handler depending on
// `body.stream`. Errors that escape a handler are caught here so the request
// always gets a response instead of surfacing as an unhandled rejection.
app.post('/v1/messages', async (req, res) => {
  const requestId = uuidv4().replace(/-/g, '').slice(0, 24);
  const body = req.body;

  if (!body?.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
    return res.status(400).json({
      type: 'error',
      error: { type: 'invalid_request_error', message: 'messages is required and must be a non-empty array' },
    });
  }

  const sessionKey = req.headers['x-session-id'] || body.session_id || null;
  const existing = getSession(sessionKey);
  const sessionTag = sessionKey ? ` | session=${sessionKey}${existing ? ' (resume)' : ' (new)'}` : '';
  // Resolved once and reused for both the log line and the dashboard event.
  const resolvedModel = resolveModel(body.model);

  console.log(`[${new Date().toISOString()}] anthropic ${body.stream ? 'stream' : 'sync'} | model=${body.model} → ${resolvedModel} | msgs=${body.messages.length}${sessionTag}`);

  // Dashboard event — same shape as the OpenAI route, just labeled by path.
  const startedAt = Date.now();
  const imageBlocks = collectAnthropicImages(body.messages || []).length;
  dashboardBus.emitEvent({
    type: 'request.start',
    id: requestId,
    method: 'POST',
    path: '/v1/messages',
    model: body.model,
    resolvedModel,
    session: sessionKey,
    stream: !!body.stream,
    tools: hasAnthropicTools(body),
    images: imageBlocks,
    messages: body.messages.length,
    resuming: !!existing,
  });

  // Emit request.end exactly once, whichever of 'finish' / 'close' fires first.
  let endEmitted = false;
  const emitEnd = (overrides = {}) => {
    if (endEmitted) return;
    endEmitted = true;
    dashboardBus.emitEvent({
      type: 'request.end',
      id: requestId,
      durationMs: Date.now() - startedAt,
      status: res.statusCode < 400 ? 'ok' : 'error',
      httpStatus: res.statusCode,
      ...overrides,
    });
  };
  res.on('finish', () => emitEnd());
  res.on('close', () => { if (!endEmitted) emitEnd({ status: 'error', error: 'client_disconnect' }); });

  try {
    if (body.stream) {
      await handleAnthropicStreaming(req, res, body, requestId, sessionKey);
    } else {
      await handleAnthropicNonStreaming(res, body, requestId, sessionKey);
    }
  } catch (err) {
    // The handlers only guard the SDK call itself; a throw from their setup
    // code (prompt translation, tool bridge) or from the stream translator
    // lands here.
    const errMessage = err?.message ?? String(err);
    console.error('[/v1/messages] unhandled error:', errMessage);
    if (!res.headersSent) {
      res.status(500).json({
        type: 'error',
        error: { type: 'api_error', message: errMessage },
      });
    } else if (!res.writableEnded) {
      // Headers (e.g. an SSE stream) already went out; all we can do is
      // terminate the response so the client is not left hanging.
      res.end();
    }
  }
});
|
|
1309
|
+
|
|
869
1310
|
// GET /v1/models
|
|
870
1311
|
app.get('/v1/models', (_req, res) => {
|
|
871
1312
|
const now = Math.floor(Date.now() / 1000);
|