clawmatrix 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BOOTSTRAP.md +55 -8
- package/package.json +4 -2
- package/src/auth.ts +42 -12
- package/src/cluster-service.ts +35 -7
- package/src/compat.ts +3 -0
- package/src/config.ts +57 -6
- package/src/connection.ts +34 -8
- package/src/device-info.ts +48 -0
- package/src/handoff.ts +330 -21
- package/src/http-utils.ts +35 -0
- package/src/index.ts +47 -19
- package/src/model-proxy.ts +546 -242
- package/src/peer-manager.ts +65 -6
- package/src/router.ts +89 -47
- package/src/tool-proxy.ts +22 -7
- package/src/tools/cluster-events.ts +119 -0
- package/src/tools/cluster-exec.ts +4 -0
- package/src/tools/cluster-handoff-reply.ts +77 -0
- package/src/tools/cluster-handoff.ts +12 -0
- package/src/tools/cluster-peers.ts +17 -1
- package/src/tools/cluster-send.ts +1 -3
- package/src/tools/cluster-tool.ts +2 -5
- package/src/types.ts +117 -0
- package/src/web-ui.ts +694 -342
- package/src/web.ts +726 -50
package/src/model-proxy.ts
CHANGED
|
@@ -9,14 +9,26 @@ import type {
|
|
|
9
9
|
ModelStreamChunk,
|
|
10
10
|
} from "./types.ts";
|
|
11
11
|
import { debug } from "./debug.ts";
|
|
12
|
+
import { readBody } from "./http-utils.ts";
|
|
12
13
|
|
|
13
14
|
const MODEL_TIMEOUT = 120_000; // 2 minutes
|
|
15
|
+
const MAX_STREAM_BUFFER = 1_048_576; // 1MB — guard against upstream not sending newlines
|
|
16
|
+
|
|
17
|
+
type ResponseFormat = "chat" | "responses";
|
|
18
|
+
|
|
19
|
+
interface ProxyResponse {
|
|
20
|
+
status: number;
|
|
21
|
+
headers: Record<string, string>;
|
|
22
|
+
body: string | ReadableStream;
|
|
23
|
+
}
|
|
14
24
|
|
|
15
25
|
interface PendingModelReq {
|
|
16
26
|
resolve: (value: unknown) => void;
|
|
17
27
|
reject: (error: Error) => void;
|
|
18
28
|
timer: ReturnType<typeof setTimeout>;
|
|
19
29
|
stream: boolean;
|
|
30
|
+
responseFormat: ResponseFormat;
|
|
31
|
+
model?: string;
|
|
20
32
|
controller?: ReadableStreamDefaultController;
|
|
21
33
|
encoder?: TextEncoder;
|
|
22
34
|
}
|
|
@@ -36,35 +48,125 @@ export class ModelProxy {
|
|
|
36
48
|
this.openclawConfig = openclawConfig;
|
|
37
49
|
}
|
|
38
50
|
|
|
51
|
+
/**
|
|
52
|
+
* Normalize Responses API `input` to OpenAI chat messages for WS transport.
|
|
53
|
+
*
|
|
54
|
+
* Converts:
|
|
55
|
+
* - string → [{role: "user", content: "..."}]
|
|
56
|
+
* - shorthand {role, content: "..."} → pass through
|
|
57
|
+
* - full {type: "message", content: [{type: "input_text"}, {type: "input_image"}]} → chat format
|
|
58
|
+
* - {type: "function_call_output"} → {role: "tool", ...}
|
|
59
|
+
*
|
|
60
|
+
* Returns chat-completions compatible messages (text + image_url content parts).
|
|
61
|
+
*/
|
|
62
|
+
private static normalizeResponsesInput(input: unknown): unknown[] {
|
|
63
|
+
if (typeof input === "string") {
|
|
64
|
+
return [{ role: "user", content: input }];
|
|
65
|
+
}
|
|
66
|
+
if (!Array.isArray(input)) return [];
|
|
67
|
+
|
|
68
|
+
const messages: unknown[] = [];
|
|
69
|
+
for (const item of input) {
|
|
70
|
+
if (!item || typeof item !== "object") continue;
|
|
71
|
+
const obj = item as Record<string, unknown>;
|
|
72
|
+
|
|
73
|
+
// function_call_output → tool message
|
|
74
|
+
if (obj.type === "function_call_output") {
|
|
75
|
+
messages.push({
|
|
76
|
+
role: "tool",
|
|
77
|
+
tool_call_id: obj.call_id,
|
|
78
|
+
content: typeof obj.output === "string" ? obj.output : JSON.stringify(obj.output),
|
|
79
|
+
});
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const role = typeof obj.role === "string" ? obj.role : "user";
|
|
84
|
+
|
|
85
|
+
// Simple shorthand: {role: "user", content: "hello"}
|
|
86
|
+
if (typeof obj.content === "string") {
|
|
87
|
+
messages.push({ role, content: obj.content });
|
|
88
|
+
continue;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Full format: {type: "message", role, content: [{type: "input_text"|"input_image"|...}]}
|
|
92
|
+
if (Array.isArray(obj.content)) {
|
|
93
|
+
const parts: unknown[] = [];
|
|
94
|
+
for (const part of obj.content) {
|
|
95
|
+
if (!part || typeof part !== "object") continue;
|
|
96
|
+
const p = part as Record<string, unknown>;
|
|
97
|
+
|
|
98
|
+
if (p.type === "input_text" || p.type === "output_text") {
|
|
99
|
+
// Text content → chat text part
|
|
100
|
+
if (typeof p.text === "string") {
|
|
101
|
+
parts.push({ type: "text", text: p.text });
|
|
102
|
+
}
|
|
103
|
+
} else if (p.type === "input_image") {
|
|
104
|
+
// Image content → chat image_url part
|
|
105
|
+
if (typeof p.image_url === "string") {
|
|
106
|
+
parts.push({ type: "image_url", image_url: { url: p.image_url } });
|
|
107
|
+
} else if (p.image_url && typeof p.image_url === "object") {
|
|
108
|
+
parts.push({ type: "image_url", image_url: p.image_url });
|
|
109
|
+
}
|
|
110
|
+
} else if (p.type === "text" && typeof p.text === "string") {
|
|
111
|
+
// Already chat format
|
|
112
|
+
parts.push(p);
|
|
113
|
+
} else if (p.type === "image_url") {
|
|
114
|
+
// Already chat format
|
|
115
|
+
parts.push(p);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
if (parts.length === 1 && (parts[0] as Record<string, unknown>).type === "text") {
|
|
120
|
+
// Single text part → simplify to string content
|
|
121
|
+
messages.push({ role, content: ((parts[0] as Record<string, unknown>).text as string) });
|
|
122
|
+
} else if (parts.length > 0) {
|
|
123
|
+
messages.push({ role, content: parts });
|
|
124
|
+
}
|
|
125
|
+
continue;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// Fallback
|
|
129
|
+
if (typeof obj.text === "string") {
|
|
130
|
+
messages.push({ role, content: obj.text });
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
return messages;
|
|
134
|
+
}
|
|
135
|
+
|
|
39
136
|
/** Resolve API endpoint for a model: explicit config > OpenClaw provider > gateway fallback */
|
|
40
|
-
private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string }): {
|
|
137
|
+
private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string; api?: string }): { baseUrl: string; apiKey?: string; direct: boolean; api: string } {
|
|
138
|
+
const defaultApi = "openai-completions";
|
|
139
|
+
|
|
41
140
|
// 1. Explicit baseUrl in ClawMatrix model config
|
|
42
141
|
if (model.baseUrl) {
|
|
43
142
|
return {
|
|
44
|
-
|
|
143
|
+
baseUrl: model.baseUrl.replace(/\/$/, ""),
|
|
45
144
|
apiKey: model.apiKey,
|
|
46
145
|
direct: true,
|
|
146
|
+
api: model.api ?? defaultApi,
|
|
47
147
|
};
|
|
48
148
|
}
|
|
49
149
|
|
|
50
150
|
// 2. Read from OpenClaw's models.providers[provider]
|
|
51
151
|
const providers = (this.openclawConfig as Record<string, unknown>).models as
|
|
52
|
-
{ providers?: Record<string, { baseUrl?: string; apiKey?: string }> } | undefined;
|
|
152
|
+
{ providers?: Record<string, { baseUrl?: string; apiKey?: string; api?: string }> } | undefined;
|
|
53
153
|
const providerConfig = providers?.providers?.[model.provider];
|
|
54
154
|
if (providerConfig?.baseUrl) {
|
|
55
155
|
return {
|
|
56
|
-
|
|
156
|
+
baseUrl: providerConfig.baseUrl.replace(/\/$/, ""),
|
|
57
157
|
apiKey: typeof providerConfig.apiKey === "string" ? providerConfig.apiKey : undefined,
|
|
58
158
|
direct: true,
|
|
159
|
+
api: model.api ?? providerConfig.api ?? defaultApi,
|
|
59
160
|
};
|
|
60
161
|
}
|
|
61
162
|
|
|
62
|
-
// 3. Fallback: OpenClaw gateway
|
|
163
|
+
// 3. Fallback: OpenClaw gateway
|
|
63
164
|
const { port } = this.gatewayInfo;
|
|
64
165
|
return {
|
|
65
|
-
|
|
166
|
+
baseUrl: `http://127.0.0.1:${port}/v1`,
|
|
66
167
|
apiKey: undefined,
|
|
67
168
|
direct: false,
|
|
169
|
+
api: model.api ?? defaultApi,
|
|
68
170
|
};
|
|
69
171
|
}
|
|
70
172
|
|
|
@@ -78,8 +180,13 @@ export class ModelProxy {
|
|
|
78
180
|
debug("proxy", `${req.method} ${url.pathname} → ${p}`);
|
|
79
181
|
|
|
80
182
|
if (p === "/chat/completions" && req.method === "POST") {
|
|
81
|
-
const body = await
|
|
82
|
-
const response = await this.handleChatCompletion(body);
|
|
183
|
+
const body = await readBody(req);
|
|
184
|
+
const response = await this.handleChatCompletion(body, "openai-completions");
|
|
185
|
+
debug("proxy", `response status=${response.status}`);
|
|
186
|
+
this.sendResponse(res, response);
|
|
187
|
+
} else if (p === "/responses" && req.method === "POST") {
|
|
188
|
+
const body = await readBody(req);
|
|
189
|
+
const response = await this.handleResponses(body);
|
|
83
190
|
debug("proxy", `response status=${response.status}`);
|
|
84
191
|
this.sendResponse(res, response);
|
|
85
192
|
} else if (p === "/models" && req.method === "GET") {
|
|
@@ -115,18 +222,11 @@ export class ModelProxy {
|
|
|
115
222
|
pending.reject(new Error("Shutting down"));
|
|
116
223
|
}
|
|
117
224
|
this.pending.clear();
|
|
225
|
+
this.streamText.clear();
|
|
118
226
|
}
|
|
119
227
|
|
|
120
|
-
private readBody(req: import("node:http").IncomingMessage): Promise<string> {
|
|
121
|
-
return new Promise((resolve, reject) => {
|
|
122
|
-
const chunks: Buffer[] = [];
|
|
123
|
-
req.on("data", (chunk: Buffer) => chunks.push(chunk));
|
|
124
|
-
req.on("end", () => resolve(Buffer.concat(chunks).toString()));
|
|
125
|
-
req.on("error", reject);
|
|
126
|
-
});
|
|
127
|
-
}
|
|
128
228
|
|
|
129
|
-
private sendResponse(res: import("node:http").ServerResponse, response:
|
|
229
|
+
private sendResponse(res: import("node:http").ServerResponse, response: ProxyResponse) {
|
|
130
230
|
res.writeHead(response.status, response.headers);
|
|
131
231
|
if (typeof response.body === "string") {
|
|
132
232
|
res.end(response.body);
|
|
@@ -152,56 +252,59 @@ export class ModelProxy {
|
|
|
152
252
|
}
|
|
153
253
|
|
|
154
254
|
// ── HTTP handlers ──────────────────────────────────────────────
|
|
155
|
-
private async handleChatCompletion(rawBody: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
|
|
156
|
-
let body: {
|
|
157
|
-
model: string;
|
|
158
|
-
messages: unknown[];
|
|
159
|
-
stream?: boolean;
|
|
160
|
-
temperature?: number;
|
|
161
|
-
max_tokens?: number;
|
|
162
|
-
};
|
|
163
255
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
body: JSON.stringify({ error: "Invalid JSON" }),
|
|
171
|
-
};
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
const rawModelId = body.model;
|
|
175
|
-
// Parse "nodeId/model" format: first segment is nodeId, rest is model ID.
|
|
176
|
-
// OpenClaw sends "providerId/modelId" where providerId = nodeId, so this
|
|
177
|
-
// naturally handles both OpenClaw calls and direct curl calls.
|
|
178
|
-
// If no "/" present, treat entire string as model ID and auto-resolve.
|
|
179
|
-
let nodeId: string | undefined;
|
|
180
|
-
let modelId: string;
|
|
256
|
+
/** Resolve model ID → proxyModel + route. Shared by chat completions and responses handlers. */
|
|
257
|
+
private resolveModelRoute(rawModelId: string): {
|
|
258
|
+
nodeId: string; modelId: string;
|
|
259
|
+
proxyModel: (typeof this.config.proxyModels)[number] | undefined;
|
|
260
|
+
routeNodeId: string;
|
|
261
|
+
} | { error: { status: number; message: string } } {
|
|
181
262
|
const slashIdx = rawModelId.indexOf("/");
|
|
263
|
+
let nodeId: string;
|
|
264
|
+
let modelId: string;
|
|
265
|
+
let proxyModel: (typeof this.config.proxyModels)[number] | undefined;
|
|
266
|
+
|
|
182
267
|
if (slashIdx > 0) {
|
|
183
268
|
nodeId = rawModelId.slice(0, slashIdx);
|
|
184
269
|
modelId = rawModelId.slice(slashIdx + 1);
|
|
270
|
+
proxyModel = this.config.proxyModels.find((m) => m.id === modelId && m.nodeId === nodeId);
|
|
185
271
|
} else {
|
|
186
272
|
modelId = rawModelId;
|
|
273
|
+
proxyModel = this.config.proxyModels.find((m) => m.id === modelId);
|
|
274
|
+
if (!proxyModel) {
|
|
275
|
+
return { error: { status: 404, message: `Model "${rawModelId}" not found in proxy models` } };
|
|
276
|
+
}
|
|
277
|
+
nodeId = proxyModel.nodeId;
|
|
187
278
|
}
|
|
188
|
-
|
|
189
|
-
const
|
|
190
|
-
|
|
191
|
-
const route = nodeId
|
|
192
|
-
? this.peerManager.router.getRoute(nodeId)
|
|
193
|
-
: this.peerManager.router.resolveModel(modelId);
|
|
194
|
-
debug("proxy", `proxyModel=${proxyModel?.id ?? "none"} route=${route?.nodeId ?? "none"} reachable=${route ? this.peerManager.canReach(route.nodeId) : false}`);
|
|
279
|
+
|
|
280
|
+
const route = this.peerManager.router.getRoute(nodeId);
|
|
281
|
+
debug("proxy", `model raw="${rawModelId}" nodeId=${nodeId} modelId="${modelId}" route=${route?.nodeId ?? "none"}`);
|
|
195
282
|
if (!route) {
|
|
196
|
-
return {
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
283
|
+
return { error: { status: 404, message: `Node "${nodeId}" not found in cluster` } };
|
|
284
|
+
}
|
|
285
|
+
if (!this.peerManager.canReach(route.nodeId)) {
|
|
286
|
+
return { error: { status: 502, message: `Cannot reach model node "${route.nodeId}"` } };
|
|
287
|
+
}
|
|
288
|
+
return { nodeId, modelId, proxyModel, routeNodeId: route.nodeId };
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
private async handleChatCompletion(rawBody: string, _api: string): Promise<ProxyResponse> {
|
|
292
|
+
let body: { model: string; messages: unknown[]; stream?: boolean; temperature?: number; max_tokens?: number };
|
|
293
|
+
try {
|
|
294
|
+
body = JSON.parse(rawBody);
|
|
295
|
+
} catch {
|
|
296
|
+
return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
const resolved = this.resolveModelRoute(body.model);
|
|
300
|
+
if ("error" in resolved) {
|
|
301
|
+
return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
|
|
201
302
|
}
|
|
202
303
|
|
|
203
|
-
|
|
304
|
+
const { modelId, proxyModel, routeNodeId } = resolved;
|
|
204
305
|
const messages = body.messages;
|
|
306
|
+
debug("proxy", `messages count=${messages?.length ?? 0} roles=${(messages ?? []).map((m: unknown) => (m as Record<string, unknown>)?.role).join(",")}`);
|
|
307
|
+
|
|
205
308
|
if (proxyModel?.description) {
|
|
206
309
|
const first = messages[0] as { role?: string; content?: string } | undefined;
|
|
207
310
|
if (first?.role === "system" && typeof first.content === "string") {
|
|
@@ -213,35 +316,66 @@ export class ModelProxy {
|
|
|
213
316
|
|
|
214
317
|
const stream = body.stream ?? false;
|
|
215
318
|
const requestId = crypto.randomUUID();
|
|
216
|
-
|
|
217
319
|
const frame: ModelRequest = {
|
|
218
|
-
type: "model_req",
|
|
219
|
-
|
|
220
|
-
from: this.config.nodeId,
|
|
221
|
-
to: route.nodeId,
|
|
222
|
-
timestamp: Date.now(),
|
|
223
|
-
payload: {
|
|
224
|
-
model: modelId,
|
|
225
|
-
messages,
|
|
226
|
-
temperature: body.temperature,
|
|
227
|
-
maxTokens: body.max_tokens,
|
|
228
|
-
stream,
|
|
229
|
-
},
|
|
320
|
+
type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
|
|
321
|
+
payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_tokens, stream },
|
|
230
322
|
};
|
|
231
323
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
324
|
+
if (stream) {
|
|
325
|
+
return this.handleStreamRequest(requestId, routeNodeId, frame, "chat");
|
|
326
|
+
} else {
|
|
327
|
+
return this.handleNonStreamRequest(requestId, routeNodeId, frame, "chat");
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
private async handleResponses(rawBody: string): Promise<ProxyResponse> {
|
|
332
|
+
let body: { model: string; input: unknown; stream?: boolean; temperature?: number; max_output_tokens?: number; instructions?: string };
|
|
333
|
+
try {
|
|
334
|
+
body = JSON.parse(rawBody);
|
|
335
|
+
} catch {
|
|
336
|
+
return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
const resolved = this.resolveModelRoute(body.model);
|
|
340
|
+
if ("error" in resolved) {
|
|
341
|
+
return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
const { modelId, proxyModel, routeNodeId } = resolved;
|
|
345
|
+
|
|
346
|
+
// Normalize responses API input → simple chat messages for WS transport.
|
|
347
|
+
// Responses API items use {type: "message", role, content: [{type: "input_text", text}]}
|
|
348
|
+
// but WS protocol carries simple {role, content} chat messages.
|
|
349
|
+
const messages = ModelProxy.normalizeResponsesInput(body.input);
|
|
350
|
+
|
|
351
|
+
// Prepend instructions as system/developer message
|
|
352
|
+
if (body.instructions) {
|
|
353
|
+
messages.unshift({ role: "developer", content: body.instructions });
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
if (proxyModel?.description) {
|
|
357
|
+
const first = messages[0] as { role?: string; content?: string } | undefined;
|
|
358
|
+
if (first?.role === "system" && typeof first.content === "string") {
|
|
359
|
+
first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
|
|
360
|
+
} else if (first?.role === "developer" && typeof first.content === "string") {
|
|
361
|
+
first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
|
|
362
|
+
} else {
|
|
363
|
+
messages.unshift({ role: "system", content: `[Model: ${proxyModel.description}]` });
|
|
364
|
+
}
|
|
239
365
|
}
|
|
240
366
|
|
|
367
|
+
const stream = body.stream ?? false;
|
|
368
|
+
const requestId = crypto.randomUUID();
|
|
369
|
+
debug("proxy", `responses: stream=${stream} messages=${messages.length} input_type=${typeof body.input}${Array.isArray(body.input) ? `[${body.input.length}]` : ""}`);
|
|
370
|
+
const frame: ModelRequest = {
|
|
371
|
+
type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
|
|
372
|
+
payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
|
|
373
|
+
};
|
|
374
|
+
|
|
241
375
|
if (stream) {
|
|
242
|
-
return this.handleStreamRequest(requestId,
|
|
376
|
+
return this.handleStreamRequest(requestId, routeNodeId, frame, "responses");
|
|
243
377
|
} else {
|
|
244
|
-
return this.handleNonStreamRequest(requestId,
|
|
378
|
+
return this.handleNonStreamRequest(requestId, routeNodeId, frame, "responses");
|
|
245
379
|
}
|
|
246
380
|
}
|
|
247
381
|
|
|
@@ -249,73 +383,102 @@ export class ModelProxy {
|
|
|
249
383
|
requestId: string,
|
|
250
384
|
targetNodeId: string,
|
|
251
385
|
frame: ModelRequest,
|
|
252
|
-
|
|
386
|
+
responseFormat: ResponseFormat,
|
|
387
|
+
): ProxyResponse & { body: ReadableStream } {
|
|
253
388
|
const encoder = new TextEncoder();
|
|
389
|
+
const model = frame.payload.model;
|
|
254
390
|
|
|
255
391
|
const readable = new ReadableStream({
|
|
256
392
|
start: (controller) => {
|
|
257
393
|
const timer = setTimeout(() => {
|
|
258
394
|
this.pending.delete(requestId);
|
|
395
|
+
this.streamText.delete(requestId);
|
|
259
396
|
this.peerManager.router.markFailed(requestId);
|
|
260
397
|
try {
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
);
|
|
269
|
-
controller.enqueue(encoder.encode("data: [DONE]\n\n"));
|
|
398
|
+
if (responseFormat === "responses") {
|
|
399
|
+
controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: "\n\n[ClawMatrix] Error: model request timed out" })}\n\n`));
|
|
400
|
+
this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
|
|
401
|
+
} else {
|
|
402
|
+
controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: "\n\n[ClawMatrix] Error: model request timed out" }, finish_reason: "stop" }] })}\n\n`));
|
|
403
|
+
controller.enqueue(encoder.encode("data: [DONE]\n\n"));
|
|
404
|
+
}
|
|
270
405
|
controller.close();
|
|
271
|
-
} catch {
|
|
272
|
-
// controller may already be closed
|
|
273
|
-
}
|
|
406
|
+
} catch { /* controller may already be closed */ }
|
|
274
407
|
}, MODEL_TIMEOUT);
|
|
275
408
|
|
|
276
409
|
this.pending.set(requestId, {
|
|
277
|
-
resolve: () => {},
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
stream: true,
|
|
281
|
-
controller,
|
|
282
|
-
encoder,
|
|
410
|
+
resolve: () => {}, reject: () => {},
|
|
411
|
+
timer, stream: true, responseFormat, model,
|
|
412
|
+
controller, encoder,
|
|
283
413
|
});
|
|
284
414
|
|
|
415
|
+
// Emit setup events for responses API
|
|
416
|
+
if (responseFormat === "responses") {
|
|
417
|
+
this.enqueueResponsesStreamSetup(controller, encoder, requestId, model);
|
|
418
|
+
}
|
|
419
|
+
|
|
285
420
|
const sent = this.peerManager.sendTo(targetNodeId, frame);
|
|
286
421
|
if (!sent) {
|
|
287
422
|
this.pending.delete(requestId);
|
|
288
423
|
clearTimeout(timer);
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
controller
|
|
424
|
+
try {
|
|
425
|
+
if (responseFormat === "responses") {
|
|
426
|
+
controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` })}\n\n`));
|
|
427
|
+
this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
|
|
428
|
+
} else {
|
|
429
|
+
controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` }, finish_reason: "stop" }] })}\n\n`));
|
|
430
|
+
controller.enqueue(encoder.encode("data: [DONE]\n\n"));
|
|
431
|
+
}
|
|
432
|
+
controller.close();
|
|
433
|
+
} catch { /* controller may already be closed */ }
|
|
299
434
|
}
|
|
300
435
|
},
|
|
301
436
|
});
|
|
302
437
|
|
|
303
438
|
return {
|
|
304
439
|
status: 200,
|
|
305
|
-
headers: {
|
|
306
|
-
"Content-Type": "text/event-stream",
|
|
307
|
-
"Cache-Control": "no-cache",
|
|
308
|
-
"Connection": "keep-alive",
|
|
309
|
-
},
|
|
440
|
+
headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" },
|
|
310
441
|
body: readable,
|
|
311
442
|
};
|
|
312
443
|
}
|
|
313
444
|
|
|
445
|
+
/** Emit responses API stream setup events (response.created → content_part.added). */
|
|
446
|
+
private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string) {
|
|
447
|
+
const respId = `resp_${id}`;
|
|
448
|
+
const msgId = `msg_${id}`;
|
|
449
|
+
const now = Math.floor(Date.now() / 1000);
|
|
450
|
+
const baseResp = { id: respId, object: "response", created_at: now, status: "in_progress", model, output: [] };
|
|
451
|
+
const msgItem = { type: "message", id: msgId, role: "assistant", content: [], status: "in_progress" };
|
|
452
|
+
const textPart = { type: "output_text", text: "" };
|
|
453
|
+
|
|
454
|
+
controller.enqueue(encoder.encode(`event: response.created\ndata: ${JSON.stringify({ type: "response.created", response: baseResp })}\n\n`));
|
|
455
|
+
controller.enqueue(encoder.encode(`event: response.in_progress\ndata: ${JSON.stringify({ type: "response.in_progress", response: baseResp })}\n\n`));
|
|
456
|
+
controller.enqueue(encoder.encode(`event: response.output_item.added\ndata: ${JSON.stringify({ type: "response.output_item.added", output_index: 0, item: msgItem })}\n\n`));
|
|
457
|
+
controller.enqueue(encoder.encode(`event: response.content_part.added\ndata: ${JSON.stringify({ type: "response.content_part.added", item_id: msgId, output_index: 0, content_index: 0, part: textPart })}\n\n`));
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
/** Emit responses API stream completion events (output_text.done → response.completed). */
|
|
461
|
+
private enqueueResponsesStreamDone(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string, content?: string, usage?: { inputTokens: number; outputTokens: number }) {
|
|
462
|
+
const respId = `resp_${id}`;
|
|
463
|
+
const msgId = `msg_${id}`;
|
|
464
|
+
const now = Math.floor(Date.now() / 1000);
|
|
465
|
+
const textPart = { type: "output_text", text: content ?? "" };
|
|
466
|
+
const msgItem = { type: "message", id: msgId, role: "assistant", content: [textPart], status: "completed" };
|
|
467
|
+
const usageObj = usage ? { input_tokens: usage.inputTokens, output_tokens: usage.outputTokens, total_tokens: usage.inputTokens + usage.outputTokens } : { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
|
|
468
|
+
const completedResp = { id: respId, object: "response", created_at: now, status: "completed", model, output: [msgItem], usage: usageObj };
|
|
469
|
+
|
|
470
|
+
controller.enqueue(encoder.encode(`event: response.output_text.done\ndata: ${JSON.stringify({ type: "response.output_text.done", item_id: msgId, output_index: 0, content_index: 0, text: content ?? "" })}\n\n`));
|
|
471
|
+
controller.enqueue(encoder.encode(`event: response.content_part.done\ndata: ${JSON.stringify({ type: "response.content_part.done", item_id: msgId, output_index: 0, content_index: 0, part: textPart })}\n\n`));
|
|
472
|
+
controller.enqueue(encoder.encode(`event: response.output_item.done\ndata: ${JSON.stringify({ type: "response.output_item.done", output_index: 0, item: msgItem })}\n\n`));
|
|
473
|
+
controller.enqueue(encoder.encode(`event: response.completed\ndata: ${JSON.stringify({ type: "response.completed", response: completedResp })}\n\n`));
|
|
474
|
+
}
|
|
475
|
+
|
|
314
476
|
private async handleNonStreamRequest(
|
|
315
477
|
requestId: string,
|
|
316
478
|
targetNodeId: string,
|
|
317
479
|
frame: ModelRequest,
|
|
318
|
-
|
|
480
|
+
responseFormat: ResponseFormat,
|
|
481
|
+
): Promise<ProxyResponse & { body: string }> {
|
|
319
482
|
try {
|
|
320
483
|
const result = await new Promise<ModelResponse["payload"]>(
|
|
321
484
|
(resolve, reject) => {
|
|
@@ -327,9 +490,7 @@ export class ModelProxy {
|
|
|
327
490
|
|
|
328
491
|
this.pending.set(requestId, {
|
|
329
492
|
resolve: resolve as (v: unknown) => void,
|
|
330
|
-
reject,
|
|
331
|
-
timer,
|
|
332
|
-
stream: false,
|
|
493
|
+
reject, timer, stream: false, responseFormat,
|
|
333
494
|
});
|
|
334
495
|
|
|
335
496
|
const sent = this.peerManager.sendTo(targetNodeId, frame);
|
|
@@ -349,6 +510,41 @@ export class ModelProxy {
|
|
|
349
510
|
};
|
|
350
511
|
}
|
|
351
512
|
|
|
513
|
+
if (responseFormat === "responses") {
|
|
514
|
+
const msgId = `msg_${requestId}`;
|
|
515
|
+
const usageObj = result.usage
|
|
516
|
+
? { input_tokens: result.usage.inputTokens, output_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
|
|
517
|
+
: { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
|
|
518
|
+
// If upstream sent full output array (responses API), use it directly
|
|
519
|
+
const output = Array.isArray(result.message)
|
|
520
|
+
? result.message
|
|
521
|
+
: [{
|
|
522
|
+
type: "message", id: msgId, role: "assistant",
|
|
523
|
+
content: [{ type: "output_text", text: result.content ?? "" }],
|
|
524
|
+
status: "completed",
|
|
525
|
+
}];
|
|
526
|
+
return {
|
|
527
|
+
status: 200,
|
|
528
|
+
headers: { "Content-Type": "application/json" },
|
|
529
|
+
body: JSON.stringify({
|
|
530
|
+
id: `resp_${requestId}`,
|
|
531
|
+
object: "response",
|
|
532
|
+
created_at: Math.floor(Date.now() / 1000),
|
|
533
|
+
status: "completed",
|
|
534
|
+
model: frame.payload.model,
|
|
535
|
+
output,
|
|
536
|
+
usage: usageObj,
|
|
537
|
+
}),
|
|
538
|
+
};
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
// Chat completions format — use full message object when available (has tool_calls etc.)
|
|
542
|
+
const msg = result.message as Record<string, unknown> | undefined;
|
|
543
|
+
const message = msg
|
|
544
|
+
? { role: "assistant", ...msg }
|
|
545
|
+
: { role: "assistant", content: result.content };
|
|
546
|
+
const finishReason = msg?.tool_calls ? "tool_calls" : "stop";
|
|
547
|
+
|
|
352
548
|
return {
|
|
353
549
|
status: 200,
|
|
354
550
|
headers: { "Content-Type": "application/json" },
|
|
@@ -357,19 +553,13 @@ export class ModelProxy {
|
|
|
357
553
|
object: "chat.completion",
|
|
358
554
|
created: Math.floor(Date.now() / 1000),
|
|
359
555
|
model: frame.payload.model,
|
|
360
|
-
choices: [
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
},
|
|
366
|
-
],
|
|
556
|
+
choices: [{
|
|
557
|
+
index: 0,
|
|
558
|
+
message,
|
|
559
|
+
finish_reason: finishReason,
|
|
560
|
+
}],
|
|
367
561
|
usage: result.usage
|
|
368
|
-
? {
|
|
369
|
-
prompt_tokens: result.usage.inputTokens,
|
|
370
|
-
completion_tokens: result.usage.outputTokens,
|
|
371
|
-
total_tokens: result.usage.inputTokens + result.usage.outputTokens,
|
|
372
|
-
}
|
|
562
|
+
? { prompt_tokens: result.usage.inputTokens, completion_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
|
|
373
563
|
: undefined,
|
|
374
564
|
}),
|
|
375
565
|
};
|
|
@@ -382,17 +572,36 @@ export class ModelProxy {
|
|
|
382
572
|
}
|
|
383
573
|
}
|
|
384
574
|
|
|
385
|
-
private handleListModels():
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
575
|
+
private handleListModels(): ProxyResponse & { body: string } {
|
|
576
|
+
// Build from proxyModels config (has full detail) and enrich with
|
|
577
|
+
// connectivity info from the router so consumers know what's reachable.
|
|
578
|
+
const reachable = new Set(
|
|
579
|
+
this.peerManager.router.getAllPeers()
|
|
580
|
+
.filter((p) => p.connection?.isOpen || p.reachableVia)
|
|
581
|
+
.map((p) => p.nodeId),
|
|
582
|
+
);
|
|
583
|
+
|
|
584
|
+
const models = this.config.proxyModels.map((m) => {
|
|
585
|
+
const entry: Record<string, unknown> = {
|
|
586
|
+
id: m.id,
|
|
587
|
+
object: "model",
|
|
588
|
+
created: 0,
|
|
589
|
+
owned_by: m.provider ?? "unknown",
|
|
590
|
+
// Extended fields
|
|
591
|
+
...(m.description && { description: m.description }),
|
|
592
|
+
...(m.contextWindow && { context_window: m.contextWindow }),
|
|
593
|
+
...(m.maxTokens && { max_tokens: m.maxTokens }),
|
|
594
|
+
...(m.reasoning !== undefined && { reasoning: m.reasoning }),
|
|
595
|
+
...(m.input && { input: m.input }),
|
|
596
|
+
...(m.api && { api: m.api }),
|
|
597
|
+
...(m.cost && { cost: m.cost }),
|
|
598
|
+
...(m.compat && { compat: m.compat }),
|
|
599
|
+
// Cluster info
|
|
600
|
+
node_id: m.nodeId,
|
|
601
|
+
reachable: reachable.has(m.nodeId),
|
|
602
|
+
};
|
|
603
|
+
return entry;
|
|
604
|
+
});
|
|
396
605
|
|
|
397
606
|
return {
|
|
398
607
|
status: 200,
|
|
@@ -414,19 +623,16 @@ export class ModelProxy {
|
|
|
414
623
|
clearTimeout(pending.timer);
|
|
415
624
|
this.pending.delete(frame.id);
|
|
416
625
|
try {
|
|
417
|
-
const
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
}
|
|
422
|
-
|
|
423
|
-
pending.encoder.encode(
|
|
424
|
-
|
|
425
|
-
pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
|
|
626
|
+
const errMsg = `[ClawMatrix] Remote error: ${frame.payload.error}`;
|
|
627
|
+
if (pending.responseFormat === "responses") {
|
|
628
|
+
pending.controller.enqueue(pending.encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: errMsg })}\n\n`));
|
|
629
|
+
this.enqueueResponsesStreamDone(pending.controller, pending.encoder, frame.id, pending.model ?? "", errMsg);
|
|
630
|
+
} else {
|
|
631
|
+
pending.controller.enqueue(pending.encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: errMsg }, finish_reason: "stop" }] })}\n\n`));
|
|
632
|
+
pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
|
|
633
|
+
}
|
|
426
634
|
pending.controller.close();
|
|
427
|
-
} catch {
|
|
428
|
-
// controller may already be closed
|
|
429
|
-
}
|
|
635
|
+
} catch { /* controller may already be closed */ }
|
|
430
636
|
}
|
|
431
637
|
return;
|
|
432
638
|
}
|
|
@@ -436,6 +642,9 @@ export class ModelProxy {
|
|
|
436
642
|
pending.resolve(frame.payload);
|
|
437
643
|
}
|
|
438
644
|
|
|
645
|
+
/** Accumulated text per stream request (needed for responses API done events). */
|
|
646
|
+
private streamText = new Map<string, string>();
|
|
647
|
+
|
|
439
648
|
handleModelStream(frame: ModelStreamChunk) {
|
|
440
649
|
debug("stream", `id=${frame.id} done=${frame.payload.done} delta=${JSON.stringify(frame.payload.delta?.slice?.(0, 50) ?? frame.payload.delta)} failed=${this.peerManager.router.isFailed(frame.id)} hasPending=${this.pending.has(frame.id)}`);
|
|
441
650
|
if (this.peerManager.router.isFailed(frame.id)) return;
|
|
@@ -443,54 +652,87 @@ export class ModelProxy {
|
|
|
443
652
|
if (!pending?.stream || !pending.controller || !pending.encoder) return;
|
|
444
653
|
|
|
445
654
|
try {
|
|
446
|
-
if (
|
|
447
|
-
|
|
448
|
-
id: `chatcmpl-${frame.id}`,
|
|
449
|
-
object: "chat.completion.chunk",
|
|
450
|
-
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
|
|
451
|
-
};
|
|
452
|
-
if (frame.payload.usage) {
|
|
453
|
-
finalChunk.usage = {
|
|
454
|
-
prompt_tokens: frame.payload.usage.inputTokens,
|
|
455
|
-
completion_tokens: frame.payload.usage.outputTokens,
|
|
456
|
-
total_tokens: frame.payload.usage.inputTokens + frame.payload.usage.outputTokens,
|
|
457
|
-
};
|
|
458
|
-
}
|
|
459
|
-
pending.controller.enqueue(
|
|
460
|
-
pending.encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`),
|
|
461
|
-
);
|
|
462
|
-
pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
|
|
463
|
-
pending.controller.close();
|
|
464
|
-
clearTimeout(pending.timer);
|
|
465
|
-
this.pending.delete(frame.id);
|
|
655
|
+
if (pending.responseFormat === "responses") {
|
|
656
|
+
this.handleModelStreamResponses(frame, pending);
|
|
466
657
|
} else {
|
|
467
|
-
|
|
468
|
-
id: `chatcmpl-${frame.id}`,
|
|
469
|
-
object: "chat.completion.chunk",
|
|
470
|
-
choices: [
|
|
471
|
-
{
|
|
472
|
-
index: 0,
|
|
473
|
-
delta: { content: frame.payload.delta },
|
|
474
|
-
finish_reason: null,
|
|
475
|
-
},
|
|
476
|
-
],
|
|
477
|
-
};
|
|
478
|
-
pending.controller.enqueue(
|
|
479
|
-
pending.encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`),
|
|
480
|
-
);
|
|
658
|
+
this.handleModelStreamChat(frame, pending);
|
|
481
659
|
}
|
|
482
660
|
} catch {
|
|
483
661
|
clearTimeout(pending.timer);
|
|
484
662
|
this.pending.delete(frame.id);
|
|
663
|
+
this.streamText.delete(frame.id);
|
|
485
664
|
}
|
|
486
665
|
}
|
|
487
666
|
|
|
667
|
+
/**
 * Relay one stream frame to the client as a chat-completions SSE chunk.
 *
 * A non-terminal frame becomes a single `chat.completion.chunk` whose delta
 * is the frame's `deltaObj` when present, otherwise `{ content: delta }`.
 * A terminal frame (`payload.done`) becomes a closing chunk with
 * `finish_reason: "stop"` (plus token usage when the frame carries it),
 * followed by the `[DONE]` sentinel; the stream is then closed, the timer
 * cleared and the pending entry removed.
 *
 * The caller is expected to have verified that `pending.controller` and
 * `pending.encoder` are set before dispatching here.
 */
private handleModelStreamChat(frame: ModelStreamChunk, pending: PendingModelReq) {
  const { payload } = frame;
  const chunkId = `chatcmpl-${frame.id}`;
  // Single place that encodes text and pushes it onto the client stream.
  const push = (text: string) => {
    pending.controller!.enqueue(pending.encoder!.encode(text));
  };

  if (!payload.done) {
    // Prefer the structured delta (tool_calls etc.); fall back to plain text.
    const body = {
      id: chunkId,
      object: "chat.completion.chunk",
      choices: [{ index: 0, delta: payload.deltaObj ?? { content: payload.delta }, finish_reason: null }],
    };
    push(`data: ${JSON.stringify(body)}\n\n`);
    return;
  }

  const { usage } = payload;
  const closing: Record<string, unknown> = {
    id: chunkId,
    object: "chat.completion.chunk",
    choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
    // Usage is attached only when the terminal frame reported it.
    ...(usage
      ? {
          usage: {
            prompt_tokens: usage.inputTokens,
            completion_tokens: usage.outputTokens,
            total_tokens: usage.inputTokens + usage.outputTokens,
          },
        }
      : {}),
  };
  push(`data: ${JSON.stringify(closing)}\n\n`);
  push("data: [DONE]\n\n");
  pending.controller!.close();
  clearTimeout(pending.timer);
  this.pending.delete(frame.id);
}
|
|
688
|
+
|
|
689
|
+
/**
 * Relay one stream frame to the client in the Responses-API SSE format.
 *
 * A non-terminal frame appends its text delta to the per-request buffer in
 * `streamText` and emits a `response.output_text.delta` event. A terminal
 * frame (`payload.done`) takes the buffered text, drops the buffer entry,
 * delegates the closing events to `enqueueResponsesStreamDone`, then closes
 * the stream, clears the timer and removes the pending entry.
 *
 * The caller is expected to have verified that `pending.controller` and
 * `pending.encoder` are set before dispatching here.
 */
private handleModelStreamResponses(frame: ModelStreamChunk, pending: PendingModelReq) {
  const requestId = frame.id;

  if (!frame.payload.done) {
    const piece = frame.payload.delta;
    // Keep the running text; it is handed to the done helper at the end.
    const soFar = this.streamText.get(requestId) ?? "";
    this.streamText.set(requestId, soFar + piece);

    const event = {
      type: "response.output_text.delta",
      item_id: `msg_${requestId}`,
      output_index: 0,
      content_index: 0,
      delta: piece,
    };
    pending.controller!.enqueue(
      pending.encoder!.encode(`event: response.output_text.delta\ndata: ${JSON.stringify(event)}\n\n`),
    );
    return;
  }

  const accumulated = this.streamText.get(requestId) ?? "";
  this.streamText.delete(requestId);
  this.enqueueResponsesStreamDone(
    pending.controller!,
    pending.encoder!,
    requestId,
    pending.model ?? "",
    accumulated,
    frame.payload.usage,
  );
  pending.controller!.close();
  clearTimeout(pending.timer);
  this.pending.delete(requestId);
}
|
|
704
|
+
|
|
705
|
+
/**
 * Send a non-terminal `model_stream` frame to a peer.
 *
 * @param to       Node id of the peer that receives the frame.
 * @param id       Request id the frame belongs to.
 * @param delta    Text delta for this frame.
 * @param deltaObj Optional structured delta; included in the payload only
 *                 when it is not `undefined`.
 */
private sendStreamDelta(to: string, id: string, delta: string, deltaObj?: unknown) {
  const chunk = {
    type: "model_stream",
    id,
    from: this.config.nodeId,
    to,
    timestamp: Date.now(),
    // Key order (delta, deltaObj, done) is kept so the serialized frame is unchanged.
    payload: deltaObj === undefined
      ? { delta, done: false }
      : { delta, deltaObj, done: false },
  } satisfies ModelStreamChunk;

  this.peerManager.sendTo(to, chunk);
}
|
|
715
|
+
|
|
716
|
+
/**
 * Send the terminal `model_stream` frame (`done: true`, empty delta) to a peer.
 *
 * @param to    Node id of the peer that receives the frame.
 * @param id    Request id the frame belongs to.
 * @param usage Optional token counts to attach to the terminal frame.
 */
private sendStreamDone(to: string, id: string, usage?: { inputTokens: number; outputTokens: number }) {
  const terminal = {
    type: "model_stream",
    id,
    from: this.config.nodeId,
    to,
    timestamp: Date.now(),
    payload: { delta: "", done: true, usage },
  } satisfies ModelStreamChunk;

  this.peerManager.sendTo(to, terminal);
}
|
|
726
|
+
|
|
488
727
|
/** Handle model_req locally: call the model API directly or fall back to OpenClaw gateway. */
|
|
489
728
|
async handleModelRequest(frame: ModelRequest): Promise<void> {
|
|
490
729
|
const { id, from, payload } = frame;
|
|
491
|
-
debug("model_req", `handling model="${payload.model}" from=${from} stream=${payload.stream}`);
|
|
730
|
+
debug("model_req", `handling model="${payload.model}" provider=${payload.provider ?? "any"} from=${from} stream=${payload.stream}`);
|
|
492
731
|
|
|
493
|
-
const model =
|
|
732
|
+
const model = payload.provider
|
|
733
|
+
? this.config.models.find((m) => m.id === payload.model && m.provider === payload.provider)
|
|
734
|
+
?? this.config.models.find((m) => m.id === payload.model)
|
|
735
|
+
: this.config.models.find((m) => m.id === payload.model);
|
|
494
736
|
if (!model) {
|
|
495
737
|
this.peerManager.sendTo(from, {
|
|
496
738
|
type: "model_res",
|
|
@@ -505,28 +747,42 @@ export class ModelProxy {
|
|
|
505
747
|
|
|
506
748
|
try {
|
|
507
749
|
const endpoint = this.resolveModelEndpoint(model);
|
|
750
|
+
const isResponsesApi = endpoint.api === "openai-responses" || endpoint.api === "openai-codex-responses";
|
|
751
|
+
const path = isResponsesApi ? "/responses" : "/chat/completions";
|
|
752
|
+
const url = `${endpoint.baseUrl}${path}`;
|
|
508
753
|
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
|
509
754
|
|
|
510
755
|
if (endpoint.direct) {
|
|
511
756
|
if (endpoint.apiKey) headers["Authorization"] = `Bearer ${endpoint.apiKey}`;
|
|
512
|
-
debug("model_req", `direct API call to ${endpoint.
|
|
757
|
+
debug("model_req", `direct API call to ${url} (api=${endpoint.api})`);
|
|
513
758
|
} else {
|
|
514
759
|
const { authHeader } = this.gatewayInfo;
|
|
515
760
|
if (authHeader) headers["Authorization"] = authHeader;
|
|
516
|
-
debug("model_req", `gateway fallback to ${
|
|
761
|
+
debug("model_req", `gateway fallback to ${url}`);
|
|
517
762
|
}
|
|
518
763
|
|
|
519
|
-
const
|
|
764
|
+
const modelField = endpoint.direct ? model.id : `${model.provider}/${model.id}`;
|
|
765
|
+
const requestBody = isResponsesApi
|
|
766
|
+
? {
|
|
767
|
+
model: modelField,
|
|
768
|
+
input: payload.messages,
|
|
769
|
+
stream: payload.stream,
|
|
770
|
+
temperature: payload.temperature,
|
|
771
|
+
max_output_tokens: payload.maxTokens,
|
|
772
|
+
}
|
|
773
|
+
: {
|
|
774
|
+
model: modelField,
|
|
775
|
+
messages: payload.messages,
|
|
776
|
+
temperature: payload.temperature,
|
|
777
|
+
max_tokens: payload.maxTokens,
|
|
778
|
+
stream: payload.stream,
|
|
779
|
+
...(payload.stream ? { stream_options: { include_usage: true } } : {}),
|
|
780
|
+
};
|
|
781
|
+
|
|
782
|
+
const response = await fetch(url, {
|
|
520
783
|
method: "POST",
|
|
521
784
|
headers,
|
|
522
|
-
body: JSON.stringify(
|
|
523
|
-
model: endpoint.direct ? model.id : `${model.provider}/${model.id}`,
|
|
524
|
-
messages: payload.messages,
|
|
525
|
-
temperature: payload.temperature,
|
|
526
|
-
max_tokens: payload.maxTokens,
|
|
527
|
-
stream: payload.stream,
|
|
528
|
-
...(payload.stream ? { stream_options: { include_usage: true } } : {}),
|
|
529
|
-
}),
|
|
785
|
+
body: JSON.stringify(requestBody),
|
|
530
786
|
});
|
|
531
787
|
|
|
532
788
|
if (!response.ok) {
|
|
@@ -549,73 +805,122 @@ export class ModelProxy {
|
|
|
549
805
|
if (done) break;
|
|
550
806
|
|
|
551
807
|
buffer += decoder.decode(value, { stream: true });
|
|
808
|
+
if (buffer.length > MAX_STREAM_BUFFER) {
|
|
809
|
+
throw new Error("Stream buffer exceeded 1MB — upstream may be malformed");
|
|
810
|
+
}
|
|
552
811
|
const lines = buffer.split("\n");
|
|
553
812
|
buffer = lines.pop()!;
|
|
554
813
|
|
|
814
|
+
// Track SSE event type for responses API
|
|
815
|
+
let currentEvent = "";
|
|
555
816
|
for (const line of lines) {
|
|
817
|
+
if (line.startsWith("event: ")) {
|
|
818
|
+
currentEvent = line.slice(7).trim();
|
|
819
|
+
continue;
|
|
820
|
+
}
|
|
556
821
|
if (!line.startsWith("data: ")) continue;
|
|
557
822
|
const data = line.slice(6).trim();
|
|
558
823
|
if (data === "[DONE]") {
|
|
559
|
-
this.
|
|
560
|
-
type: "model_stream",
|
|
561
|
-
id,
|
|
562
|
-
from: this.config.nodeId,
|
|
563
|
-
to: from,
|
|
564
|
-
timestamp: Date.now(),
|
|
565
|
-
payload: { delta: "", done: true, usage: lastUsage },
|
|
566
|
-
} satisfies ModelStreamChunk);
|
|
824
|
+
this.sendStreamDone(from, id, lastUsage);
|
|
567
825
|
streamDone = true;
|
|
568
826
|
break;
|
|
569
827
|
}
|
|
570
828
|
|
|
571
829
|
try {
|
|
572
830
|
const parsed = JSON.parse(data);
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
831
|
+
|
|
832
|
+
if (isResponsesApi) {
|
|
833
|
+
const evtType = currentEvent || parsed.type;
|
|
834
|
+
if (evtType === "response.output_text.delta") {
|
|
835
|
+
const delta = parsed.delta || "";
|
|
836
|
+
if (delta) {
|
|
837
|
+
this.sendStreamDelta(from, id, delta);
|
|
838
|
+
}
|
|
839
|
+
} else if (evtType === "response.completed") {
|
|
840
|
+
const usage = parsed.response?.usage;
|
|
841
|
+
if (usage) {
|
|
842
|
+
lastUsage = {
|
|
843
|
+
inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
|
|
844
|
+
outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
|
|
845
|
+
};
|
|
846
|
+
}
|
|
847
|
+
this.sendStreamDone(from, id, lastUsage);
|
|
848
|
+
streamDone = true;
|
|
849
|
+
break;
|
|
850
|
+
}
|
|
851
|
+
} else {
|
|
852
|
+
// Chat completions format
|
|
853
|
+
if (parsed.usage) {
|
|
854
|
+
lastUsage = {
|
|
855
|
+
inputTokens: parsed.usage.prompt_tokens,
|
|
856
|
+
outputTokens: parsed.usage.completion_tokens,
|
|
857
|
+
};
|
|
858
|
+
}
|
|
859
|
+
const d = parsed.choices?.[0]?.delta;
|
|
860
|
+
const delta = d?.content || d?.reasoning_content || "";
|
|
861
|
+
// Pass full delta object when it contains tool_calls or other structured data
|
|
862
|
+
const hasStructured = d?.tool_calls || d?.refusal != null;
|
|
863
|
+
if (delta || hasStructured) {
|
|
864
|
+
this.sendStreamDelta(from, id, delta, hasStructured ? d : undefined);
|
|
865
|
+
}
|
|
590
866
|
}
|
|
591
867
|
} catch {
|
|
592
868
|
// skip malformed chunks
|
|
593
869
|
}
|
|
870
|
+
currentEvent = "";
|
|
594
871
|
}
|
|
595
872
|
}
|
|
596
|
-
// If the upstream closed without sending [DONE]
|
|
597
|
-
// frame so the requesting side doesn't hang
|
|
873
|
+
// If the upstream closed without sending [DONE] or response.completed,
|
|
874
|
+
// send a completion frame so the requesting side doesn't hang.
|
|
598
875
|
if (!streamDone) {
|
|
599
|
-
this.
|
|
600
|
-
type: "model_stream",
|
|
601
|
-
id,
|
|
602
|
-
from: this.config.nodeId,
|
|
603
|
-
to: from,
|
|
604
|
-
timestamp: Date.now(),
|
|
605
|
-
payload: { delta: "", done: true, usage: lastUsage },
|
|
606
|
-
} satisfies ModelStreamChunk);
|
|
876
|
+
this.sendStreamDone(from, id, lastUsage);
|
|
607
877
|
}
|
|
608
878
|
} finally {
|
|
609
879
|
reader.releaseLock();
|
|
610
880
|
}
|
|
611
881
|
} else {
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
882
|
+
// Non-streaming response
|
|
883
|
+
const result = await response.json();
|
|
884
|
+
let content: string;
|
|
885
|
+
let message: unknown | undefined;
|
|
886
|
+
let usage: { inputTokens: number; outputTokens: number } | undefined;
|
|
887
|
+
|
|
888
|
+
if (isResponsesApi) {
|
|
889
|
+
// Responses API: extract text from output[].content[].text
|
|
890
|
+
content = "";
|
|
891
|
+
const output = result.output as { type?: string; content?: { type?: string; text?: string }[] }[] | undefined;
|
|
892
|
+
if (Array.isArray(output)) {
|
|
893
|
+
for (const item of output) {
|
|
894
|
+
if (item.type === "message" && Array.isArray(item.content)) {
|
|
895
|
+
for (const part of item.content) {
|
|
896
|
+
if (part.type === "output_text" && part.text) content += part.text;
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
// Carry full output array for structured data (function_call items, etc.)
|
|
902
|
+
message = result.output;
|
|
903
|
+
if (result.usage) {
|
|
904
|
+
usage = {
|
|
905
|
+
inputTokens: result.usage.input_tokens ?? result.usage.prompt_tokens ?? 0,
|
|
906
|
+
outputTokens: result.usage.output_tokens ?? result.usage.completion_tokens ?? 0,
|
|
907
|
+
};
|
|
908
|
+
}
|
|
909
|
+
} else {
|
|
910
|
+
// Chat completions format
|
|
911
|
+
const msg = result.choices?.[0]?.message;
|
|
912
|
+
content = msg?.content || msg?.reasoning_content || "";
|
|
913
|
+
// Carry full message object when it has tool_calls or other structured data
|
|
914
|
+
if (msg?.tool_calls || msg?.refusal != null || msg?.function_call) {
|
|
915
|
+
message = msg;
|
|
916
|
+
}
|
|
917
|
+
if (result.usage) {
|
|
918
|
+
usage = {
|
|
919
|
+
inputTokens: result.usage.prompt_tokens,
|
|
920
|
+
outputTokens: result.usage.completion_tokens,
|
|
921
|
+
};
|
|
922
|
+
}
|
|
923
|
+
}
|
|
619
924
|
|
|
620
925
|
this.peerManager.sendTo(from, {
|
|
621
926
|
type: "model_res",
|
|
@@ -626,9 +931,8 @@ export class ModelProxy {
|
|
|
626
931
|
payload: {
|
|
627
932
|
success: true,
|
|
628
933
|
content,
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
: undefined,
|
|
934
|
+
...(message !== undefined && { message }),
|
|
935
|
+
usage,
|
|
632
936
|
},
|
|
633
937
|
} satisfies ModelResponse);
|
|
634
938
|
}
|