clawmatrix 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BOOTSTRAP.md +40 -8
- package/package.json +7 -5
- package/src/cluster-service.ts +5 -4
- package/src/config.ts +57 -6
- package/src/connection.ts +20 -3
- package/src/device-info.ts +48 -0
- package/src/handoff.ts +20 -5
- package/src/index.ts +14 -4
- package/src/model-proxy.ts +530 -229
- package/src/peer-manager.ts +11 -2
- package/src/router.ts +31 -23
- package/src/tool-proxy.ts +6 -0
- package/src/types.ts +24 -0
- package/src/web-ui.ts +227 -20
- package/src/web.ts +55 -1
package/src/model-proxy.ts
CHANGED
|
@@ -11,12 +11,17 @@ import type {
|
|
|
11
11
|
import { debug } from "./debug.ts";
|
|
12
12
|
|
|
13
13
|
/** How long a proxied model request may stay pending before it is timed out: 2 minutes, in milliseconds. */
const MODEL_TIMEOUT = 2 * 60 * 1000;

/** 1MB cap on buffered stream data — guard against upstream not sending newlines. */
const MAX_STREAM_BUFFER = 1024 * 1024;

/** Client-facing wire format of a proxied request: chat completions or the Responses API. */
type ResponseFormat = "chat" | "responses";

/** Bookkeeping kept for one in-flight model request until it completes, fails or times out. */
interface PendingModelReq {
  /** True when the client asked for a streamed (SSE) reply. */
  stream: boolean;
  /** Which API shape the reply must be rendered in. */
  responseFormat: ResponseFormat;
  /** Model name echoed back in Responses API events. */
  model?: string;
  /** Timeout handle; cleared once the request settles. */
  timer: ReturnType<typeof setTimeout>;
  /** Settles the waiting caller with the reply payload. */
  resolve: (value: unknown) => void;
  /** Fails the waiting caller. */
  reject: (error: Error) => void;
  /** Stream controller the reply chunks are enqueued on (streaming requests only). */
  controller?: ReadableStreamDefaultController;
  /** Encoder used to turn SSE text into bytes (streaming requests only). */
  encoder?: TextEncoder;
}
|
|
@@ -36,35 +41,125 @@ export class ModelProxy {
|
|
|
36
41
|
this.openclawConfig = openclawConfig;
|
|
37
42
|
}
|
|
38
43
|
|
|
44
|
+
/**
 * Normalize Responses API `input` to OpenAI chat messages for WS transport.
 *
 * Conversions:
 * - string → [{role: "user", content: "..."}]
 * - shorthand {role, content: "..."} → {role, content}
 * - full {type: "message", content: [{type: "input_text"}, {type: "input_image"}]} → chat format
 * - {type: "function_call_output"} → {role: "tool", ...}
 *
 * Non-array input, non-object items, unsupported content parts and messages
 * left with no usable parts are dropped.
 * NOTE(review): `function_call` items are not converted and are dropped too — confirm this is intended.
 *
 * @param input Raw `input` field of a Responses API request body.
 * @returns Chat-completions compatible messages (text + image_url content parts).
 */
private static normalizeResponsesInput(input: unknown): unknown[] {
  if (typeof input === "string") {
    return [{ role: "user", content: input }];
  }
  if (!Array.isArray(input)) return [];

  const messages: unknown[] = [];
  for (const item of input) {
    if (!item || typeof item !== "object") continue;
    const obj = item as Record<string, unknown>;

    // function_call_output → tool message
    if (obj.type === "function_call_output") {
      let content: string;
      if (typeof obj.output === "string") {
        content = obj.output;
      } else if (obj.output === undefined) {
        // JSON.stringify(undefined) is undefined, which would silently drop
        // `content` when the message is serialized; send an empty string instead.
        content = "";
      } else {
        content = JSON.stringify(obj.output);
      }
      messages.push({ role: "tool", tool_call_id: obj.call_id, content });
      continue;
    }

    const role = typeof obj.role === "string" ? obj.role : "user";

    // Simple shorthand: {role: "user", content: "hello"}
    if (typeof obj.content === "string") {
      messages.push({ role, content: obj.content });
      continue;
    }

    // Full format: {type: "message", role, content: [{type: "input_text"|"input_image"|...}]}
    if (Array.isArray(obj.content)) {
      const parts: unknown[] = [];
      for (const part of obj.content) {
        if (!part || typeof part !== "object") continue;
        const p = part as Record<string, unknown>;

        if (p.type === "input_text" || p.type === "output_text") {
          // Text content → chat text part
          if (typeof p.text === "string") {
            parts.push({ type: "text", text: p.text });
          }
        } else if (p.type === "input_image") {
          // Image content → chat image_url part
          if (typeof p.image_url === "string") {
            // The Responses API puts `detail` next to `image_url`; the chat
            // format nests it inside the image_url object.
            const image: Record<string, unknown> = { url: p.image_url };
            if (typeof p.detail === "string") image.detail = p.detail;
            parts.push({ type: "image_url", image_url: image });
          } else if (p.image_url && typeof p.image_url === "object") {
            parts.push({ type: "image_url", image_url: p.image_url });
          }
        } else if (p.type === "text" && typeof p.text === "string") {
          // Already chat format
          parts.push(p);
        } else if (p.type === "image_url") {
          // Already chat format
          parts.push(p);
        }
      }

      const only = parts.length === 1 ? (parts[0] as Record<string, unknown>) : undefined;
      if (only && only.type === "text") {
        // Single text part → simplify to string content
        messages.push({ role, content: only.text as string });
      } else if (parts.length > 0) {
        messages.push({ role, content: parts });
      }
      continue;
    }

    // Fallback: bare item carrying a top-level `text`
    if (typeof obj.text === "string") {
      messages.push({ role, content: obj.text });
    }
  }
  return messages;
}
|
|
128
|
+
|
|
39
129
|
/**
 * Resolve API endpoint for a model: explicit config > OpenClaw provider > gateway fallback.
 *
 * @param model Proxy model entry; `baseUrl`, `apiKey` and `api` are optional overrides.
 * @returns `baseUrl` without trailing slashes, the API key if one is configured,
 *   `direct` (true when a configured base URL is used, false for the local
 *   gateway fallback) and the API flavour, defaulting to "openai-completions".
 */
private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string; api?: string }): { baseUrl: string; apiKey?: string; direct: boolean; api: string } {
  const defaultApi = "openai-completions";
  // Drop every trailing slash, not just one ("http://host/v1//" → "http://host/v1").
  const trimTrailingSlashes = (url: string): string => url.replace(/\/+$/, "");

  // 1. Explicit baseUrl in ClawMatrix model config
  if (model.baseUrl) {
    return {
      baseUrl: trimTrailingSlashes(model.baseUrl),
      apiKey: model.apiKey,
      direct: true,
      api: model.api ?? defaultApi,
    };
  }

  // 2. Read from OpenClaw's models.providers[provider]
  const providers = (this.openclawConfig as Record<string, unknown>).models as
    { providers?: Record<string, { baseUrl?: string; apiKey?: string; api?: string }> } | undefined;
  const providerConfig = providers?.providers?.[model.provider];
  if (providerConfig?.baseUrl) {
    return {
      baseUrl: trimTrailingSlashes(providerConfig.baseUrl),
      // Only a plain string key is forwarded; any other shape is ignored.
      apiKey: typeof providerConfig.apiKey === "string" ? providerConfig.apiKey : undefined,
      direct: true,
      // A per-model `api` wins over the provider-level one.
      api: model.api ?? providerConfig.api ?? defaultApi,
    };
  }

  // 3. Fallback: OpenClaw gateway
  const { port } = this.gatewayInfo;
  return {
    baseUrl: `http://127.0.0.1:${port}/v1`,
    apiKey: undefined,
    direct: false,
    api: model.api ?? defaultApi,
  };
}
|
|
70
165
|
|
|
@@ -79,7 +174,12 @@ export class ModelProxy {
|
|
|
79
174
|
|
|
80
175
|
if (p === "/chat/completions" && req.method === "POST") {
|
|
81
176
|
const body = await this.readBody(req);
|
|
82
|
-
const response = await this.handleChatCompletion(body);
|
|
177
|
+
const response = await this.handleChatCompletion(body, "openai-completions");
|
|
178
|
+
debug("proxy", `response status=${response.status}`);
|
|
179
|
+
this.sendResponse(res, response);
|
|
180
|
+
} else if (p === "/responses" && req.method === "POST") {
|
|
181
|
+
const body = await this.readBody(req);
|
|
182
|
+
const response = await this.handleResponses(body);
|
|
83
183
|
debug("proxy", `response status=${response.status}`);
|
|
84
184
|
this.sendResponse(res, response);
|
|
85
185
|
} else if (p === "/models" && req.method === "GET") {
|
|
@@ -152,55 +252,56 @@ export class ModelProxy {
|
|
|
152
252
|
}
|
|
153
253
|
|
|
154
254
|
// ── HTTP handlers ──────────────────────────────────────────────
|
|
155
|
-
private async handleChatCompletion(rawBody: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
|
|
156
|
-
let body: {
|
|
157
|
-
model: string;
|
|
158
|
-
messages: unknown[];
|
|
159
|
-
stream?: boolean;
|
|
160
|
-
temperature?: number;
|
|
161
|
-
max_tokens?: number;
|
|
162
|
-
};
|
|
163
255
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
body: JSON.stringify({ error: "Invalid JSON" }),
|
|
171
|
-
};
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
const rawModelId = body.model;
|
|
175
|
-
// Parse "nodeId/model" format: first segment is nodeId, rest is model ID.
|
|
176
|
-
// OpenClaw sends "providerId/modelId" where providerId = nodeId, so this
|
|
177
|
-
// naturally handles both OpenClaw calls and direct curl calls.
|
|
178
|
-
// If no "/" present, treat entire string as model ID and auto-resolve.
|
|
179
|
-
let nodeId: string | undefined;
|
|
180
|
-
let modelId: string;
|
|
256
|
+
/**
 * Resolve model ID → proxyModel + route. Shared by chat completions and responses handlers.
 *
 * Accepts either "nodeId/modelId" (the first "/" splits node from model) or a
 * bare model ID, which is looked up in `config.proxyModels` to find its node.
 *
 * @param rawModelId The `model` field taken from the request body.
 * @returns The resolved node/model/route, or an `error` with the HTTP status to send:
 *   400 when `model` is not a string, 404 when a bare model ID is not in the
 *   proxy models or the node has no route, 502 when the node cannot be reached.
 */
private resolveModelRoute(rawModelId: string): {
  nodeId: string; modelId: string;
  proxyModel: (typeof this.config.proxyModels)[number] | undefined;
  routeNodeId: string;
} | { error: { status: number; message: string } } {
  // The value comes from an unvalidated JSON body, so it may be missing or
  // not a string at runtime; reject it instead of throwing on .indexOf().
  if (typeof rawModelId !== "string") {
    return { error: { status: 400, message: `Missing or invalid "model" field` } };
  }

  const slashIdx = rawModelId.indexOf("/");
  let nodeId: string;
  let modelId: string;
  let proxyModel: (typeof this.config.proxyModels)[number] | undefined;

  if (slashIdx > 0) {
    // "nodeId/modelId": the proxy model entry is optional here, the node is explicit.
    nodeId = rawModelId.slice(0, slashIdx);
    modelId = rawModelId.slice(slashIdx + 1);
    proxyModel = this.config.proxyModels.find((m) => m.id === modelId && m.nodeId === nodeId);
  } else {
    // Bare model ID: the node can only come from a matching proxy model entry.
    modelId = rawModelId;
    proxyModel = this.config.proxyModels.find((m) => m.id === modelId);
    if (!proxyModel) {
      return { error: { status: 404, message: `Model "${rawModelId}" not found in proxy models` } };
    }
    nodeId = proxyModel.nodeId;
  }

  const route = this.peerManager.router.getRoute(nodeId);
  debug("proxy", `model raw="${rawModelId}" nodeId=${nodeId} modelId="${modelId}" route=${route?.nodeId ?? "none"}`);
  if (!route) {
    return { error: { status: 404, message: `Node "${nodeId}" not found in cluster` } };
  }
  if (!this.peerManager.canReach(route.nodeId)) {
    return { error: { status: 502, message: `Cannot reach model node "${route.nodeId}"` } };
  }
  return { nodeId, modelId, proxyModel, routeNodeId: route.nodeId };
}
|
|
202
290
|
|
|
203
|
-
|
|
291
|
+
private async handleChatCompletion(rawBody: string, _api: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
|
|
292
|
+
let body: { model: string; messages: unknown[]; stream?: boolean; temperature?: number; max_tokens?: number };
|
|
293
|
+
try {
|
|
294
|
+
body = JSON.parse(rawBody);
|
|
295
|
+
} catch {
|
|
296
|
+
return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
const resolved = this.resolveModelRoute(body.model);
|
|
300
|
+
if ("error" in resolved) {
|
|
301
|
+
return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
const { modelId, proxyModel, routeNodeId } = resolved;
|
|
204
305
|
const messages = body.messages;
|
|
205
306
|
if (proxyModel?.description) {
|
|
206
307
|
const first = messages[0] as { role?: string; content?: string } | undefined;
|
|
@@ -213,35 +314,66 @@ export class ModelProxy {
|
|
|
213
314
|
|
|
214
315
|
const stream = body.stream ?? false;
|
|
215
316
|
const requestId = crypto.randomUUID();
|
|
216
|
-
|
|
217
317
|
const frame: ModelRequest = {
|
|
218
|
-
type: "model_req",
|
|
219
|
-
|
|
220
|
-
from: this.config.nodeId,
|
|
221
|
-
to: route.nodeId,
|
|
222
|
-
timestamp: Date.now(),
|
|
223
|
-
payload: {
|
|
224
|
-
model: modelId,
|
|
225
|
-
messages,
|
|
226
|
-
temperature: body.temperature,
|
|
227
|
-
maxTokens: body.max_tokens,
|
|
228
|
-
stream,
|
|
229
|
-
},
|
|
318
|
+
type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
|
|
319
|
+
payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_tokens, stream },
|
|
230
320
|
};
|
|
231
321
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
322
|
+
if (stream) {
|
|
323
|
+
return this.handleStreamRequest(requestId, routeNodeId, frame, "chat");
|
|
324
|
+
} else {
|
|
325
|
+
return this.handleNonStreamRequest(requestId, routeNodeId, frame, "chat");
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
/**
 * Handle POST /responses: translate a Responses API request into a `model_req`
 * frame and hand it to the stream or non-stream request path.
 *
 * @param rawBody Raw request body text.
 * @returns 400 for a body that is not a JSON object or has no string `model`,
 *   the status from route resolution when that fails, otherwise whatever the
 *   stream / non-stream handler returns.
 */
private async handleResponses(rawBody: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(rawBody);
  } catch {
    return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
  }

  // JSON.parse also accepts null, scalars and arrays; reading fields off those
  // would throw or yield nonsense, so reject them up front.
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: "Request body must be a JSON object" } }) };
  }
  const body = parsed as { model: string; input: unknown; stream?: boolean; temperature?: number; max_output_tokens?: number; instructions?: string };
  if (typeof body.model !== "string") {
    return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: `Missing or invalid "model" field` } }) };
  }

  const resolved = this.resolveModelRoute(body.model);
  if ("error" in resolved) {
    return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
  }

  const { modelId, proxyModel, routeNodeId } = resolved;

  // Normalize responses API input → simple chat messages for WS transport.
  // Responses API items use {type: "message", role, content: [{type: "input_text", text}]}
  // but WS protocol carries simple {role, content} chat messages.
  const messages = ModelProxy.normalizeResponsesInput(body.input);

  // Prepend instructions as a developer message (non-empty strings only).
  if (typeof body.instructions === "string" && body.instructions) {
    messages.unshift({ role: "developer", content: body.instructions });
  }

  if (proxyModel?.description) {
    const first = messages[0] as { role?: string; content?: string } | undefined;
    const leadsWithInstructions = first?.role === "system" || first?.role === "developer";
    if (first && leadsWithInstructions && typeof first.content === "string") {
      // Fold the model description into the existing leading instruction message.
      first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
    } else {
      messages.unshift({ role: "system", content: `[Model: ${proxyModel.description}]` });
    }
  }

  const stream = body.stream ?? false;
  const requestId = crypto.randomUUID();
  debug("proxy", `responses: stream=${stream} messages=${messages.length} input_type=${typeof body.input}${Array.isArray(body.input) ? `[${body.input.length}]` : ""}`);
  const frame: ModelRequest = {
    type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
    payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
  };

  if (stream) {
    return this.handleStreamRequest(requestId, routeNodeId, frame, "responses");
  } else {
    return this.handleNonStreamRequest(requestId, routeNodeId, frame, "responses");
  }
}
|
|
247
379
|
|
|
@@ -249,8 +381,10 @@ export class ModelProxy {
|
|
|
249
381
|
requestId: string,
|
|
250
382
|
targetNodeId: string,
|
|
251
383
|
frame: ModelRequest,
|
|
384
|
+
responseFormat: ResponseFormat,
|
|
252
385
|
): { status: number; headers: Record<string, string>; body: ReadableStream } {
|
|
253
386
|
const encoder = new TextEncoder();
|
|
387
|
+
const model = frame.payload.model;
|
|
254
388
|
|
|
255
389
|
const readable = new ReadableStream({
|
|
256
390
|
start: (controller) => {
|
|
@@ -258,63 +392,89 @@ export class ModelProxy {
|
|
|
258
392
|
this.pending.delete(requestId);
|
|
259
393
|
this.peerManager.router.markFailed(requestId);
|
|
260
394
|
try {
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
);
|
|
269
|
-
controller.enqueue(encoder.encode("data: [DONE]\n\n"));
|
|
395
|
+
if (responseFormat === "responses") {
|
|
396
|
+
controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: "\n\n[ClawMatrix] Error: model request timed out" })}\n\n`));
|
|
397
|
+
this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
|
|
398
|
+
} else {
|
|
399
|
+
controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: "\n\n[ClawMatrix] Error: model request timed out" }, finish_reason: "stop" }] })}\n\n`));
|
|
400
|
+
controller.enqueue(encoder.encode("data: [DONE]\n\n"));
|
|
401
|
+
}
|
|
270
402
|
controller.close();
|
|
271
|
-
} catch {
|
|
272
|
-
// controller may already be closed
|
|
273
|
-
}
|
|
403
|
+
} catch { /* controller may already be closed */ }
|
|
274
404
|
}, MODEL_TIMEOUT);
|
|
275
405
|
|
|
276
406
|
this.pending.set(requestId, {
|
|
277
|
-
resolve: () => {},
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
stream: true,
|
|
281
|
-
controller,
|
|
282
|
-
encoder,
|
|
407
|
+
resolve: () => {}, reject: () => {},
|
|
408
|
+
timer, stream: true, responseFormat, model,
|
|
409
|
+
controller, encoder,
|
|
283
410
|
});
|
|
284
411
|
|
|
412
|
+
// Emit setup events for responses API
|
|
413
|
+
if (responseFormat === "responses") {
|
|
414
|
+
this.enqueueResponsesStreamSetup(controller, encoder, requestId, model);
|
|
415
|
+
}
|
|
416
|
+
|
|
285
417
|
const sent = this.peerManager.sendTo(targetNodeId, frame);
|
|
286
418
|
if (!sent) {
|
|
287
419
|
this.pending.delete(requestId);
|
|
288
420
|
clearTimeout(timer);
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
controller
|
|
421
|
+
try {
|
|
422
|
+
if (responseFormat === "responses") {
|
|
423
|
+
controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` })}\n\n`));
|
|
424
|
+
this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
|
|
425
|
+
} else {
|
|
426
|
+
controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` }, finish_reason: "stop" }] })}\n\n`));
|
|
427
|
+
controller.enqueue(encoder.encode("data: [DONE]\n\n"));
|
|
428
|
+
}
|
|
429
|
+
controller.close();
|
|
430
|
+
} catch { /* controller may already be closed */ }
|
|
299
431
|
}
|
|
300
432
|
},
|
|
301
433
|
});
|
|
302
434
|
|
|
303
435
|
return {
|
|
304
436
|
status: 200,
|
|
305
|
-
headers: {
|
|
306
|
-
"Content-Type": "text/event-stream",
|
|
307
|
-
"Cache-Control": "no-cache",
|
|
308
|
-
"Connection": "keep-alive",
|
|
309
|
-
},
|
|
437
|
+
headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" },
|
|
310
438
|
body: readable,
|
|
311
439
|
};
|
|
312
440
|
}
|
|
313
441
|
|
|
442
|
+
/**
 * Write the opening events of a Responses API SSE stream, in order:
 * response.created, response.in_progress, response.output_item.added,
 * response.content_part.added.
 *
 * @param controller Stream controller the encoded events are enqueued on.
 * @param encoder Encoder used to turn each SSE frame into bytes.
 * @param id Request id; used to derive the `resp_…` and `msg_…` identifiers.
 * @param model Model name reported in the response object.
 */
private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string) {
  // One SSE frame per event; the JSON payload repeats the event name as `type`.
  const emit = (event: string, fields: Record<string, unknown>) => {
    controller.enqueue(encoder.encode(`event: ${event}\ndata: ${JSON.stringify({ type: event, ...fields })}\n\n`));
  };

  const messageId = `msg_${id}`;
  const response = {
    id: `resp_${id}`,
    object: "response",
    created_at: Math.floor(Date.now() / 1000),
    status: "in_progress",
    model,
    output: [],
  };
  const item = { type: "message", id: messageId, role: "assistant", content: [], status: "in_progress" };
  const part = { type: "output_text", text: "" };

  emit("response.created", { response });
  emit("response.in_progress", { response });
  emit("response.output_item.added", { output_index: 0, item });
  emit("response.content_part.added", { item_id: messageId, output_index: 0, content_index: 0, part });
}
|
|
456
|
+
|
|
457
|
+
/**
 * Write the closing events of a Responses API SSE stream, in order:
 * response.output_text.done, response.content_part.done,
 * response.output_item.done, response.completed.
 *
 * @param controller Stream controller the encoded events are enqueued on.
 * @param encoder Encoder used to turn each SSE frame into bytes.
 * @param id Request id; used to derive the `resp_…` and `msg_…` identifiers.
 * @param model Model name reported in the completed response object.
 * @param content Final output text; an empty string is reported when omitted.
 * @param usage Token counts; all-zero usage is reported when omitted.
 */
private enqueueResponsesStreamDone(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string, content?: string, usage?: { inputTokens: number; outputTokens: number }) {
  // One SSE frame per event; the JSON payload repeats the event name as `type`.
  const emit = (event: string, fields: Record<string, unknown>) => {
    controller.enqueue(encoder.encode(`event: ${event}\ndata: ${JSON.stringify({ type: event, ...fields })}\n\n`));
  };

  const text = content ?? "";
  const messageId = `msg_${id}`;
  const part = { type: "output_text", text };
  const item = { type: "message", id: messageId, role: "assistant", content: [part], status: "completed" };

  let usageSummary: { input_tokens: number; output_tokens: number; total_tokens: number };
  if (usage) {
    usageSummary = {
      input_tokens: usage.inputTokens,
      output_tokens: usage.outputTokens,
      total_tokens: usage.inputTokens + usage.outputTokens,
    };
  } else {
    usageSummary = { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
  }

  const response = {
    id: `resp_${id}`,
    object: "response",
    created_at: Math.floor(Date.now() / 1000),
    status: "completed",
    model,
    output: [item],
    usage: usageSummary,
  };

  emit("response.output_text.done", { item_id: messageId, output_index: 0, content_index: 0, text });
  emit("response.content_part.done", { item_id: messageId, output_index: 0, content_index: 0, part });
  emit("response.output_item.done", { output_index: 0, item });
  emit("response.completed", { response });
}
|
|
472
|
+
|
|
314
473
|
private async handleNonStreamRequest(
|
|
315
474
|
requestId: string,
|
|
316
475
|
targetNodeId: string,
|
|
317
476
|
frame: ModelRequest,
|
|
477
|
+
responseFormat: ResponseFormat,
|
|
318
478
|
): Promise<{ status: number; headers: Record<string, string>; body: string }> {
|
|
319
479
|
try {
|
|
320
480
|
const result = await new Promise<ModelResponse["payload"]>(
|
|
@@ -327,9 +487,7 @@ export class ModelProxy {
|
|
|
327
487
|
|
|
328
488
|
this.pending.set(requestId, {
|
|
329
489
|
resolve: resolve as (v: unknown) => void,
|
|
330
|
-
reject,
|
|
331
|
-
timer,
|
|
332
|
-
stream: false,
|
|
490
|
+
reject, timer, stream: false, responseFormat,
|
|
333
491
|
});
|
|
334
492
|
|
|
335
493
|
const sent = this.peerManager.sendTo(targetNodeId, frame);
|
|
@@ -349,6 +507,41 @@ export class ModelProxy {
|
|
|
349
507
|
};
|
|
350
508
|
}
|
|
351
509
|
|
|
510
|
+
if (responseFormat === "responses") {
|
|
511
|
+
const msgId = `msg_${requestId}`;
|
|
512
|
+
const usageObj = result.usage
|
|
513
|
+
? { input_tokens: result.usage.inputTokens, output_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
|
|
514
|
+
: { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
|
|
515
|
+
// If upstream sent full output array (responses API), use it directly
|
|
516
|
+
const output = Array.isArray(result.message)
|
|
517
|
+
? result.message
|
|
518
|
+
: [{
|
|
519
|
+
type: "message", id: msgId, role: "assistant",
|
|
520
|
+
content: [{ type: "output_text", text: result.content ?? "" }],
|
|
521
|
+
status: "completed",
|
|
522
|
+
}];
|
|
523
|
+
return {
|
|
524
|
+
status: 200,
|
|
525
|
+
headers: { "Content-Type": "application/json" },
|
|
526
|
+
body: JSON.stringify({
|
|
527
|
+
id: `resp_${requestId}`,
|
|
528
|
+
object: "response",
|
|
529
|
+
created_at: Math.floor(Date.now() / 1000),
|
|
530
|
+
status: "completed",
|
|
531
|
+
model: frame.payload.model,
|
|
532
|
+
output,
|
|
533
|
+
usage: usageObj,
|
|
534
|
+
}),
|
|
535
|
+
};
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
// Chat completions format — use full message object when available (has tool_calls etc.)
|
|
539
|
+
const msg = result.message as Record<string, unknown> | undefined;
|
|
540
|
+
const message = msg
|
|
541
|
+
? { role: "assistant", ...msg }
|
|
542
|
+
: { role: "assistant", content: result.content };
|
|
543
|
+
const finishReason = msg?.tool_calls ? "tool_calls" : "stop";
|
|
544
|
+
|
|
352
545
|
return {
|
|
353
546
|
status: 200,
|
|
354
547
|
headers: { "Content-Type": "application/json" },
|
|
@@ -357,19 +550,13 @@ export class ModelProxy {
|
|
|
357
550
|
object: "chat.completion",
|
|
358
551
|
created: Math.floor(Date.now() / 1000),
|
|
359
552
|
model: frame.payload.model,
|
|
360
|
-
choices: [
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
},
|
|
366
|
-
],
|
|
553
|
+
choices: [{
|
|
554
|
+
index: 0,
|
|
555
|
+
message,
|
|
556
|
+
finish_reason: finishReason,
|
|
557
|
+
}],
|
|
367
558
|
usage: result.usage
|
|
368
|
-
? {
|
|
369
|
-
prompt_tokens: result.usage.inputTokens,
|
|
370
|
-
completion_tokens: result.usage.outputTokens,
|
|
371
|
-
total_tokens: result.usage.inputTokens + result.usage.outputTokens,
|
|
372
|
-
}
|
|
559
|
+
? { prompt_tokens: result.usage.inputTokens, completion_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
|
|
373
560
|
: undefined,
|
|
374
561
|
}),
|
|
375
562
|
};
|
|
@@ -383,16 +570,35 @@ export class ModelProxy {
|
|
|
383
570
|
}
|
|
384
571
|
|
|
385
572
|
private handleListModels(): { status: number; headers: Record<string, string>; body: string } {
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
573
|
+
// Build from proxyModels config (has full detail) and enrich with
|
|
574
|
+
// connectivity info from the router so consumers know what's reachable.
|
|
575
|
+
const reachable = new Set(
|
|
576
|
+
this.peerManager.router.getAllPeers()
|
|
577
|
+
.filter((p) => p.connection?.isOpen || p.reachableVia)
|
|
578
|
+
.map((p) => p.nodeId),
|
|
579
|
+
);
|
|
580
|
+
|
|
581
|
+
const models = this.config.proxyModels.map((m) => {
|
|
582
|
+
const entry: Record<string, unknown> = {
|
|
583
|
+
id: m.id,
|
|
584
|
+
object: "model",
|
|
585
|
+
created: 0,
|
|
586
|
+
owned_by: m.provider ?? "unknown",
|
|
587
|
+
// Extended fields
|
|
588
|
+
...(m.description && { description: m.description }),
|
|
589
|
+
...(m.contextWindow && { context_window: m.contextWindow }),
|
|
590
|
+
...(m.maxTokens && { max_tokens: m.maxTokens }),
|
|
591
|
+
...(m.reasoning !== undefined && { reasoning: m.reasoning }),
|
|
592
|
+
...(m.input && { input: m.input }),
|
|
593
|
+
...(m.api && { api: m.api }),
|
|
594
|
+
...(m.cost && { cost: m.cost }),
|
|
595
|
+
...(m.compat && { compat: m.compat }),
|
|
596
|
+
// Cluster info
|
|
597
|
+
node_id: m.nodeId,
|
|
598
|
+
reachable: reachable.has(m.nodeId),
|
|
599
|
+
};
|
|
600
|
+
return entry;
|
|
601
|
+
});
|
|
396
602
|
|
|
397
603
|
return {
|
|
398
604
|
status: 200,
|
|
@@ -414,19 +620,16 @@ export class ModelProxy {
|
|
|
414
620
|
clearTimeout(pending.timer);
|
|
415
621
|
this.pending.delete(frame.id);
|
|
416
622
|
try {
|
|
417
|
-
const
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
}
|
|
422
|
-
|
|
423
|
-
pending.encoder.encode(
|
|
424
|
-
|
|
425
|
-
pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
|
|
623
|
+
const errMsg = `[ClawMatrix] Remote error: ${frame.payload.error}`;
|
|
624
|
+
if (pending.responseFormat === "responses") {
|
|
625
|
+
pending.controller.enqueue(pending.encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: errMsg })}\n\n`));
|
|
626
|
+
this.enqueueResponsesStreamDone(pending.controller, pending.encoder, frame.id, pending.model ?? "", errMsg);
|
|
627
|
+
} else {
|
|
628
|
+
pending.controller.enqueue(pending.encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: errMsg }, finish_reason: "stop" }] })}\n\n`));
|
|
629
|
+
pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
|
|
630
|
+
}
|
|
426
631
|
pending.controller.close();
|
|
427
|
-
} catch {
|
|
428
|
-
// controller may already be closed
|
|
429
|
-
}
|
|
632
|
+
} catch { /* controller may already be closed */ }
|
|
430
633
|
}
|
|
431
634
|
return;
|
|
432
635
|
}
|
|
@@ -436,6 +639,9 @@ export class ModelProxy {
|
|
|
436
639
|
pending.resolve(frame.payload);
|
|
437
640
|
}
|
|
438
641
|
|
|
642
|
+
/** Accumulated text per stream request (needed for responses API done events). */
|
|
643
|
+
private streamText = new Map<string, string>();
|
|
644
|
+
|
|
439
645
|
handleModelStream(frame: ModelStreamChunk) {
|
|
440
646
|
debug("stream", `id=${frame.id} done=${frame.payload.done} delta=${JSON.stringify(frame.payload.delta?.slice?.(0, 50) ?? frame.payload.delta)} failed=${this.peerManager.router.isFailed(frame.id)} hasPending=${this.pending.has(frame.id)}`);
|
|
441
647
|
if (this.peerManager.router.isFailed(frame.id)) return;
|
|
@@ -443,54 +649,87 @@ export class ModelProxy {
|
|
|
443
649
|
if (!pending?.stream || !pending.controller || !pending.encoder) return;
|
|
444
650
|
|
|
445
651
|
try {
|
|
446
|
-
if (
|
|
447
|
-
|
|
448
|
-
id: `chatcmpl-${frame.id}`,
|
|
449
|
-
object: "chat.completion.chunk",
|
|
450
|
-
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
|
|
451
|
-
};
|
|
452
|
-
if (frame.payload.usage) {
|
|
453
|
-
finalChunk.usage = {
|
|
454
|
-
prompt_tokens: frame.payload.usage.inputTokens,
|
|
455
|
-
completion_tokens: frame.payload.usage.outputTokens,
|
|
456
|
-
total_tokens: frame.payload.usage.inputTokens + frame.payload.usage.outputTokens,
|
|
457
|
-
};
|
|
458
|
-
}
|
|
459
|
-
pending.controller.enqueue(
|
|
460
|
-
pending.encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`),
|
|
461
|
-
);
|
|
462
|
-
pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
|
|
463
|
-
pending.controller.close();
|
|
464
|
-
clearTimeout(pending.timer);
|
|
465
|
-
this.pending.delete(frame.id);
|
|
652
|
+
if (pending.responseFormat === "responses") {
|
|
653
|
+
this.handleModelStreamResponses(frame, pending);
|
|
466
654
|
} else {
|
|
467
|
-
|
|
468
|
-
id: `chatcmpl-${frame.id}`,
|
|
469
|
-
object: "chat.completion.chunk",
|
|
470
|
-
choices: [
|
|
471
|
-
{
|
|
472
|
-
index: 0,
|
|
473
|
-
delta: { content: frame.payload.delta },
|
|
474
|
-
finish_reason: null,
|
|
475
|
-
},
|
|
476
|
-
],
|
|
477
|
-
};
|
|
478
|
-
pending.controller.enqueue(
|
|
479
|
-
pending.encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`),
|
|
480
|
-
);
|
|
655
|
+
this.handleModelStreamChat(frame, pending);
|
|
481
656
|
}
|
|
482
657
|
} catch {
|
|
483
658
|
clearTimeout(pending.timer);
|
|
484
659
|
this.pending.delete(frame.id);
|
|
660
|
+
this.streamText.delete(frame.id);
|
|
485
661
|
}
|
|
486
662
|
}
|
|
487
663
|
|
|
664
|
+
/**
 * Forward one `model_stream` frame to the local client as an OpenAI
 * `chat.completion.chunk` server-sent event.
 *
 * - Non-final frame: emits a single chunk whose `delta` is the frame's
 *   `deltaObj` when present, otherwise `{ content: <text delta> }`.
 * - Final frame (`payload.done`): emits a closing chunk with
 *   `finish_reason: "stop"` (plus `usage` when the frame carries it), then the
 *   `[DONE]` sentinel, closes the stream, clears the request timer and drops
 *   the pending entry.
 *
 * Nothing is caught here: an error thrown by `enqueue`/`close` propagates to
 * the caller.
 *
 * @param frame   The incoming stream frame.
 * @param pending The pending request; `controller` and `encoder` are asserted
 *                non-null, so the caller must have checked them.
 */
private handleModelStreamChat(frame: ModelStreamChunk, pending: PendingModelReq) {
  const controller = pending.controller!;
  const encoder = pending.encoder!;
  const chunkId = `chatcmpl-${frame.id}`;
  // Writes one SSE `data:` record to the client stream.
  const emit = (sseData: string) => controller.enqueue(encoder.encode(`data: ${sseData}\n\n`));

  if (!frame.payload.done) {
    // Prefer the structured delta when available (carries tool_calls etc.); fall back to plain text.
    const delta = frame.payload.deltaObj ?? { content: frame.payload.delta };
    emit(JSON.stringify({
      id: chunkId,
      object: "chat.completion.chunk",
      choices: [{ index: 0, delta, finish_reason: null }],
    }));
    return;
  }

  const reported = frame.payload.usage;
  const finalChunk: Record<string, unknown> = {
    id: chunkId,
    object: "chat.completion.chunk",
    choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
    // `usage` is appended last, and only when the frame reported it.
    ...(reported
      ? {
          usage: {
            prompt_tokens: reported.inputTokens,
            completion_tokens: reported.outputTokens,
            total_tokens: reported.inputTokens + reported.outputTokens,
          },
        }
      : {}),
  };
  emit(JSON.stringify(finalChunk));
  emit("[DONE]");
  controller.close();
  clearTimeout(pending.timer);
  this.pending.delete(frame.id);
}
|
|
685
|
+
|
|
686
|
+
/**
 * Forward one `model_stream` frame to the local client in the Responses
 * streaming format.
 *
 * - Non-final frame: appends the text delta to the per-request accumulator in
 *   `streamText` and emits a `response.output_text.delta` event.
 * - Final frame (`payload.done`): takes the accumulated text (empty string if
 *   none), removes the accumulator entry, hands the text and any usage to
 *   `enqueueResponsesStreamDone` (presumably emitting the closing events —
 *   defined elsewhere in the class), then closes the stream, clears the
 *   request timer and drops the pending entry.
 *
 * Nothing is caught here: errors propagate to the caller.
 *
 * @param frame   The incoming stream frame.
 * @param pending The pending request; `controller` and `encoder` are asserted
 *                non-null, so the caller must have checked them.
 */
private handleModelStreamResponses(frame: ModelStreamChunk, pending: PendingModelReq) {
  const { id, payload } = frame;

  if (!payload.done) {
    // Keep a running copy of the text; it is needed once the stream finishes.
    const soFar = this.streamText.get(id) ?? "";
    this.streamText.set(id, soFar + payload.delta);

    const deltaEvent = {
      type: "response.output_text.delta",
      item_id: `msg_${id}`,
      output_index: 0,
      content_index: 0,
      delta: payload.delta,
    };
    const sseRecord = `event: response.output_text.delta\ndata: ${JSON.stringify(deltaEvent)}\n\n`;
    pending.controller!.enqueue(pending.encoder!.encode(sseRecord));
    return;
  }

  const fullText = this.streamText.get(id) ?? "";
  this.streamText.delete(id);
  this.enqueueResponsesStreamDone(
    pending.controller!,
    pending.encoder!,
    id,
    pending.model ?? "",
    fullText,
    payload.usage,
  );
  pending.controller!.close();
  clearTimeout(pending.timer);
  this.pending.delete(id);
}
|
|
701
|
+
|
|
702
|
+
/**
 * Send a non-final `model_stream` frame to a peer.
 *
 * @param to       Node id of the receiving peer.
 * @param id       Request id the frame belongs to.
 * @param delta    Plain-text delta for this chunk.
 * @param deltaObj Optional structured delta; the `deltaObj` key is only added
 *                 to the payload when this is not `undefined`.
 */
private sendStreamDelta(to: string, id: string, delta: string, deltaObj?: unknown) {
  const structuredPart = deltaObj === undefined ? {} : { deltaObj };
  const chunk = {
    type: "model_stream",
    id,
    from: this.config.nodeId,
    to,
    timestamp: Date.now(),
    payload: { delta, ...structuredPart, done: false },
  } satisfies ModelStreamChunk;
  this.peerManager.sendTo(to, chunk);
}
|
|
712
|
+
|
|
713
|
+
/**
 * Send the final `model_stream` frame (`done: true`, empty text delta) to a
 * peer, so the requesting side can finish its stream.
 *
 * @param to    Node id of the receiving peer.
 * @param id    Request id the frame belongs to.
 * @param usage Optional token counts; passed through in the payload as given.
 */
private sendStreamDone(to: string, id: string, usage?: { inputTokens: number; outputTokens: number }) {
  const finalFrame = {
    type: "model_stream",
    id,
    from: this.config.nodeId,
    to,
    timestamp: Date.now(),
    payload: { delta: "", done: true, usage },
  } satisfies ModelStreamChunk;
  this.peerManager.sendTo(to, finalFrame);
}
|
|
723
|
+
|
|
488
724
|
/** Handle model_req locally: call the model API directly or fall back to OpenClaw gateway. */
|
|
489
725
|
async handleModelRequest(frame: ModelRequest): Promise<void> {
|
|
490
726
|
const { id, from, payload } = frame;
|
|
491
|
-
debug("model_req", `handling model="${payload.model}" from=${from} stream=${payload.stream}`);
|
|
727
|
+
debug("model_req", `handling model="${payload.model}" provider=${payload.provider ?? "any"} from=${from} stream=${payload.stream}`);
|
|
492
728
|
|
|
493
|
-
const model =
|
|
729
|
+
const model = payload.provider
|
|
730
|
+
? this.config.models.find((m) => m.id === payload.model && m.provider === payload.provider)
|
|
731
|
+
?? this.config.models.find((m) => m.id === payload.model)
|
|
732
|
+
: this.config.models.find((m) => m.id === payload.model);
|
|
494
733
|
if (!model) {
|
|
495
734
|
this.peerManager.sendTo(from, {
|
|
496
735
|
type: "model_res",
|
|
@@ -505,28 +744,42 @@ export class ModelProxy {
|
|
|
505
744
|
|
|
506
745
|
try {
|
|
507
746
|
const endpoint = this.resolveModelEndpoint(model);
|
|
747
|
+
const isResponsesApi = endpoint.api === "openai-responses" || endpoint.api === "openai-codex-responses";
|
|
748
|
+
const path = isResponsesApi ? "/responses" : "/chat/completions";
|
|
749
|
+
const url = `${endpoint.baseUrl}${path}`;
|
|
508
750
|
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
|
509
751
|
|
|
510
752
|
if (endpoint.direct) {
|
|
511
753
|
if (endpoint.apiKey) headers["Authorization"] = `Bearer ${endpoint.apiKey}`;
|
|
512
|
-
debug("model_req", `direct API call to ${endpoint.
|
|
754
|
+
debug("model_req", `direct API call to ${url} (api=${endpoint.api})`);
|
|
513
755
|
} else {
|
|
514
756
|
const { authHeader } = this.gatewayInfo;
|
|
515
757
|
if (authHeader) headers["Authorization"] = authHeader;
|
|
516
|
-
debug("model_req", `gateway fallback to ${
|
|
758
|
+
debug("model_req", `gateway fallback to ${url}`);
|
|
517
759
|
}
|
|
518
760
|
|
|
519
|
-
const
|
|
761
|
+
const modelField = endpoint.direct ? model.id : `${model.provider}/${model.id}`;
|
|
762
|
+
const requestBody = isResponsesApi
|
|
763
|
+
? {
|
|
764
|
+
model: modelField,
|
|
765
|
+
input: payload.messages,
|
|
766
|
+
stream: payload.stream,
|
|
767
|
+
temperature: payload.temperature,
|
|
768
|
+
max_output_tokens: payload.maxTokens,
|
|
769
|
+
}
|
|
770
|
+
: {
|
|
771
|
+
model: modelField,
|
|
772
|
+
messages: payload.messages,
|
|
773
|
+
temperature: payload.temperature,
|
|
774
|
+
max_tokens: payload.maxTokens,
|
|
775
|
+
stream: payload.stream,
|
|
776
|
+
...(payload.stream ? { stream_options: { include_usage: true } } : {}),
|
|
777
|
+
};
|
|
778
|
+
|
|
779
|
+
const response = await fetch(url, {
|
|
520
780
|
method: "POST",
|
|
521
781
|
headers,
|
|
522
|
-
body: JSON.stringify(
|
|
523
|
-
model: endpoint.direct ? model.id : `${model.provider}/${model.id}`,
|
|
524
|
-
messages: payload.messages,
|
|
525
|
-
temperature: payload.temperature,
|
|
526
|
-
max_tokens: payload.maxTokens,
|
|
527
|
-
stream: payload.stream,
|
|
528
|
-
...(payload.stream ? { stream_options: { include_usage: true } } : {}),
|
|
529
|
-
}),
|
|
782
|
+
body: JSON.stringify(requestBody),
|
|
530
783
|
});
|
|
531
784
|
|
|
532
785
|
if (!response.ok) {
|
|
@@ -549,73 +802,122 @@ export class ModelProxy {
|
|
|
549
802
|
if (done) break;
|
|
550
803
|
|
|
551
804
|
buffer += decoder.decode(value, { stream: true });
|
|
805
|
+
if (buffer.length > MAX_STREAM_BUFFER) {
|
|
806
|
+
throw new Error("Stream buffer exceeded 1MB — upstream may be malformed");
|
|
807
|
+
}
|
|
552
808
|
const lines = buffer.split("\n");
|
|
553
809
|
buffer = lines.pop()!;
|
|
554
810
|
|
|
811
|
+
// Track SSE event type for responses API
|
|
812
|
+
let currentEvent = "";
|
|
555
813
|
for (const line of lines) {
|
|
814
|
+
if (line.startsWith("event: ")) {
|
|
815
|
+
currentEvent = line.slice(7).trim();
|
|
816
|
+
continue;
|
|
817
|
+
}
|
|
556
818
|
if (!line.startsWith("data: ")) continue;
|
|
557
819
|
const data = line.slice(6).trim();
|
|
558
820
|
if (data === "[DONE]") {
|
|
559
|
-
this.
|
|
560
|
-
type: "model_stream",
|
|
561
|
-
id,
|
|
562
|
-
from: this.config.nodeId,
|
|
563
|
-
to: from,
|
|
564
|
-
timestamp: Date.now(),
|
|
565
|
-
payload: { delta: "", done: true, usage: lastUsage },
|
|
566
|
-
} satisfies ModelStreamChunk);
|
|
821
|
+
this.sendStreamDone(from, id, lastUsage);
|
|
567
822
|
streamDone = true;
|
|
568
823
|
break;
|
|
569
824
|
}
|
|
570
825
|
|
|
571
826
|
try {
|
|
572
827
|
const parsed = JSON.parse(data);
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
828
|
+
|
|
829
|
+
if (isResponsesApi) {
|
|
830
|
+
const evtType = currentEvent || parsed.type;
|
|
831
|
+
if (evtType === "response.output_text.delta") {
|
|
832
|
+
const delta = parsed.delta || "";
|
|
833
|
+
if (delta) {
|
|
834
|
+
this.sendStreamDelta(from, id, delta);
|
|
835
|
+
}
|
|
836
|
+
} else if (evtType === "response.completed") {
|
|
837
|
+
const usage = parsed.response?.usage;
|
|
838
|
+
if (usage) {
|
|
839
|
+
lastUsage = {
|
|
840
|
+
inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
|
|
841
|
+
outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
|
|
842
|
+
};
|
|
843
|
+
}
|
|
844
|
+
this.sendStreamDone(from, id, lastUsage);
|
|
845
|
+
streamDone = true;
|
|
846
|
+
break;
|
|
847
|
+
}
|
|
848
|
+
} else {
|
|
849
|
+
// Chat completions format
|
|
850
|
+
if (parsed.usage) {
|
|
851
|
+
lastUsage = {
|
|
852
|
+
inputTokens: parsed.usage.prompt_tokens,
|
|
853
|
+
outputTokens: parsed.usage.completion_tokens,
|
|
854
|
+
};
|
|
855
|
+
}
|
|
856
|
+
const d = parsed.choices?.[0]?.delta;
|
|
857
|
+
const delta = d?.content || d?.reasoning_content || "";
|
|
858
|
+
// Pass full delta object when it contains tool_calls or other structured data
|
|
859
|
+
const hasStructured = d?.tool_calls || d?.refusal != null;
|
|
860
|
+
if (delta || hasStructured) {
|
|
861
|
+
this.sendStreamDelta(from, id, delta, hasStructured ? d : undefined);
|
|
862
|
+
}
|
|
590
863
|
}
|
|
591
864
|
} catch {
|
|
592
865
|
// skip malformed chunks
|
|
593
866
|
}
|
|
867
|
+
currentEvent = "";
|
|
594
868
|
}
|
|
595
869
|
}
|
|
596
|
-
// If the upstream closed without sending [DONE]
|
|
597
|
-
// frame so the requesting side doesn't hang
|
|
870
|
+
// If the upstream closed without sending [DONE] or response.completed,
|
|
871
|
+
// send a completion frame so the requesting side doesn't hang.
|
|
598
872
|
if (!streamDone) {
|
|
599
|
-
this.
|
|
600
|
-
type: "model_stream",
|
|
601
|
-
id,
|
|
602
|
-
from: this.config.nodeId,
|
|
603
|
-
to: from,
|
|
604
|
-
timestamp: Date.now(),
|
|
605
|
-
payload: { delta: "", done: true, usage: lastUsage },
|
|
606
|
-
} satisfies ModelStreamChunk);
|
|
873
|
+
this.sendStreamDone(from, id, lastUsage);
|
|
607
874
|
}
|
|
608
875
|
} finally {
|
|
609
876
|
reader.releaseLock();
|
|
610
877
|
}
|
|
611
878
|
} else {
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
879
|
+
// Non-streaming response
|
|
880
|
+
const result = await response.json();
|
|
881
|
+
let content: string;
|
|
882
|
+
let message: unknown | undefined;
|
|
883
|
+
let usage: { inputTokens: number; outputTokens: number } | undefined;
|
|
884
|
+
|
|
885
|
+
if (isResponsesApi) {
|
|
886
|
+
// Responses API: extract text from output[].content[].text
|
|
887
|
+
content = "";
|
|
888
|
+
const output = result.output as { type?: string; content?: { type?: string; text?: string }[] }[] | undefined;
|
|
889
|
+
if (Array.isArray(output)) {
|
|
890
|
+
for (const item of output) {
|
|
891
|
+
if (item.type === "message" && Array.isArray(item.content)) {
|
|
892
|
+
for (const part of item.content) {
|
|
893
|
+
if (part.type === "output_text" && part.text) content += part.text;
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
// Carry full output array for structured data (function_call items, etc.)
|
|
899
|
+
message = result.output;
|
|
900
|
+
if (result.usage) {
|
|
901
|
+
usage = {
|
|
902
|
+
inputTokens: result.usage.input_tokens ?? result.usage.prompt_tokens ?? 0,
|
|
903
|
+
outputTokens: result.usage.output_tokens ?? result.usage.completion_tokens ?? 0,
|
|
904
|
+
};
|
|
905
|
+
}
|
|
906
|
+
} else {
|
|
907
|
+
// Chat completions format
|
|
908
|
+
const msg = result.choices?.[0]?.message;
|
|
909
|
+
content = msg?.content || msg?.reasoning_content || "";
|
|
910
|
+
// Carry full message object when it has tool_calls or other structured data
|
|
911
|
+
if (msg?.tool_calls || msg?.refusal != null || msg?.function_call) {
|
|
912
|
+
message = msg;
|
|
913
|
+
}
|
|
914
|
+
if (result.usage) {
|
|
915
|
+
usage = {
|
|
916
|
+
inputTokens: result.usage.prompt_tokens,
|
|
917
|
+
outputTokens: result.usage.completion_tokens,
|
|
918
|
+
};
|
|
919
|
+
}
|
|
920
|
+
}
|
|
619
921
|
|
|
620
922
|
this.peerManager.sendTo(from, {
|
|
621
923
|
type: "model_res",
|
|
@@ -626,9 +928,8 @@ export class ModelProxy {
|
|
|
626
928
|
payload: {
|
|
627
929
|
success: true,
|
|
628
930
|
content,
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
: undefined,
|
|
931
|
+
...(message !== undefined && { message }),
|
|
932
|
+
usage,
|
|
632
933
|
},
|
|
633
934
|
} satisfies ModelResponse);
|
|
634
935
|
}
|