clawmatrix 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,12 +11,17 @@ import type {
11
11
  import { debug } from "./debug.ts";
12
12
 
/** Delay, in milliseconds, after which a pending model request is timed out (2 minutes). */
const MODEL_TIMEOUT = 2 * 60 * 1_000;

/** 1MB cap on the stream buffer — guards against an upstream that never sends newlines. */
const MAX_STREAM_BUFFER = 1024 * 1024;

/** Client-facing response shape: chat completions ("chat") or the Responses API ("responses"). */
type ResponseFormat = "responses" | "chat";
14
17
 
/** Bookkeeping for one in-flight model request, stored in the proxy's `pending` map by request ID. */
interface PendingModelReq {
  /** Settles the waiting promise for non-stream requests; a no-op for stream requests. */
  resolve: (value: unknown) => void;
  /** Rejects the waiting promise for non-stream requests; a no-op for stream requests. */
  reject: (error: Error) => void;
  /** Timeout timer for the request; cleared once the request finishes or fails. */
  timer: ReturnType<typeof setTimeout>;
  /** True when the client receives the reply as a server-sent event stream. */
  stream: boolean;
  /** Which client-facing format replies for this request are rendered in. */
  responseFormat: ResponseFormat;
  /** Model ID of the request; recorded for stream requests so completion events can echo it. */
  model?: string;
  /** Stream controller that SSE chunks are enqueued on (stream requests only). */
  controller?: ReadableStreamDefaultController;
  /** Encoder used to turn SSE text into bytes (stream requests only). */
  encoder?: TextEncoder;
}
@@ -36,35 +41,125 @@ export class ModelProxy {
36
41
  this.openclawConfig = openclawConfig;
37
42
  }
38
43
 
/**
 * Normalize a Responses API `input` value into chat-completions messages for WS transport.
 *
 * Conversions:
 * - string → `[{ role: "user", content }]`
 * - shorthand `{ role, content: "..." }` → `{ role, content }`
 * - full `{ type: "message", role, content: [...] }` → chat content parts
 *   (`input_text` / `output_text` → `text`, `input_image` → `image_url`);
 *   a message with exactly one text part collapses to plain string content
 * - `{ type: "function_call" }` → assistant message carrying `tool_calls`
 *   (consecutive calls are grouped into one assistant message)
 * - `{ type: "function_call_output" }` → `{ role: "tool", tool_call_id, content }`
 *
 * A missing `role` defaults to "user". Input that is neither a string nor an array
 * yields `[]`; items and parts that are not recognized are skipped.
 *
 * @param input Raw `input` field of a Responses API request body.
 * @returns Chat-completions compatible messages.
 */
private static normalizeResponsesInput(input: unknown): unknown[] {
  if (typeof input === "string") {
    return [{ role: "user", content: input }];
  }
  if (!Array.isArray(input)) return [];

  const messages: unknown[] = [];
  // Assistant message that is currently collecting tool calls. It only keeps
  // collecting while it is still the last message pushed.
  let toolCallMessage: { role: string; content: null; tool_calls: unknown[] } | undefined;

  for (const item of input) {
    if (!item || typeof item !== "object") continue;
    const obj = item as Record<string, unknown>;

    // function_call → assistant tool_calls. Without this the matching
    // function_call_output would become a tool message with no preceding call.
    if (obj.type === "function_call") {
      const call = {
        id: obj.call_id,
        type: "function",
        function: {
          name: obj.name,
          arguments: typeof obj.arguments === "string" ? obj.arguments : JSON.stringify(obj.arguments ?? {}),
        },
      };
      if (toolCallMessage && messages[messages.length - 1] === toolCallMessage) {
        toolCallMessage.tool_calls.push(call);
      } else {
        toolCallMessage = { role: "assistant", content: null, tool_calls: [call] };
        messages.push(toolCallMessage);
      }
      continue;
    }

    // function_call_output → tool message
    if (obj.type === "function_call_output") {
      let content: string;
      if (typeof obj.output === "string") {
        content = obj.output;
      } else if (obj.output === undefined) {
        // JSON.stringify(undefined) is undefined, which would drop `content` on serialization.
        content = "";
      } else {
        content = JSON.stringify(obj.output);
      }
      messages.push({ role: "tool", tool_call_id: obj.call_id, content });
      continue;
    }

    const role = typeof obj.role === "string" ? obj.role : "user";

    // Simple shorthand: {role: "user", content: "hello"}
    if (typeof obj.content === "string") {
      messages.push({ role, content: obj.content });
      continue;
    }

    // Full format: {type: "message", role, content: [{type: "input_text"|"input_image"|...}]}
    if (Array.isArray(obj.content)) {
      const parts: Record<string, unknown>[] = [];
      for (const part of obj.content) {
        if (!part || typeof part !== "object") continue;
        const p = part as Record<string, unknown>;

        if (p.type === "input_text" || p.type === "output_text") {
          // Text content → chat text part
          if (typeof p.text === "string") {
            parts.push({ type: "text", text: p.text });
          }
        } else if (p.type === "input_image") {
          // Image content → chat image_url part
          if (typeof p.image_url === "string") {
            const image: Record<string, unknown> = { url: p.image_url };
            // Carry the resolution hint over instead of silently dropping it.
            if (typeof p.detail === "string") image.detail = p.detail;
            parts.push({ type: "image_url", image_url: image });
          } else if (p.image_url && typeof p.image_url === "object") {
            parts.push({ type: "image_url", image_url: p.image_url });
          }
        } else if (p.type === "text" && typeof p.text === "string") {
          // Already chat format
          parts.push(p);
        } else if (p.type === "image_url") {
          // Already chat format
          parts.push(p);
        }
      }

      const only = parts.length === 1 ? parts[0] : undefined;
      if (only && only.type === "text") {
        // Single text part → simplify to string content
        messages.push({ role, content: only.text as string });
      } else if (parts.length > 0) {
        messages.push({ role, content: parts });
      }
      continue;
    }

    // Fallback: item carries its text directly
    if (typeof obj.text === "string") {
      messages.push({ role, content: obj.text });
    }
  }
  return messages;
}
128
+
/**
 * Pick the API endpoint for a model. Precedence:
 * explicit `baseUrl` on the model > OpenClaw `models.providers[provider]` > local gateway.
 *
 * @param model Model entry; `baseUrl`, `apiKey` and `api` are optional overrides.
 * @returns Base URL with one trailing slash removed, the API key if any, whether the
 *   endpoint is called directly (`direct: false` means the gateway fallback), and the
 *   API flavour (defaults to "openai-completions").
 */
private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string; api?: string }): { baseUrl: string; apiKey?: string; direct: boolean; api: string } {
  const fallbackApi = "openai-completions";
  const trimSlash = (url: string): string => url.replace(/\/$/, "");

  // 1. The ClawMatrix model config names its own base URL.
  if (model.baseUrl) {
    const api = model.api ?? fallbackApi;
    return { baseUrl: trimSlash(model.baseUrl), apiKey: model.apiKey, direct: true, api };
  }

  // 2. Look the provider up in OpenClaw's models.providers table.
  type ProviderEntry = { baseUrl?: string; apiKey?: string; api?: string };
  const modelsSection = (this.openclawConfig as Record<string, unknown>).models as
    { providers?: Record<string, ProviderEntry> } | undefined;
  const entry = modelsSection?.providers?.[model.provider];
  if (entry?.baseUrl) {
    const key = entry.apiKey;
    return {
      baseUrl: trimSlash(entry.baseUrl),
      apiKey: typeof key === "string" ? key : undefined,
      direct: true,
      api: model.api ?? entry.api ?? fallbackApi,
    };
  }

  // 3. Nothing configured: fall back to the OpenClaw gateway on localhost.
  return {
    baseUrl: `http://127.0.0.1:${this.gatewayInfo.port}/v1`,
    apiKey: undefined,
    direct: false,
    api: model.api ?? fallbackApi,
  };
}
70
165
 
@@ -79,7 +174,12 @@ export class ModelProxy {
79
174
 
80
175
  if (p === "/chat/completions" && req.method === "POST") {
81
176
  const body = await this.readBody(req);
82
- const response = await this.handleChatCompletion(body);
177
+ const response = await this.handleChatCompletion(body, "openai-completions");
178
+ debug("proxy", `response status=${response.status}`);
179
+ this.sendResponse(res, response);
180
+ } else if (p === "/responses" && req.method === "POST") {
181
+ const body = await this.readBody(req);
182
+ const response = await this.handleResponses(body);
83
183
  debug("proxy", `response status=${response.status}`);
84
184
  this.sendResponse(res, response);
85
185
  } else if (p === "/models" && req.method === "GET") {
@@ -152,55 +252,56 @@ export class ModelProxy {
152
252
  }
153
253
 
154
254
  // ── HTTP handlers ──────────────────────────────────────────────
155
- private async handleChatCompletion(rawBody: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
156
- let body: {
157
- model: string;
158
- messages: unknown[];
159
- stream?: boolean;
160
- temperature?: number;
161
- max_tokens?: number;
162
- };
163
255
 
164
- try {
165
- body = JSON.parse(rawBody);
166
- } catch {
167
- return {
168
- status: 400,
169
- headers: { "Content-Type": "application/json" },
170
- body: JSON.stringify({ error: "Invalid JSON" }),
171
- };
172
- }
173
-
174
- const rawModelId = body.model;
175
- // Parse "nodeId/model" format: first segment is nodeId, rest is model ID.
176
- // OpenClaw sends "providerId/modelId" where providerId = nodeId, so this
177
- // naturally handles both OpenClaw calls and direct curl calls.
178
- // If no "/" present, treat entire string as model ID and auto-resolve.
179
- let nodeId: string | undefined;
180
- let modelId: string;
/**
 * Resolve a client-supplied model ID to its proxy model entry and a reachable route.
 * Shared by the chat completions and responses handlers.
 *
 * Accepted forms:
 * - "nodeId/modelId": the text before the first "/" is the node ID, the rest is the
 *   model ID. The proxy model entry may be `undefined` in this form.
 * - "modelId": the model must be listed in `config.proxyModels`; its `nodeId` picks the node.
 *
 * @param rawModelId The `model` field from the request body.
 * @returns The resolved IDs and the route's node ID, or `{ error }` holding an HTTP status
 *   and message: 400 for a non-string model, 404 for an unknown model or node, 502 when
 *   the node cannot be reached.
 */
private resolveModelRoute(rawModelId: string): {
  nodeId: string; modelId: string;
  proxyModel: (typeof this.config.proxyModels)[number] | undefined;
  routeNodeId: string;
} | { error: { status: number; message: string } } {
  // Callers pass `body.model` straight from JSON.parse, so at runtime the value may be
  // missing or not a string; report that instead of throwing on `.indexOf`.
  if (typeof rawModelId !== "string") {
    return { error: { status: 400, message: `Request is missing a valid "model" string` } };
  }

  const slashIdx = rawModelId.indexOf("/");
  let nodeId: string;
  let modelId: string;
  let proxyModel: (typeof this.config.proxyModels)[number] | undefined;

  if (slashIdx > 0) {
    nodeId = rawModelId.slice(0, slashIdx);
    modelId = rawModelId.slice(slashIdx + 1);
    proxyModel = this.config.proxyModels.find((m) => m.id === modelId && m.nodeId === nodeId);
  } else {
    modelId = rawModelId;
    proxyModel = this.config.proxyModels.find((m) => m.id === modelId);
    if (!proxyModel) {
      return { error: { status: 404, message: `Model "${rawModelId}" not found in proxy models` } };
    }
    nodeId = proxyModel.nodeId;
  }

  const route = this.peerManager.router.getRoute(nodeId);
  debug("proxy", `model raw="${rawModelId}" nodeId=${nodeId} modelId="${modelId}" route=${route?.nodeId ?? "none"}`);
  if (!route) {
    return { error: { status: 404, message: `Node "${nodeId}" not found in cluster` } };
  }
  // Check reachability up front so callers never start a stream towards a dead node.
  if (!this.peerManager.canReach(route.nodeId)) {
    return { error: { status: 502, message: `Cannot reach model node "${route.nodeId}"` } };
  }
  return { nodeId, modelId, proxyModel, routeNodeId: route.nodeId };
}
202
290
 
203
- // Inject model identity so the LLM knows what it is
291
+ private async handleChatCompletion(rawBody: string, _api: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
292
+ let body: { model: string; messages: unknown[]; stream?: boolean; temperature?: number; max_tokens?: number };
293
+ try {
294
+ body = JSON.parse(rawBody);
295
+ } catch {
296
+ return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
297
+ }
298
+
299
+ const resolved = this.resolveModelRoute(body.model);
300
+ if ("error" in resolved) {
301
+ return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
302
+ }
303
+
304
+ const { modelId, proxyModel, routeNodeId } = resolved;
204
305
  const messages = body.messages;
205
306
  if (proxyModel?.description) {
206
307
  const first = messages[0] as { role?: string; content?: string } | undefined;
@@ -213,35 +314,66 @@ export class ModelProxy {
213
314
 
214
315
  const stream = body.stream ?? false;
215
316
  const requestId = crypto.randomUUID();
216
-
217
317
  const frame: ModelRequest = {
218
- type: "model_req",
219
- id: requestId,
220
- from: this.config.nodeId,
221
- to: route.nodeId,
222
- timestamp: Date.now(),
223
- payload: {
224
- model: modelId,
225
- messages,
226
- temperature: body.temperature,
227
- maxTokens: body.max_tokens,
228
- stream,
229
- },
318
+ type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
319
+ payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_tokens, stream },
230
320
  };
231
321
 
232
- // Pre-check reachability before starting a stream (avoids silent empty response)
233
- if (!this.peerManager.canReach(route.nodeId)) {
234
- return {
235
- status: 502,
236
- headers: { "Content-Type": "application/json" },
237
- body: JSON.stringify({ error: { message: `Cannot reach model node "${route.nodeId}"` } }),
238
- };
322
+ if (stream) {
323
+ return this.handleStreamRequest(requestId, routeNodeId, frame, "chat");
324
+ } else {
325
+ return this.handleNonStreamRequest(requestId, routeNodeId, frame, "chat");
326
+ }
327
+ }
328
+
/**
 * Handle a `POST /responses` request body (Responses API).
 *
 * Steps: parse and validate the JSON body, resolve the model route, convert `input`
 * (plus optional `instructions`) into chat messages, prefix the model description when
 * the proxy model has one, then dispatch as a stream or non-stream request rendered in
 * the "responses" format.
 *
 * @param rawBody Raw request body text.
 * @returns HTTP status, headers and body. 400 is returned for unparseable JSON, a body
 *   that is not a JSON object, or a missing `model` string; route resolution errors are
 *   returned with the status they carry.
 */
private async handleResponses(rawBody: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(rawBody);
  } catch {
    return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
  }

  // Valid JSON is not necessarily an object ("null", "42", "[]"); reject those here
  // rather than failing on a property access below.
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: "Request body must be a JSON object" } }) };
  }
  const body = parsed as { model: string; input: unknown; stream?: boolean; temperature?: number; max_output_tokens?: number; instructions?: string };
  if (typeof body.model !== "string") {
    return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: `Request is missing a valid "model" string` } }) };
  }

  const resolved = this.resolveModelRoute(body.model);
  if ("error" in resolved) {
    return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
  }

  const { modelId, proxyModel, routeNodeId } = resolved;

  // Normalize responses API input → simple chat messages for WS transport.
  // Responses API items use {type: "message", role, content: [{type: "input_text", text}]}
  // but WS protocol carries simple {role, content} chat messages.
  const messages = ModelProxy.normalizeResponsesInput(body.input);

  // Prepend instructions as a developer message (non-empty strings only).
  if (typeof body.instructions === "string" && body.instructions) {
    messages.unshift({ role: "developer", content: body.instructions });
  }

  // Tell the model what it is: extend a leading system/developer message, or add one.
  if (proxyModel?.description) {
    const first = messages[0] as { role?: string; content?: string } | undefined;
    const leadsWithPrompt = first?.role === "system" || first?.role === "developer";
    if (first && leadsWithPrompt && typeof first.content === "string") {
      first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
    } else {
      messages.unshift({ role: "system", content: `[Model: ${proxyModel.description}]` });
    }
  }

  const stream = body.stream ?? false;
  const requestId = crypto.randomUUID();
  debug("proxy", `responses: stream=${stream} messages=${messages.length} input_type=${typeof body.input}${Array.isArray(body.input) ? `[${body.input.length}]` : ""}`);
  const frame: ModelRequest = {
    type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
    payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
  };

  if (stream) {
    return this.handleStreamRequest(requestId, routeNodeId, frame, "responses");
  } else {
    return this.handleNonStreamRequest(requestId, routeNodeId, frame, "responses");
  }
}
247
379
 
@@ -249,8 +381,10 @@ export class ModelProxy {
249
381
  requestId: string,
250
382
  targetNodeId: string,
251
383
  frame: ModelRequest,
384
+ responseFormat: ResponseFormat,
252
385
  ): { status: number; headers: Record<string, string>; body: ReadableStream } {
253
386
  const encoder = new TextEncoder();
387
+ const model = frame.payload.model;
254
388
 
255
389
  const readable = new ReadableStream({
256
390
  start: (controller) => {
@@ -258,63 +392,89 @@ export class ModelProxy {
258
392
  this.pending.delete(requestId);
259
393
  this.peerManager.router.markFailed(requestId);
260
394
  try {
261
- const errorChunk = {
262
- id: `chatcmpl-${requestId}`,
263
- object: "chat.completion.chunk",
264
- choices: [{ index: 0, delta: { content: "\n\n[ClawMatrix] Error: model request timed out" }, finish_reason: "stop" }],
265
- };
266
- controller.enqueue(
267
- encoder.encode(`data: ${JSON.stringify(errorChunk)}\n\n`),
268
- );
269
- controller.enqueue(encoder.encode("data: [DONE]\n\n"));
395
+ if (responseFormat === "responses") {
396
+ controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: "\n\n[ClawMatrix] Error: model request timed out" })}\n\n`));
397
+ this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
398
+ } else {
399
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: "\n\n[ClawMatrix] Error: model request timed out" }, finish_reason: "stop" }] })}\n\n`));
400
+ controller.enqueue(encoder.encode("data: [DONE]\n\n"));
401
+ }
270
402
  controller.close();
271
- } catch {
272
- // controller may already be closed
273
- }
403
+ } catch { /* controller may already be closed */ }
274
404
  }, MODEL_TIMEOUT);
275
405
 
276
406
  this.pending.set(requestId, {
277
- resolve: () => {},
278
- reject: () => {},
279
- timer,
280
- stream: true,
281
- controller,
282
- encoder,
407
+ resolve: () => {}, reject: () => {},
408
+ timer, stream: true, responseFormat, model,
409
+ controller, encoder,
283
410
  });
284
411
 
412
+ // Emit setup events for responses API
413
+ if (responseFormat === "responses") {
414
+ this.enqueueResponsesStreamSetup(controller, encoder, requestId, model);
415
+ }
416
+
285
417
  const sent = this.peerManager.sendTo(targetNodeId, frame);
286
418
  if (!sent) {
287
419
  this.pending.delete(requestId);
288
420
  clearTimeout(timer);
289
- const errChunk = {
290
- id: `chatcmpl-${requestId}`,
291
- object: "chat.completion.chunk",
292
- choices: [{ index: 0, delta: { content: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` }, finish_reason: "stop" }],
293
- };
294
- controller.enqueue(
295
- encoder.encode(`data: ${JSON.stringify(errChunk)}\n\n`),
296
- );
297
- controller.enqueue(encoder.encode("data: [DONE]\n\n"));
298
- controller.close();
421
+ try {
422
+ if (responseFormat === "responses") {
423
+ controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` })}\n\n`));
424
+ this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
425
+ } else {
426
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` }, finish_reason: "stop" }] })}\n\n`));
427
+ controller.enqueue(encoder.encode("data: [DONE]\n\n"));
428
+ }
429
+ controller.close();
430
+ } catch { /* controller may already be closed */ }
299
431
  }
300
432
  },
301
433
  });
302
434
 
303
435
  return {
304
436
  status: 200,
305
- headers: {
306
- "Content-Type": "text/event-stream",
307
- "Cache-Control": "no-cache",
308
- "Connection": "keep-alive",
309
- },
437
+ headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" },
310
438
  body: readable,
311
439
  };
312
440
  }
313
441
 
/**
 * Enqueue the opening events of a Responses API stream, in order:
 * response.created, response.in_progress, response.output_item.added,
 * response.content_part.added.
 *
 * @param controller Stream controller the SSE frames are enqueued on.
 * @param encoder Encoder used to turn each frame into bytes.
 * @param id Request ID; the response and message IDs are `resp_<id>` and `msg_<id>`.
 * @param model Model ID echoed in the response object.
 */
private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string) {
  const responseId = `resp_${id}`;
  const messageId = `msg_${id}`;
  const createdAt = Math.floor(Date.now() / 1000);

  const response = { id: responseId, object: "response", created_at: createdAt, status: "in_progress", model, output: [] };
  const item = { type: "message", id: messageId, role: "assistant", content: [], status: "in_progress" };
  const part = { type: "output_text", text: "" };

  // Event name paired with the payload fields that follow `type`.
  const events: Array<[string, Record<string, unknown>]> = [
    ["response.created", { response }],
    ["response.in_progress", { response }],
    ["response.output_item.added", { output_index: 0, item }],
    ["response.content_part.added", { item_id: messageId, output_index: 0, content_index: 0, part }],
  ];

  for (const [name, fields] of events) {
    const frame = `event: ${name}\ndata: ${JSON.stringify({ type: name, ...fields })}\n\n`;
    controller.enqueue(encoder.encode(frame));
  }
}
456
+
/**
 * Enqueue the closing events of a Responses API stream, in order:
 * response.output_text.done, response.content_part.done, response.output_item.done,
 * response.completed. The stream itself is not closed here.
 *
 * @param controller Stream controller the SSE frames are enqueued on.
 * @param encoder Encoder used to turn each frame into bytes.
 * @param id Request ID; the response and message IDs are `resp_<id>` and `msg_<id>`.
 * @param model Model ID echoed in the completed response object.
 * @param content Final text to report; an empty string is used when omitted.
 * @param usage Token counts; all-zero usage is reported when omitted.
 */
private enqueueResponsesStreamDone(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string, content?: string, usage?: { inputTokens: number; outputTokens: number }) {
  const responseId = `resp_${id}`;
  const messageId = `msg_${id}`;
  const createdAt = Math.floor(Date.now() / 1000);
  const text = content ?? "";

  const part = { type: "output_text", text };
  const item = { type: "message", id: messageId, role: "assistant", content: [part], status: "completed" };

  let usageInfo = { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
  if (usage) {
    usageInfo = {
      input_tokens: usage.inputTokens,
      output_tokens: usage.outputTokens,
      total_tokens: usage.inputTokens + usage.outputTokens,
    };
  }
  const response = { id: responseId, object: "response", created_at: createdAt, status: "completed", model, output: [item], usage: usageInfo };

  // Event name paired with the payload fields that follow `type`.
  const events: Array<[string, Record<string, unknown>]> = [
    ["response.output_text.done", { item_id: messageId, output_index: 0, content_index: 0, text }],
    ["response.content_part.done", { item_id: messageId, output_index: 0, content_index: 0, part }],
    ["response.output_item.done", { output_index: 0, item }],
    ["response.completed", { response }],
  ];

  for (const [name, fields] of events) {
    const frame = `event: ${name}\ndata: ${JSON.stringify({ type: name, ...fields })}\n\n`;
    controller.enqueue(encoder.encode(frame));
  }
}
472
+
314
473
  private async handleNonStreamRequest(
315
474
  requestId: string,
316
475
  targetNodeId: string,
317
476
  frame: ModelRequest,
477
+ responseFormat: ResponseFormat,
318
478
  ): Promise<{ status: number; headers: Record<string, string>; body: string }> {
319
479
  try {
320
480
  const result = await new Promise<ModelResponse["payload"]>(
@@ -327,9 +487,7 @@ export class ModelProxy {
327
487
 
328
488
  this.pending.set(requestId, {
329
489
  resolve: resolve as (v: unknown) => void,
330
- reject,
331
- timer,
332
- stream: false,
490
+ reject, timer, stream: false, responseFormat,
333
491
  });
334
492
 
335
493
  const sent = this.peerManager.sendTo(targetNodeId, frame);
@@ -349,6 +507,41 @@ export class ModelProxy {
349
507
  };
350
508
  }
351
509
 
510
+ if (responseFormat === "responses") {
511
+ const msgId = `msg_${requestId}`;
512
+ const usageObj = result.usage
513
+ ? { input_tokens: result.usage.inputTokens, output_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
514
+ : { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
515
+ // If upstream sent full output array (responses API), use it directly
516
+ const output = Array.isArray(result.message)
517
+ ? result.message
518
+ : [{
519
+ type: "message", id: msgId, role: "assistant",
520
+ content: [{ type: "output_text", text: result.content ?? "" }],
521
+ status: "completed",
522
+ }];
523
+ return {
524
+ status: 200,
525
+ headers: { "Content-Type": "application/json" },
526
+ body: JSON.stringify({
527
+ id: `resp_${requestId}`,
528
+ object: "response",
529
+ created_at: Math.floor(Date.now() / 1000),
530
+ status: "completed",
531
+ model: frame.payload.model,
532
+ output,
533
+ usage: usageObj,
534
+ }),
535
+ };
536
+ }
537
+
538
+ // Chat completions format — use full message object when available (has tool_calls etc.)
539
+ const msg = result.message as Record<string, unknown> | undefined;
540
+ const message = msg
541
+ ? { role: "assistant", ...msg }
542
+ : { role: "assistant", content: result.content };
543
+ const finishReason = msg?.tool_calls ? "tool_calls" : "stop";
544
+
352
545
  return {
353
546
  status: 200,
354
547
  headers: { "Content-Type": "application/json" },
@@ -357,19 +550,13 @@ export class ModelProxy {
357
550
  object: "chat.completion",
358
551
  created: Math.floor(Date.now() / 1000),
359
552
  model: frame.payload.model,
360
- choices: [
361
- {
362
- index: 0,
363
- message: { role: "assistant", content: result.content },
364
- finish_reason: "stop",
365
- },
366
- ],
553
+ choices: [{
554
+ index: 0,
555
+ message,
556
+ finish_reason: finishReason,
557
+ }],
367
558
  usage: result.usage
368
- ? {
369
- prompt_tokens: result.usage.inputTokens,
370
- completion_tokens: result.usage.outputTokens,
371
- total_tokens: result.usage.inputTokens + result.usage.outputTokens,
372
- }
559
+ ? { prompt_tokens: result.usage.inputTokens, completion_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
373
560
  : undefined,
374
561
  }),
375
562
  };
@@ -383,16 +570,35 @@ export class ModelProxy {
383
570
  }
384
571
 
385
572
  private handleListModels(): { status: number; headers: Record<string, string>; body: string } {
386
- const models = this.peerManager.router
387
- .getAllPeers()
388
- .flatMap((p) =>
389
- p.models.map((m) => ({
390
- id: m.id,
391
- object: "model",
392
- created: 0,
393
- owned_by: m.provider,
394
- })),
395
- );
573
+ // Build from proxyModels config (has full detail) and enrich with
574
+ // connectivity info from the router so consumers know what's reachable.
575
+ const reachable = new Set(
576
+ this.peerManager.router.getAllPeers()
577
+ .filter((p) => p.connection?.isOpen || p.reachableVia)
578
+ .map((p) => p.nodeId),
579
+ );
580
+
581
+ const models = this.config.proxyModels.map((m) => {
582
+ const entry: Record<string, unknown> = {
583
+ id: m.id,
584
+ object: "model",
585
+ created: 0,
586
+ owned_by: m.provider ?? "unknown",
587
+ // Extended fields
588
+ ...(m.description && { description: m.description }),
589
+ ...(m.contextWindow && { context_window: m.contextWindow }),
590
+ ...(m.maxTokens && { max_tokens: m.maxTokens }),
591
+ ...(m.reasoning !== undefined && { reasoning: m.reasoning }),
592
+ ...(m.input && { input: m.input }),
593
+ ...(m.api && { api: m.api }),
594
+ ...(m.cost && { cost: m.cost }),
595
+ ...(m.compat && { compat: m.compat }),
596
+ // Cluster info
597
+ node_id: m.nodeId,
598
+ reachable: reachable.has(m.nodeId),
599
+ };
600
+ return entry;
601
+ });
396
602
 
397
603
  return {
398
604
  status: 200,
@@ -414,19 +620,16 @@ export class ModelProxy {
414
620
  clearTimeout(pending.timer);
415
621
  this.pending.delete(frame.id);
416
622
  try {
417
- const errChunk = {
418
- id: `chatcmpl-${frame.id}`,
419
- object: "chat.completion.chunk",
420
- choices: [{ index: 0, delta: { content: `[ClawMatrix] Remote error: ${frame.payload.error}` }, finish_reason: "stop" }],
421
- };
422
- pending.controller.enqueue(
423
- pending.encoder.encode(`data: ${JSON.stringify(errChunk)}\n\n`),
424
- );
425
- pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
623
+ const errMsg = `[ClawMatrix] Remote error: ${frame.payload.error}`;
624
+ if (pending.responseFormat === "responses") {
625
+ pending.controller.enqueue(pending.encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: errMsg })}\n\n`));
626
+ this.enqueueResponsesStreamDone(pending.controller, pending.encoder, frame.id, pending.model ?? "", errMsg);
627
+ } else {
628
+ pending.controller.enqueue(pending.encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: errMsg }, finish_reason: "stop" }] })}\n\n`));
629
+ pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
630
+ }
426
631
  pending.controller.close();
427
- } catch {
428
- // controller may already be closed
429
- }
632
+ } catch { /* controller may already be closed */ }
430
633
  }
431
634
  return;
432
635
  }
@@ -436,6 +639,9 @@ export class ModelProxy {
436
639
  pending.resolve(frame.payload);
437
640
  }
438
641
 
642
+ /** Accumulated text per stream request (needed for responses API done events). */
643
+ private streamText = new Map<string, string>();
644
+
439
645
  handleModelStream(frame: ModelStreamChunk) {
440
646
  debug("stream", `id=${frame.id} done=${frame.payload.done} delta=${JSON.stringify(frame.payload.delta?.slice?.(0, 50) ?? frame.payload.delta)} failed=${this.peerManager.router.isFailed(frame.id)} hasPending=${this.pending.has(frame.id)}`);
441
647
  if (this.peerManager.router.isFailed(frame.id)) return;
@@ -443,54 +649,87 @@ export class ModelProxy {
443
649
  if (!pending?.stream || !pending.controller || !pending.encoder) return;
444
650
 
445
651
  try {
446
- if (frame.payload.done) {
447
- const finalChunk: Record<string, unknown> = {
448
- id: `chatcmpl-${frame.id}`,
449
- object: "chat.completion.chunk",
450
- choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
451
- };
452
- if (frame.payload.usage) {
453
- finalChunk.usage = {
454
- prompt_tokens: frame.payload.usage.inputTokens,
455
- completion_tokens: frame.payload.usage.outputTokens,
456
- total_tokens: frame.payload.usage.inputTokens + frame.payload.usage.outputTokens,
457
- };
458
- }
459
- pending.controller.enqueue(
460
- pending.encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`),
461
- );
462
- pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
463
- pending.controller.close();
464
- clearTimeout(pending.timer);
465
- this.pending.delete(frame.id);
652
+ if (pending.responseFormat === "responses") {
653
+ this.handleModelStreamResponses(frame, pending);
466
654
  } else {
467
- const chunk = {
468
- id: `chatcmpl-${frame.id}`,
469
- object: "chat.completion.chunk",
470
- choices: [
471
- {
472
- index: 0,
473
- delta: { content: frame.payload.delta },
474
- finish_reason: null,
475
- },
476
- ],
477
- };
478
- pending.controller.enqueue(
479
- pending.encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`),
480
- );
655
+ this.handleModelStreamChat(frame, pending);
481
656
  }
482
657
  } catch {
483
658
  clearTimeout(pending.timer);
484
659
  this.pending.delete(frame.id);
660
+ this.streamText.delete(frame.id);
485
661
  }
486
662
  }
487
663
 
664
+ private handleModelStreamChat(frame: ModelStreamChunk, pending: PendingModelReq) {
665
+ if (frame.payload.done) {
666
+ const finalChunk: Record<string, unknown> = {
667
+ id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk",
668
+ choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
669
+ };
670
+ if (frame.payload.usage) {
671
+ finalChunk.usage = { prompt_tokens: frame.payload.usage.inputTokens, completion_tokens: frame.payload.usage.outputTokens, total_tokens: frame.payload.usage.inputTokens + frame.payload.usage.outputTokens };
672
+ }
673
+ pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
674
+ pending.controller!.enqueue(pending.encoder!.encode("data: [DONE]\n\n"));
675
+ pending.controller!.close();
676
+ clearTimeout(pending.timer);
677
+ this.pending.delete(frame.id);
678
+ } else {
679
+ // Use full deltaObj when available (carries tool_calls etc.), otherwise simple text delta
680
+ const delta = frame.payload.deltaObj ?? { content: frame.payload.delta };
681
+ const chunk = { id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta, finish_reason: null }] };
682
+ pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(chunk)}\n\n`));
683
+ }
684
+ }
685
+
686
+ private handleModelStreamResponses(frame: ModelStreamChunk, pending: PendingModelReq) {
687
+ if (frame.payload.done) {
688
+ const fullText = this.streamText.get(frame.id) ?? "";
689
+ this.streamText.delete(frame.id);
690
+ this.enqueueResponsesStreamDone(pending.controller!, pending.encoder!, frame.id, pending.model ?? "", fullText, frame.payload.usage);
691
+ pending.controller!.close();
692
+ clearTimeout(pending.timer);
693
+ this.pending.delete(frame.id);
694
+ } else {
695
+ // Accumulate text for done event
696
+ this.streamText.set(frame.id, (this.streamText.get(frame.id) ?? "") + frame.payload.delta);
697
+ const evt = { type: "response.output_text.delta", item_id: `msg_${frame.id}`, output_index: 0, content_index: 0, delta: frame.payload.delta };
698
+ pending.controller!.enqueue(pending.encoder!.encode(`event: response.output_text.delta\ndata: ${JSON.stringify(evt)}\n\n`));
699
+ }
700
+ }
701
+
702
+ private sendStreamDelta(to: string, id: string, delta: string, deltaObj?: unknown) {
703
+ this.peerManager.sendTo(to, {
704
+ type: "model_stream",
705
+ id,
706
+ from: this.config.nodeId,
707
+ to,
708
+ timestamp: Date.now(),
709
+ payload: { delta, ...(deltaObj !== undefined && { deltaObj }), done: false },
710
+ } satisfies ModelStreamChunk);
711
+ }
712
+
713
+ private sendStreamDone(to: string, id: string, usage?: { inputTokens: number; outputTokens: number }) {
714
+ this.peerManager.sendTo(to, {
715
+ type: "model_stream",
716
+ id,
717
+ from: this.config.nodeId,
718
+ to,
719
+ timestamp: Date.now(),
720
+ payload: { delta: "", done: true, usage },
721
+ } satisfies ModelStreamChunk);
722
+ }
723
+
488
724
  /** Handle model_req locally: call the model API directly or fall back to OpenClaw gateway. */
489
725
  async handleModelRequest(frame: ModelRequest): Promise<void> {
490
726
  const { id, from, payload } = frame;
491
- debug("model_req", `handling model="${payload.model}" from=${from} stream=${payload.stream}`);
727
+ debug("model_req", `handling model="${payload.model}" provider=${payload.provider ?? "any"} from=${from} stream=${payload.stream}`);
492
728
 
493
- const model = this.config.models.find((m) => m.id === payload.model);
729
+ const model = payload.provider
730
+ ? this.config.models.find((m) => m.id === payload.model && m.provider === payload.provider)
731
+ ?? this.config.models.find((m) => m.id === payload.model)
732
+ : this.config.models.find((m) => m.id === payload.model);
494
733
  if (!model) {
495
734
  this.peerManager.sendTo(from, {
496
735
  type: "model_res",
@@ -505,28 +744,42 @@ export class ModelProxy {
505
744
 
506
745
  try {
507
746
  const endpoint = this.resolveModelEndpoint(model);
747
+ const isResponsesApi = endpoint.api === "openai-responses" || endpoint.api === "openai-codex-responses";
748
+ const path = isResponsesApi ? "/responses" : "/chat/completions";
749
+ const url = `${endpoint.baseUrl}${path}`;
508
750
  const headers: Record<string, string> = { "Content-Type": "application/json" };
509
751
 
510
752
  if (endpoint.direct) {
511
753
  if (endpoint.apiKey) headers["Authorization"] = `Bearer ${endpoint.apiKey}`;
512
- debug("model_req", `direct API call to ${endpoint.url}`);
754
+ debug("model_req", `direct API call to ${url} (api=${endpoint.api})`);
513
755
  } else {
514
756
  const { authHeader } = this.gatewayInfo;
515
757
  if (authHeader) headers["Authorization"] = authHeader;
516
- debug("model_req", `gateway fallback to ${endpoint.url} (not recommended)`);
758
+ debug("model_req", `gateway fallback to ${url}`);
517
759
  }
518
760
 
519
- const response = await fetch(endpoint.url, {
761
+ const modelField = endpoint.direct ? model.id : `${model.provider}/${model.id}`;
762
+ const requestBody = isResponsesApi
763
+ ? {
764
+ model: modelField,
765
+ input: payload.messages,
766
+ stream: payload.stream,
767
+ temperature: payload.temperature,
768
+ max_output_tokens: payload.maxTokens,
769
+ }
770
+ : {
771
+ model: modelField,
772
+ messages: payload.messages,
773
+ temperature: payload.temperature,
774
+ max_tokens: payload.maxTokens,
775
+ stream: payload.stream,
776
+ ...(payload.stream ? { stream_options: { include_usage: true } } : {}),
777
+ };
778
+
779
+ const response = await fetch(url, {
520
780
  method: "POST",
521
781
  headers,
522
- body: JSON.stringify({
523
- model: endpoint.direct ? model.id : `${model.provider}/${model.id}`,
524
- messages: payload.messages,
525
- temperature: payload.temperature,
526
- max_tokens: payload.maxTokens,
527
- stream: payload.stream,
528
- ...(payload.stream ? { stream_options: { include_usage: true } } : {}),
529
- }),
782
+ body: JSON.stringify(requestBody),
530
783
  });
531
784
 
532
785
  if (!response.ok) {
@@ -549,73 +802,122 @@ export class ModelProxy {
549
802
  if (done) break;
550
803
 
551
804
  buffer += decoder.decode(value, { stream: true });
805
+ if (buffer.length > MAX_STREAM_BUFFER) {
806
+ throw new Error("Stream buffer exceeded 1MB — upstream may be malformed");
807
+ }
552
808
  const lines = buffer.split("\n");
553
809
  buffer = lines.pop()!;
554
810
 
811
+ // Track SSE event type for responses API
812
+ let currentEvent = "";
555
813
  for (const line of lines) {
814
+ if (line.startsWith("event: ")) {
815
+ currentEvent = line.slice(7).trim();
816
+ continue;
817
+ }
556
818
  if (!line.startsWith("data: ")) continue;
557
819
  const data = line.slice(6).trim();
558
820
  if (data === "[DONE]") {
559
- this.peerManager.sendTo(from, {
560
- type: "model_stream",
561
- id,
562
- from: this.config.nodeId,
563
- to: from,
564
- timestamp: Date.now(),
565
- payload: { delta: "", done: true, usage: lastUsage },
566
- } satisfies ModelStreamChunk);
821
+ this.sendStreamDone(from, id, lastUsage);
567
822
  streamDone = true;
568
823
  break;
569
824
  }
570
825
 
571
826
  try {
572
827
  const parsed = JSON.parse(data);
573
- if (parsed.usage) {
574
- lastUsage = {
575
- inputTokens: parsed.usage.prompt_tokens,
576
- outputTokens: parsed.usage.completion_tokens,
577
- };
578
- }
579
- const d = parsed.choices?.[0]?.delta;
580
- const delta = d?.content || d?.reasoning_content || "";
581
- if (delta) {
582
- this.peerManager.sendTo(from, {
583
- type: "model_stream",
584
- id,
585
- from: this.config.nodeId,
586
- to: from,
587
- timestamp: Date.now(),
588
- payload: { delta, done: false },
589
- } satisfies ModelStreamChunk);
828
+
829
+ if (isResponsesApi) {
830
+ const evtType = currentEvent || parsed.type;
831
+ if (evtType === "response.output_text.delta") {
832
+ const delta = parsed.delta || "";
833
+ if (delta) {
834
+ this.sendStreamDelta(from, id, delta);
835
+ }
836
+ } else if (evtType === "response.completed") {
837
+ const usage = parsed.response?.usage;
838
+ if (usage) {
839
+ lastUsage = {
840
+ inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
841
+ outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
842
+ };
843
+ }
844
+ this.sendStreamDone(from, id, lastUsage);
845
+ streamDone = true;
846
+ break;
847
+ }
848
+ } else {
849
+ // Chat completions format
850
+ if (parsed.usage) {
851
+ lastUsage = {
852
+ inputTokens: parsed.usage.prompt_tokens,
853
+ outputTokens: parsed.usage.completion_tokens,
854
+ };
855
+ }
856
+ const d = parsed.choices?.[0]?.delta;
857
+ const delta = d?.content || d?.reasoning_content || "";
858
+ // Pass full delta object when it contains tool_calls or other structured data
859
+ const hasStructured = d?.tool_calls || d?.refusal != null;
860
+ if (delta || hasStructured) {
861
+ this.sendStreamDelta(from, id, delta, hasStructured ? d : undefined);
862
+ }
590
863
  }
591
864
  } catch {
592
865
  // skip malformed chunks
593
866
  }
867
+ currentEvent = "";
594
868
  }
595
869
  }
596
- // If the upstream closed without sending [DONE], send a completion
597
- // frame so the requesting side doesn't hang until MODEL_TIMEOUT.
870
+ // If the upstream closed without sending [DONE] or response.completed,
871
+ // send a completion frame so the requesting side doesn't hang.
598
872
  if (!streamDone) {
599
- this.peerManager.sendTo(from, {
600
- type: "model_stream",
601
- id,
602
- from: this.config.nodeId,
603
- to: from,
604
- timestamp: Date.now(),
605
- payload: { delta: "", done: true, usage: lastUsage },
606
- } satisfies ModelStreamChunk);
873
+ this.sendStreamDone(from, id, lastUsage);
607
874
  }
608
875
  } finally {
609
876
  reader.releaseLock();
610
877
  }
611
878
  } else {
612
- const result = (await response.json()) as {
613
- choices?: { message?: { content?: string; reasoning_content?: string } }[];
614
- usage?: { prompt_tokens: number; completion_tokens: number };
615
- };
616
- const msg = result.choices?.[0]?.message;
617
- const content = msg?.content || msg?.reasoning_content || "";
618
- const usage = result.usage;
879
+ // Non-streaming response
880
+ const result = await response.json();
881
+ let content: string;
882
+ let message: unknown | undefined;
883
+ let usage: { inputTokens: number; outputTokens: number } | undefined;
884
+
885
+ if (isResponsesApi) {
886
+ // Responses API: extract text from output[].content[].text
887
+ content = "";
888
+ const output = result.output as { type?: string; content?: { type?: string; text?: string }[] }[] | undefined;
889
+ if (Array.isArray(output)) {
890
+ for (const item of output) {
891
+ if (item.type === "message" && Array.isArray(item.content)) {
892
+ for (const part of item.content) {
893
+ if (part.type === "output_text" && part.text) content += part.text;
894
+ }
895
+ }
896
+ }
897
+ }
898
+ // Carry full output array for structured data (function_call items, etc.)
899
+ message = result.output;
900
+ if (result.usage) {
901
+ usage = {
902
+ inputTokens: result.usage.input_tokens ?? result.usage.prompt_tokens ?? 0,
903
+ outputTokens: result.usage.output_tokens ?? result.usage.completion_tokens ?? 0,
904
+ };
905
+ }
906
+ } else {
907
+ // Chat completions format
908
+ const msg = result.choices?.[0]?.message;
909
+ content = msg?.content || msg?.reasoning_content || "";
910
+ // Carry full message object when it has tool_calls or other structured data
911
+ if (msg?.tool_calls || msg?.refusal != null || msg?.function_call) {
912
+ message = msg;
913
+ }
914
+ if (result.usage) {
915
+ usage = {
916
+ inputTokens: result.usage.prompt_tokens,
917
+ outputTokens: result.usage.completion_tokens,
918
+ };
919
+ }
920
+ }
619
921
 
620
922
  this.peerManager.sendTo(from, {
621
923
  type: "model_res",
@@ -626,9 +928,8 @@ export class ModelProxy {
626
928
  payload: {
627
929
  success: true,
628
930
  content,
629
- usage: usage
630
- ? { inputTokens: usage.prompt_tokens, outputTokens: usage.completion_tokens }
631
- : undefined,
931
+ ...(message !== undefined && { message }),
932
+ usage,
632
933
  },
633
934
  } satisfies ModelResponse);
634
935
  }