clawmatrix 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,14 +9,26 @@ import type {
9
9
  ModelStreamChunk,
10
10
  } from "./types.ts";
11
11
  import { debug } from "./debug.ts";
12
+ import { readBody } from "./http-utils.ts";
12
13
 
13
14
  const MODEL_TIMEOUT = 120_000; // 2 minutes
15
+ const MAX_STREAM_BUFFER = 1_048_576; // 1MB — guard against upstream not sending newlines
16
+
17
+ type ResponseFormat = "chat" | "responses";
18
+
19
+ interface ProxyResponse {
20
+ status: number;
21
+ headers: Record<string, string>;
22
+ body: string | ReadableStream;
23
+ }
14
24
 
15
25
  interface PendingModelReq {
16
26
  resolve: (value: unknown) => void;
17
27
  reject: (error: Error) => void;
18
28
  timer: ReturnType<typeof setTimeout>;
19
29
  stream: boolean;
30
+ responseFormat: ResponseFormat;
31
+ model?: string;
20
32
  controller?: ReadableStreamDefaultController;
21
33
  encoder?: TextEncoder;
22
34
  }
@@ -36,35 +48,125 @@ export class ModelProxy {
36
48
  this.openclawConfig = openclawConfig;
37
49
  }
38
50
 
51
+ /**
52
+ * Normalize Responses API `input` to OpenAI chat messages for WS transport.
53
+ *
54
+ * Converts:
55
+ * - string → [{role: "user", content: "..."}]
56
+ * - shorthand {role, content: "..."} → pass through
57
+ * - full {type: "message", content: [{type: "input_text"}, {type: "input_image"}]} → chat format
58
+ * - {type: "function_call_output"} → {role: "tool", ...}
59
+ *
60
+ * Returns chat-completions compatible messages (text + image_url content parts).
61
+ */
62
+ private static normalizeResponsesInput(input: unknown): unknown[] {
63
+ if (typeof input === "string") {
64
+ return [{ role: "user", content: input }];
65
+ }
66
+ if (!Array.isArray(input)) return [];
67
+
68
+ const messages: unknown[] = [];
69
+ for (const item of input) {
70
+ if (!item || typeof item !== "object") continue;
71
+ const obj = item as Record<string, unknown>;
72
+
73
+ // function_call_output → tool message
74
+ if (obj.type === "function_call_output") {
75
+ messages.push({
76
+ role: "tool",
77
+ tool_call_id: obj.call_id,
78
+ content: typeof obj.output === "string" ? obj.output : JSON.stringify(obj.output),
79
+ });
80
+ continue;
81
+ }
82
+
83
+ const role = typeof obj.role === "string" ? obj.role : "user";
84
+
85
+ // Simple shorthand: {role: "user", content: "hello"}
86
+ if (typeof obj.content === "string") {
87
+ messages.push({ role, content: obj.content });
88
+ continue;
89
+ }
90
+
91
+ // Full format: {type: "message", role, content: [{type: "input_text"|"input_image"|...}]}
92
+ if (Array.isArray(obj.content)) {
93
+ const parts: unknown[] = [];
94
+ for (const part of obj.content) {
95
+ if (!part || typeof part !== "object") continue;
96
+ const p = part as Record<string, unknown>;
97
+
98
+ if (p.type === "input_text" || p.type === "output_text") {
99
+ // Text content → chat text part
100
+ if (typeof p.text === "string") {
101
+ parts.push({ type: "text", text: p.text });
102
+ }
103
+ } else if (p.type === "input_image") {
104
+ // Image content → chat image_url part
105
+ if (typeof p.image_url === "string") {
106
+ parts.push({ type: "image_url", image_url: { url: p.image_url } });
107
+ } else if (p.image_url && typeof p.image_url === "object") {
108
+ parts.push({ type: "image_url", image_url: p.image_url });
109
+ }
110
+ } else if (p.type === "text" && typeof p.text === "string") {
111
+ // Already chat format
112
+ parts.push(p);
113
+ } else if (p.type === "image_url") {
114
+ // Already chat format
115
+ parts.push(p);
116
+ }
117
+ }
118
+
119
+ if (parts.length === 1 && (parts[0] as Record<string, unknown>).type === "text") {
120
+ // Single text part → simplify to string content
121
+ messages.push({ role, content: ((parts[0] as Record<string, unknown>).text as string) });
122
+ } else if (parts.length > 0) {
123
+ messages.push({ role, content: parts });
124
+ }
125
+ continue;
126
+ }
127
+
128
+ // Fallback
129
+ if (typeof obj.text === "string") {
130
+ messages.push({ role, content: obj.text });
131
+ }
132
+ }
133
+ return messages;
134
+ }
135
+
39
136
  /** Resolve API endpoint for a model: explicit config > OpenClaw provider > gateway fallback */
40
- private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string }): { url: string; apiKey?: string; direct: boolean } {
137
+ private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string; api?: string }): { baseUrl: string; apiKey?: string; direct: boolean; api: string } {
138
+ const defaultApi = "openai-completions";
139
+
41
140
  // 1. Explicit baseUrl in ClawMatrix model config
42
141
  if (model.baseUrl) {
43
142
  return {
44
- url: `${model.baseUrl.replace(/\/$/, "")}/chat/completions`,
143
+ baseUrl: model.baseUrl.replace(/\/$/, ""),
45
144
  apiKey: model.apiKey,
46
145
  direct: true,
146
+ api: model.api ?? defaultApi,
47
147
  };
48
148
  }
49
149
 
50
150
  // 2. Read from OpenClaw's models.providers[provider]
51
151
  const providers = (this.openclawConfig as Record<string, unknown>).models as
52
- { providers?: Record<string, { baseUrl?: string; apiKey?: string }> } | undefined;
152
+ { providers?: Record<string, { baseUrl?: string; apiKey?: string; api?: string }> } | undefined;
53
153
  const providerConfig = providers?.providers?.[model.provider];
54
154
  if (providerConfig?.baseUrl) {
55
155
  return {
56
- url: `${providerConfig.baseUrl.replace(/\/$/, "")}/chat/completions`,
156
+ baseUrl: providerConfig.baseUrl.replace(/\/$/, ""),
57
157
  apiKey: typeof providerConfig.apiKey === "string" ? providerConfig.apiKey : undefined,
58
158
  direct: true,
159
+ api: model.api ?? providerConfig.api ?? defaultApi,
59
160
  };
60
161
  }
61
162
 
62
- // 3. Fallback: OpenClaw gateway (goes through agent system — not recommended)
163
+ // 3. Fallback: OpenClaw gateway
63
164
  const { port } = this.gatewayInfo;
64
165
  return {
65
- url: `http://127.0.0.1:${port}/v1/chat/completions`,
166
+ baseUrl: `http://127.0.0.1:${port}/v1`,
66
167
  apiKey: undefined,
67
168
  direct: false,
169
+ api: model.api ?? defaultApi,
68
170
  };
69
171
  }
70
172
 
@@ -78,8 +180,13 @@ export class ModelProxy {
78
180
  debug("proxy", `${req.method} ${url.pathname} → ${p}`);
79
181
 
80
182
  if (p === "/chat/completions" && req.method === "POST") {
81
- const body = await this.readBody(req);
82
- const response = await this.handleChatCompletion(body);
183
+ const body = await readBody(req);
184
+ const response = await this.handleChatCompletion(body, "openai-completions");
185
+ debug("proxy", `response status=${response.status}`);
186
+ this.sendResponse(res, response);
187
+ } else if (p === "/responses" && req.method === "POST") {
188
+ const body = await readBody(req);
189
+ const response = await this.handleResponses(body);
83
190
  debug("proxy", `response status=${response.status}`);
84
191
  this.sendResponse(res, response);
85
192
  } else if (p === "/models" && req.method === "GET") {
@@ -115,18 +222,11 @@ export class ModelProxy {
115
222
  pending.reject(new Error("Shutting down"));
116
223
  }
117
224
  this.pending.clear();
225
+ this.streamText.clear();
118
226
  }
119
227
 
120
- private readBody(req: import("node:http").IncomingMessage): Promise<string> {
121
- return new Promise((resolve, reject) => {
122
- const chunks: Buffer[] = [];
123
- req.on("data", (chunk: Buffer) => chunks.push(chunk));
124
- req.on("end", () => resolve(Buffer.concat(chunks).toString()));
125
- req.on("error", reject);
126
- });
127
- }
128
228
 
129
- private sendResponse(res: import("node:http").ServerResponse, response: { status: number; headers: Record<string, string>; body: string | ReadableStream }) {
229
+ private sendResponse(res: import("node:http").ServerResponse, response: ProxyResponse) {
130
230
  res.writeHead(response.status, response.headers);
131
231
  if (typeof response.body === "string") {
132
232
  res.end(response.body);
@@ -152,56 +252,59 @@ export class ModelProxy {
152
252
  }
153
253
 
154
254
  // ── HTTP handlers ──────────────────────────────────────────────
155
- private async handleChatCompletion(rawBody: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
156
- let body: {
157
- model: string;
158
- messages: unknown[];
159
- stream?: boolean;
160
- temperature?: number;
161
- max_tokens?: number;
162
- };
163
255
 
164
- try {
165
- body = JSON.parse(rawBody);
166
- } catch {
167
- return {
168
- status: 400,
169
- headers: { "Content-Type": "application/json" },
170
- body: JSON.stringify({ error: "Invalid JSON" }),
171
- };
172
- }
173
-
174
- const rawModelId = body.model;
175
- // Parse "nodeId/model" format: first segment is nodeId, rest is model ID.
176
- // OpenClaw sends "providerId/modelId" where providerId = nodeId, so this
177
- // naturally handles both OpenClaw calls and direct curl calls.
178
- // If no "/" present, treat entire string as model ID and auto-resolve.
179
- let nodeId: string | undefined;
180
- let modelId: string;
256
+ /** Resolve model ID → proxyModel + route. Shared by chat completions and responses handlers. */
257
+ private resolveModelRoute(rawModelId: string): {
258
+ nodeId: string; modelId: string;
259
+ proxyModel: (typeof this.config.proxyModels)[number] | undefined;
260
+ routeNodeId: string;
261
+ } | { error: { status: number; message: string } } {
181
262
  const slashIdx = rawModelId.indexOf("/");
263
+ let nodeId: string;
264
+ let modelId: string;
265
+ let proxyModel: (typeof this.config.proxyModels)[number] | undefined;
266
+
182
267
  if (slashIdx > 0) {
183
268
  nodeId = rawModelId.slice(0, slashIdx);
184
269
  modelId = rawModelId.slice(slashIdx + 1);
270
+ proxyModel = this.config.proxyModels.find((m) => m.id === modelId && m.nodeId === nodeId);
185
271
  } else {
186
272
  modelId = rawModelId;
273
+ proxyModel = this.config.proxyModels.find((m) => m.id === modelId);
274
+ if (!proxyModel) {
275
+ return { error: { status: 404, message: `Model "${rawModelId}" not found in proxy models` } };
276
+ }
277
+ nodeId = proxyModel.nodeId;
187
278
  }
188
- debug("proxy", `model raw="${rawModelId}" nodeId=${nodeId ?? "auto"} modelId="${modelId}" stream=${body.stream ?? false}`);
189
- const proxyModel = this.config.proxyModels.find((m) => m.id === modelId && (!nodeId || m.nodeId === nodeId))
190
- ?? this.config.proxyModels.find((m) => m.id === modelId);
191
- const route = nodeId
192
- ? this.peerManager.router.getRoute(nodeId)
193
- : this.peerManager.router.resolveModel(modelId);
194
- debug("proxy", `proxyModel=${proxyModel?.id ?? "none"} route=${route?.nodeId ?? "none"} reachable=${route ? this.peerManager.canReach(route.nodeId) : false}`);
279
+
280
+ const route = this.peerManager.router.getRoute(nodeId);
281
+ debug("proxy", `model raw="${rawModelId}" nodeId=${nodeId} modelId="${modelId}" route=${route?.nodeId ?? "none"}`);
195
282
  if (!route) {
196
- return {
197
- status: 404,
198
- headers: { "Content-Type": "application/json" },
199
- body: JSON.stringify({ error: { message: `Model "${modelId}" not found in cluster (proxyModels: [${this.config.proxyModels.map(m => m.id).join(", ")}])` } }),
200
- };
283
+ return { error: { status: 404, message: `Node "${nodeId}" not found in cluster` } };
284
+ }
285
+ if (!this.peerManager.canReach(route.nodeId)) {
286
+ return { error: { status: 502, message: `Cannot reach model node "${route.nodeId}"` } };
287
+ }
288
+ return { nodeId, modelId, proxyModel, routeNodeId: route.nodeId };
289
+ }
290
+
291
+ private async handleChatCompletion(rawBody: string, _api: string): Promise<ProxyResponse> {
292
+ let body: { model: string; messages: unknown[]; stream?: boolean; temperature?: number; max_tokens?: number };
293
+ try {
294
+ body = JSON.parse(rawBody);
295
+ } catch {
296
+ return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
297
+ }
298
+
299
+ const resolved = this.resolveModelRoute(body.model);
300
+ if ("error" in resolved) {
301
+ return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
201
302
  }
202
303
 
203
- // Inject model identity so the LLM knows what it is
304
+ const { modelId, proxyModel, routeNodeId } = resolved;
204
305
  const messages = body.messages;
306
+ debug("proxy", `messages count=${messages?.length ?? 0} roles=${(messages ?? []).map((m: unknown) => (m as Record<string, unknown>)?.role).join(",")}`);
307
+
205
308
  if (proxyModel?.description) {
206
309
  const first = messages[0] as { role?: string; content?: string } | undefined;
207
310
  if (first?.role === "system" && typeof first.content === "string") {
@@ -213,35 +316,66 @@ export class ModelProxy {
213
316
 
214
317
  const stream = body.stream ?? false;
215
318
  const requestId = crypto.randomUUID();
216
-
217
319
  const frame: ModelRequest = {
218
- type: "model_req",
219
- id: requestId,
220
- from: this.config.nodeId,
221
- to: route.nodeId,
222
- timestamp: Date.now(),
223
- payload: {
224
- model: modelId,
225
- messages,
226
- temperature: body.temperature,
227
- maxTokens: body.max_tokens,
228
- stream,
229
- },
320
+ type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
321
+ payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_tokens, stream },
230
322
  };
231
323
 
232
- // Pre-check reachability before starting a stream (avoids silent empty response)
233
- if (!this.peerManager.canReach(route.nodeId)) {
234
- return {
235
- status: 502,
236
- headers: { "Content-Type": "application/json" },
237
- body: JSON.stringify({ error: { message: `Cannot reach model node "${route.nodeId}"` } }),
238
- };
324
+ if (stream) {
325
+ return this.handleStreamRequest(requestId, routeNodeId, frame, "chat");
326
+ } else {
327
+ return this.handleNonStreamRequest(requestId, routeNodeId, frame, "chat");
328
+ }
329
+ }
330
+
331
+ private async handleResponses(rawBody: string): Promise<ProxyResponse> {
332
+ let body: { model: string; input: unknown; stream?: boolean; temperature?: number; max_output_tokens?: number; instructions?: string };
333
+ try {
334
+ body = JSON.parse(rawBody);
335
+ } catch {
336
+ return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
337
+ }
338
+
339
+ const resolved = this.resolveModelRoute(body.model);
340
+ if ("error" in resolved) {
341
+ return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
342
+ }
343
+
344
+ const { modelId, proxyModel, routeNodeId } = resolved;
345
+
346
+ // Normalize responses API input → simple chat messages for WS transport.
347
+ // Responses API items use {type: "message", role, content: [{type: "input_text", text}]}
348
+ // but WS protocol carries simple {role, content} chat messages.
349
+ const messages = ModelProxy.normalizeResponsesInput(body.input);
350
+
351
+ // Prepend instructions as system/developer message
352
+ if (body.instructions) {
353
+ messages.unshift({ role: "developer", content: body.instructions });
354
+ }
355
+
356
+ if (proxyModel?.description) {
357
+ const first = messages[0] as { role?: string; content?: string } | undefined;
358
+ if (first?.role === "system" && typeof first.content === "string") {
359
+ first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
360
+ } else if (first?.role === "developer" && typeof first.content === "string") {
361
+ first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
362
+ } else {
363
+ messages.unshift({ role: "system", content: `[Model: ${proxyModel.description}]` });
364
+ }
239
365
  }
240
366
 
367
+ const stream = body.stream ?? false;
368
+ const requestId = crypto.randomUUID();
369
+ debug("proxy", `responses: stream=${stream} messages=${messages.length} input_type=${typeof body.input}${Array.isArray(body.input) ? `[${body.input.length}]` : ""}`);
370
+ const frame: ModelRequest = {
371
+ type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
372
+ payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
373
+ };
374
+
241
375
  if (stream) {
242
- return this.handleStreamRequest(requestId, route.nodeId, frame);
376
+ return this.handleStreamRequest(requestId, routeNodeId, frame, "responses");
243
377
  } else {
244
- return this.handleNonStreamRequest(requestId, route.nodeId, frame);
378
+ return this.handleNonStreamRequest(requestId, routeNodeId, frame, "responses");
245
379
  }
246
380
  }
247
381
 
@@ -249,73 +383,102 @@ export class ModelProxy {
249
383
  requestId: string,
250
384
  targetNodeId: string,
251
385
  frame: ModelRequest,
252
- ): { status: number; headers: Record<string, string>; body: ReadableStream } {
386
+ responseFormat: ResponseFormat,
387
+ ): ProxyResponse & { body: ReadableStream } {
253
388
  const encoder = new TextEncoder();
389
+ const model = frame.payload.model;
254
390
 
255
391
  const readable = new ReadableStream({
256
392
  start: (controller) => {
257
393
  const timer = setTimeout(() => {
258
394
  this.pending.delete(requestId);
395
+ this.streamText.delete(requestId);
259
396
  this.peerManager.router.markFailed(requestId);
260
397
  try {
261
- const errorChunk = {
262
- id: `chatcmpl-${requestId}`,
263
- object: "chat.completion.chunk",
264
- choices: [{ index: 0, delta: { content: "\n\n[ClawMatrix] Error: model request timed out" }, finish_reason: "stop" }],
265
- };
266
- controller.enqueue(
267
- encoder.encode(`data: ${JSON.stringify(errorChunk)}\n\n`),
268
- );
269
- controller.enqueue(encoder.encode("data: [DONE]\n\n"));
398
+ if (responseFormat === "responses") {
399
+ controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: "\n\n[ClawMatrix] Error: model request timed out" })}\n\n`));
400
+ this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
401
+ } else {
402
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: "\n\n[ClawMatrix] Error: model request timed out" }, finish_reason: "stop" }] })}\n\n`));
403
+ controller.enqueue(encoder.encode("data: [DONE]\n\n"));
404
+ }
270
405
  controller.close();
271
- } catch {
272
- // controller may already be closed
273
- }
406
+ } catch { /* controller may already be closed */ }
274
407
  }, MODEL_TIMEOUT);
275
408
 
276
409
  this.pending.set(requestId, {
277
- resolve: () => {},
278
- reject: () => {},
279
- timer,
280
- stream: true,
281
- controller,
282
- encoder,
410
+ resolve: () => {}, reject: () => {},
411
+ timer, stream: true, responseFormat, model,
412
+ controller, encoder,
283
413
  });
284
414
 
415
+ // Emit setup events for responses API
416
+ if (responseFormat === "responses") {
417
+ this.enqueueResponsesStreamSetup(controller, encoder, requestId, model);
418
+ }
419
+
285
420
  const sent = this.peerManager.sendTo(targetNodeId, frame);
286
421
  if (!sent) {
287
422
  this.pending.delete(requestId);
288
423
  clearTimeout(timer);
289
- const errChunk = {
290
- id: `chatcmpl-${requestId}`,
291
- object: "chat.completion.chunk",
292
- choices: [{ index: 0, delta: { content: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` }, finish_reason: "stop" }],
293
- };
294
- controller.enqueue(
295
- encoder.encode(`data: ${JSON.stringify(errChunk)}\n\n`),
296
- );
297
- controller.enqueue(encoder.encode("data: [DONE]\n\n"));
298
- controller.close();
424
+ try {
425
+ if (responseFormat === "responses") {
426
+ controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` })}\n\n`));
427
+ this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
428
+ } else {
429
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` }, finish_reason: "stop" }] })}\n\n`));
430
+ controller.enqueue(encoder.encode("data: [DONE]\n\n"));
431
+ }
432
+ controller.close();
433
+ } catch { /* controller may already be closed */ }
299
434
  }
300
435
  },
301
436
  });
302
437
 
303
438
  return {
304
439
  status: 200,
305
- headers: {
306
- "Content-Type": "text/event-stream",
307
- "Cache-Control": "no-cache",
308
- "Connection": "keep-alive",
309
- },
440
+ headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" },
310
441
  body: readable,
311
442
  };
312
443
  }
313
444
 
445
+ /** Emit responses API stream setup events (response.created → content_part.added). */
446
+ private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string) {
447
+ const respId = `resp_${id}`;
448
+ const msgId = `msg_${id}`;
449
+ const now = Math.floor(Date.now() / 1000);
450
+ const baseResp = { id: respId, object: "response", created_at: now, status: "in_progress", model, output: [] };
451
+ const msgItem = { type: "message", id: msgId, role: "assistant", content: [], status: "in_progress" };
452
+ const textPart = { type: "output_text", text: "" };
453
+
454
+ controller.enqueue(encoder.encode(`event: response.created\ndata: ${JSON.stringify({ type: "response.created", response: baseResp })}\n\n`));
455
+ controller.enqueue(encoder.encode(`event: response.in_progress\ndata: ${JSON.stringify({ type: "response.in_progress", response: baseResp })}\n\n`));
456
+ controller.enqueue(encoder.encode(`event: response.output_item.added\ndata: ${JSON.stringify({ type: "response.output_item.added", output_index: 0, item: msgItem })}\n\n`));
457
+ controller.enqueue(encoder.encode(`event: response.content_part.added\ndata: ${JSON.stringify({ type: "response.content_part.added", item_id: msgId, output_index: 0, content_index: 0, part: textPart })}\n\n`));
458
+ }
459
+
460
+ /** Emit responses API stream completion events (output_text.done → response.completed). */
461
+ private enqueueResponsesStreamDone(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string, content?: string, usage?: { inputTokens: number; outputTokens: number }) {
462
+ const respId = `resp_${id}`;
463
+ const msgId = `msg_${id}`;
464
+ const now = Math.floor(Date.now() / 1000);
465
+ const textPart = { type: "output_text", text: content ?? "" };
466
+ const msgItem = { type: "message", id: msgId, role: "assistant", content: [textPart], status: "completed" };
467
+ const usageObj = usage ? { input_tokens: usage.inputTokens, output_tokens: usage.outputTokens, total_tokens: usage.inputTokens + usage.outputTokens } : { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
468
+ const completedResp = { id: respId, object: "response", created_at: now, status: "completed", model, output: [msgItem], usage: usageObj };
469
+
470
+ controller.enqueue(encoder.encode(`event: response.output_text.done\ndata: ${JSON.stringify({ type: "response.output_text.done", item_id: msgId, output_index: 0, content_index: 0, text: content ?? "" })}\n\n`));
471
+ controller.enqueue(encoder.encode(`event: response.content_part.done\ndata: ${JSON.stringify({ type: "response.content_part.done", item_id: msgId, output_index: 0, content_index: 0, part: textPart })}\n\n`));
472
+ controller.enqueue(encoder.encode(`event: response.output_item.done\ndata: ${JSON.stringify({ type: "response.output_item.done", output_index: 0, item: msgItem })}\n\n`));
473
+ controller.enqueue(encoder.encode(`event: response.completed\ndata: ${JSON.stringify({ type: "response.completed", response: completedResp })}\n\n`));
474
+ }
475
+
314
476
  private async handleNonStreamRequest(
315
477
  requestId: string,
316
478
  targetNodeId: string,
317
479
  frame: ModelRequest,
318
- ): Promise<{ status: number; headers: Record<string, string>; body: string }> {
480
+ responseFormat: ResponseFormat,
481
+ ): Promise<ProxyResponse & { body: string }> {
319
482
  try {
320
483
  const result = await new Promise<ModelResponse["payload"]>(
321
484
  (resolve, reject) => {
@@ -327,9 +490,7 @@ export class ModelProxy {
327
490
 
328
491
  this.pending.set(requestId, {
329
492
  resolve: resolve as (v: unknown) => void,
330
- reject,
331
- timer,
332
- stream: false,
493
+ reject, timer, stream: false, responseFormat,
333
494
  });
334
495
 
335
496
  const sent = this.peerManager.sendTo(targetNodeId, frame);
@@ -349,6 +510,41 @@ export class ModelProxy {
349
510
  };
350
511
  }
351
512
 
513
+ if (responseFormat === "responses") {
514
+ const msgId = `msg_${requestId}`;
515
+ const usageObj = result.usage
516
+ ? { input_tokens: result.usage.inputTokens, output_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
517
+ : { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
518
+ // If upstream sent full output array (responses API), use it directly
519
+ const output = Array.isArray(result.message)
520
+ ? result.message
521
+ : [{
522
+ type: "message", id: msgId, role: "assistant",
523
+ content: [{ type: "output_text", text: result.content ?? "" }],
524
+ status: "completed",
525
+ }];
526
+ return {
527
+ status: 200,
528
+ headers: { "Content-Type": "application/json" },
529
+ body: JSON.stringify({
530
+ id: `resp_${requestId}`,
531
+ object: "response",
532
+ created_at: Math.floor(Date.now() / 1000),
533
+ status: "completed",
534
+ model: frame.payload.model,
535
+ output,
536
+ usage: usageObj,
537
+ }),
538
+ };
539
+ }
540
+
541
+ // Chat completions format — use full message object when available (has tool_calls etc.)
542
+ const msg = result.message as Record<string, unknown> | undefined;
543
+ const message = msg
544
+ ? { role: "assistant", ...msg }
545
+ : { role: "assistant", content: result.content };
546
+ const finishReason = msg?.tool_calls ? "tool_calls" : "stop";
547
+
352
548
  return {
353
549
  status: 200,
354
550
  headers: { "Content-Type": "application/json" },
@@ -357,19 +553,13 @@ export class ModelProxy {
357
553
  object: "chat.completion",
358
554
  created: Math.floor(Date.now() / 1000),
359
555
  model: frame.payload.model,
360
- choices: [
361
- {
362
- index: 0,
363
- message: { role: "assistant", content: result.content },
364
- finish_reason: "stop",
365
- },
366
- ],
556
+ choices: [{
557
+ index: 0,
558
+ message,
559
+ finish_reason: finishReason,
560
+ }],
367
561
  usage: result.usage
368
- ? {
369
- prompt_tokens: result.usage.inputTokens,
370
- completion_tokens: result.usage.outputTokens,
371
- total_tokens: result.usage.inputTokens + result.usage.outputTokens,
372
- }
562
+ ? { prompt_tokens: result.usage.inputTokens, completion_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
373
563
  : undefined,
374
564
  }),
375
565
  };
@@ -382,17 +572,36 @@ export class ModelProxy {
382
572
  }
383
573
  }
384
574
 
385
- private handleListModels(): { status: number; headers: Record<string, string>; body: string } {
386
- const models = this.peerManager.router
387
- .getAllPeers()
388
- .flatMap((p) =>
389
- p.models.map((m) => ({
390
- id: m.id,
391
- object: "model",
392
- created: 0,
393
- owned_by: m.provider,
394
- })),
395
- );
575
+ private handleListModels(): ProxyResponse & { body: string } {
576
+ // Build from proxyModels config (has full detail) and enrich with
577
+ // connectivity info from the router so consumers know what's reachable.
578
+ const reachable = new Set(
579
+ this.peerManager.router.getAllPeers()
580
+ .filter((p) => p.connection?.isOpen || p.reachableVia)
581
+ .map((p) => p.nodeId),
582
+ );
583
+
584
+ const models = this.config.proxyModels.map((m) => {
585
+ const entry: Record<string, unknown> = {
586
+ id: m.id,
587
+ object: "model",
588
+ created: 0,
589
+ owned_by: m.provider ?? "unknown",
590
+ // Extended fields
591
+ ...(m.description && { description: m.description }),
592
+ ...(m.contextWindow && { context_window: m.contextWindow }),
593
+ ...(m.maxTokens && { max_tokens: m.maxTokens }),
594
+ ...(m.reasoning !== undefined && { reasoning: m.reasoning }),
595
+ ...(m.input && { input: m.input }),
596
+ ...(m.api && { api: m.api }),
597
+ ...(m.cost && { cost: m.cost }),
598
+ ...(m.compat && { compat: m.compat }),
599
+ // Cluster info
600
+ node_id: m.nodeId,
601
+ reachable: reachable.has(m.nodeId),
602
+ };
603
+ return entry;
604
+ });
396
605
 
397
606
  return {
398
607
  status: 200,
@@ -414,19 +623,16 @@ export class ModelProxy {
414
623
  clearTimeout(pending.timer);
415
624
  this.pending.delete(frame.id);
416
625
  try {
417
- const errChunk = {
418
- id: `chatcmpl-${frame.id}`,
419
- object: "chat.completion.chunk",
420
- choices: [{ index: 0, delta: { content: `[ClawMatrix] Remote error: ${frame.payload.error}` }, finish_reason: "stop" }],
421
- };
422
- pending.controller.enqueue(
423
- pending.encoder.encode(`data: ${JSON.stringify(errChunk)}\n\n`),
424
- );
425
- pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
626
+ const errMsg = `[ClawMatrix] Remote error: ${frame.payload.error}`;
627
+ if (pending.responseFormat === "responses") {
628
+ pending.controller.enqueue(pending.encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: errMsg })}\n\n`));
629
+ this.enqueueResponsesStreamDone(pending.controller, pending.encoder, frame.id, pending.model ?? "", errMsg);
630
+ } else {
631
+ pending.controller.enqueue(pending.encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: errMsg }, finish_reason: "stop" }] })}\n\n`));
632
+ pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
633
+ }
426
634
  pending.controller.close();
427
- } catch {
428
- // controller may already be closed
429
- }
635
+ } catch { /* controller may already be closed */ }
430
636
  }
431
637
  return;
432
638
  }
@@ -436,6 +642,9 @@ export class ModelProxy {
436
642
  pending.resolve(frame.payload);
437
643
  }
438
644
 
645
+ /** Accumulated text per stream request (needed for responses API done events). */
646
+ private streamText = new Map<string, string>();
647
+
439
648
  handleModelStream(frame: ModelStreamChunk) {
440
649
  debug("stream", `id=${frame.id} done=${frame.payload.done} delta=${JSON.stringify(frame.payload.delta?.slice?.(0, 50) ?? frame.payload.delta)} failed=${this.peerManager.router.isFailed(frame.id)} hasPending=${this.pending.has(frame.id)}`);
441
650
  if (this.peerManager.router.isFailed(frame.id)) return;
@@ -443,54 +652,87 @@ export class ModelProxy {
443
652
  if (!pending?.stream || !pending.controller || !pending.encoder) return;
444
653
 
445
654
  try {
446
- if (frame.payload.done) {
447
- const finalChunk: Record<string, unknown> = {
448
- id: `chatcmpl-${frame.id}`,
449
- object: "chat.completion.chunk",
450
- choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
451
- };
452
- if (frame.payload.usage) {
453
- finalChunk.usage = {
454
- prompt_tokens: frame.payload.usage.inputTokens,
455
- completion_tokens: frame.payload.usage.outputTokens,
456
- total_tokens: frame.payload.usage.inputTokens + frame.payload.usage.outputTokens,
457
- };
458
- }
459
- pending.controller.enqueue(
460
- pending.encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`),
461
- );
462
- pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
463
- pending.controller.close();
464
- clearTimeout(pending.timer);
465
- this.pending.delete(frame.id);
655
+ if (pending.responseFormat === "responses") {
656
+ this.handleModelStreamResponses(frame, pending);
466
657
  } else {
467
- const chunk = {
468
- id: `chatcmpl-${frame.id}`,
469
- object: "chat.completion.chunk",
470
- choices: [
471
- {
472
- index: 0,
473
- delta: { content: frame.payload.delta },
474
- finish_reason: null,
475
- },
476
- ],
477
- };
478
- pending.controller.enqueue(
479
- pending.encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`),
480
- );
658
+ this.handleModelStreamChat(frame, pending);
481
659
  }
482
660
  } catch {
483
661
  clearTimeout(pending.timer);
484
662
  this.pending.delete(frame.id);
663
+ this.streamText.delete(frame.id);
485
664
  }
486
665
  }
487
666
 
667
+ private handleModelStreamChat(frame: ModelStreamChunk, pending: PendingModelReq) {
668
+ if (frame.payload.done) {
669
+ const finalChunk: Record<string, unknown> = {
670
+ id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk",
671
+ choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
672
+ };
673
+ if (frame.payload.usage) {
674
+ finalChunk.usage = { prompt_tokens: frame.payload.usage.inputTokens, completion_tokens: frame.payload.usage.outputTokens, total_tokens: frame.payload.usage.inputTokens + frame.payload.usage.outputTokens };
675
+ }
676
+ pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
677
+ pending.controller!.enqueue(pending.encoder!.encode("data: [DONE]\n\n"));
678
+ pending.controller!.close();
679
+ clearTimeout(pending.timer);
680
+ this.pending.delete(frame.id);
681
+ } else {
682
+ // Use full deltaObj when available (carries tool_calls etc.), otherwise simple text delta
683
+ const delta = frame.payload.deltaObj ?? { content: frame.payload.delta };
684
+ const chunk = { id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta, finish_reason: null }] };
685
+ pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(chunk)}\n\n`));
686
+ }
687
+ }
688
+
689
+ private handleModelStreamResponses(frame: ModelStreamChunk, pending: PendingModelReq) {
690
+ if (frame.payload.done) {
691
+ const fullText = this.streamText.get(frame.id) ?? "";
692
+ this.streamText.delete(frame.id);
693
+ this.enqueueResponsesStreamDone(pending.controller!, pending.encoder!, frame.id, pending.model ?? "", fullText, frame.payload.usage);
694
+ pending.controller!.close();
695
+ clearTimeout(pending.timer);
696
+ this.pending.delete(frame.id);
697
+ } else {
698
+ // Accumulate text for done event
699
+ this.streamText.set(frame.id, (this.streamText.get(frame.id) ?? "") + frame.payload.delta);
700
+ const evt = { type: "response.output_text.delta", item_id: `msg_${frame.id}`, output_index: 0, content_index: 0, delta: frame.payload.delta };
701
+ pending.controller!.enqueue(pending.encoder!.encode(`event: response.output_text.delta\ndata: ${JSON.stringify(evt)}\n\n`));
702
+ }
703
+ }
704
+
705
+ private sendStreamDelta(to: string, id: string, delta: string, deltaObj?: unknown) {
706
+ this.peerManager.sendTo(to, {
707
+ type: "model_stream",
708
+ id,
709
+ from: this.config.nodeId,
710
+ to,
711
+ timestamp: Date.now(),
712
+ payload: { delta, ...(deltaObj !== undefined && { deltaObj }), done: false },
713
+ } satisfies ModelStreamChunk);
714
+ }
715
+
716
+ private sendStreamDone(to: string, id: string, usage?: { inputTokens: number; outputTokens: number }) {
717
+ this.peerManager.sendTo(to, {
718
+ type: "model_stream",
719
+ id,
720
+ from: this.config.nodeId,
721
+ to,
722
+ timestamp: Date.now(),
723
+ payload: { delta: "", done: true, usage },
724
+ } satisfies ModelStreamChunk);
725
+ }
726
+
488
727
  /** Handle model_req locally: call the model API directly or fall back to OpenClaw gateway. */
489
728
  async handleModelRequest(frame: ModelRequest): Promise<void> {
490
729
  const { id, from, payload } = frame;
491
- debug("model_req", `handling model="${payload.model}" from=${from} stream=${payload.stream}`);
730
+ debug("model_req", `handling model="${payload.model}" provider=${payload.provider ?? "any"} from=${from} stream=${payload.stream}`);
492
731
 
493
- const model = this.config.models.find((m) => m.id === payload.model);
732
+ const model = payload.provider
733
+ ? this.config.models.find((m) => m.id === payload.model && m.provider === payload.provider)
734
+ ?? this.config.models.find((m) => m.id === payload.model)
735
+ : this.config.models.find((m) => m.id === payload.model);
494
736
  if (!model) {
495
737
  this.peerManager.sendTo(from, {
496
738
  type: "model_res",
@@ -505,28 +747,42 @@ export class ModelProxy {
505
747
 
506
748
  try {
507
749
  const endpoint = this.resolveModelEndpoint(model);
750
+ const isResponsesApi = endpoint.api === "openai-responses" || endpoint.api === "openai-codex-responses";
751
+ const path = isResponsesApi ? "/responses" : "/chat/completions";
752
+ const url = `${endpoint.baseUrl}${path}`;
508
753
  const headers: Record<string, string> = { "Content-Type": "application/json" };
509
754
 
510
755
  if (endpoint.direct) {
511
756
  if (endpoint.apiKey) headers["Authorization"] = `Bearer ${endpoint.apiKey}`;
512
- debug("model_req", `direct API call to ${endpoint.url}`);
757
+ debug("model_req", `direct API call to ${url} (api=${endpoint.api})`);
513
758
  } else {
514
759
  const { authHeader } = this.gatewayInfo;
515
760
  if (authHeader) headers["Authorization"] = authHeader;
516
- debug("model_req", `gateway fallback to ${endpoint.url} (not recommended)`);
761
+ debug("model_req", `gateway fallback to ${url}`);
517
762
  }
518
763
 
519
- const response = await fetch(endpoint.url, {
764
+ const modelField = endpoint.direct ? model.id : `${model.provider}/${model.id}`;
765
+ const requestBody = isResponsesApi
766
+ ? {
767
+ model: modelField,
768
+ input: payload.messages,
769
+ stream: payload.stream,
770
+ temperature: payload.temperature,
771
+ max_output_tokens: payload.maxTokens,
772
+ }
773
+ : {
774
+ model: modelField,
775
+ messages: payload.messages,
776
+ temperature: payload.temperature,
777
+ max_tokens: payload.maxTokens,
778
+ stream: payload.stream,
779
+ ...(payload.stream ? { stream_options: { include_usage: true } } : {}),
780
+ };
781
+
782
+ const response = await fetch(url, {
520
783
  method: "POST",
521
784
  headers,
522
- body: JSON.stringify({
523
- model: endpoint.direct ? model.id : `${model.provider}/${model.id}`,
524
- messages: payload.messages,
525
- temperature: payload.temperature,
526
- max_tokens: payload.maxTokens,
527
- stream: payload.stream,
528
- ...(payload.stream ? { stream_options: { include_usage: true } } : {}),
529
- }),
785
+ body: JSON.stringify(requestBody),
530
786
  });
531
787
 
532
788
  if (!response.ok) {
@@ -549,73 +805,122 @@ export class ModelProxy {
549
805
  if (done) break;
550
806
 
551
807
  buffer += decoder.decode(value, { stream: true });
808
+ if (buffer.length > MAX_STREAM_BUFFER) {
809
+ throw new Error("Stream buffer exceeded 1MB — upstream may be malformed");
810
+ }
552
811
  const lines = buffer.split("\n");
553
812
  buffer = lines.pop()!;
554
813
 
814
+ // Track SSE event type for responses API
815
+ let currentEvent = "";
555
816
  for (const line of lines) {
817
+ if (line.startsWith("event: ")) {
818
+ currentEvent = line.slice(7).trim();
819
+ continue;
820
+ }
556
821
  if (!line.startsWith("data: ")) continue;
557
822
  const data = line.slice(6).trim();
558
823
  if (data === "[DONE]") {
559
- this.peerManager.sendTo(from, {
560
- type: "model_stream",
561
- id,
562
- from: this.config.nodeId,
563
- to: from,
564
- timestamp: Date.now(),
565
- payload: { delta: "", done: true, usage: lastUsage },
566
- } satisfies ModelStreamChunk);
824
+ this.sendStreamDone(from, id, lastUsage);
567
825
  streamDone = true;
568
826
  break;
569
827
  }
570
828
 
571
829
  try {
572
830
  const parsed = JSON.parse(data);
573
- if (parsed.usage) {
574
- lastUsage = {
575
- inputTokens: parsed.usage.prompt_tokens,
576
- outputTokens: parsed.usage.completion_tokens,
577
- };
578
- }
579
- const d = parsed.choices?.[0]?.delta;
580
- const delta = d?.content || d?.reasoning_content || "";
581
- if (delta) {
582
- this.peerManager.sendTo(from, {
583
- type: "model_stream",
584
- id,
585
- from: this.config.nodeId,
586
- to: from,
587
- timestamp: Date.now(),
588
- payload: { delta, done: false },
589
- } satisfies ModelStreamChunk);
831
+
832
+ if (isResponsesApi) {
833
+ const evtType = currentEvent || parsed.type;
834
+ if (evtType === "response.output_text.delta") {
835
+ const delta = parsed.delta || "";
836
+ if (delta) {
837
+ this.sendStreamDelta(from, id, delta);
838
+ }
839
+ } else if (evtType === "response.completed") {
840
+ const usage = parsed.response?.usage;
841
+ if (usage) {
842
+ lastUsage = {
843
+ inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
844
+ outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
845
+ };
846
+ }
847
+ this.sendStreamDone(from, id, lastUsage);
848
+ streamDone = true;
849
+ break;
850
+ }
851
+ } else {
852
+ // Chat completions format
853
+ if (parsed.usage) {
854
+ lastUsage = {
855
+ inputTokens: parsed.usage.prompt_tokens,
856
+ outputTokens: parsed.usage.completion_tokens,
857
+ };
858
+ }
859
+ const d = parsed.choices?.[0]?.delta;
860
+ const delta = d?.content || d?.reasoning_content || "";
861
+ // Pass full delta object when it contains tool_calls or other structured data
862
+ const hasStructured = d?.tool_calls || d?.refusal != null;
863
+ if (delta || hasStructured) {
864
+ this.sendStreamDelta(from, id, delta, hasStructured ? d : undefined);
865
+ }
590
866
  }
591
867
  } catch {
592
868
  // skip malformed chunks
593
869
  }
870
+ currentEvent = "";
594
871
  }
595
872
  }
596
- // If the upstream closed without sending [DONE], send a completion
597
- // frame so the requesting side doesn't hang until MODEL_TIMEOUT.
873
+ // If the upstream closed without sending [DONE] or response.completed,
874
+ // send a completion frame so the requesting side doesn't hang.
598
875
  if (!streamDone) {
599
- this.peerManager.sendTo(from, {
600
- type: "model_stream",
601
- id,
602
- from: this.config.nodeId,
603
- to: from,
604
- timestamp: Date.now(),
605
- payload: { delta: "", done: true, usage: lastUsage },
606
- } satisfies ModelStreamChunk);
876
+ this.sendStreamDone(from, id, lastUsage);
607
877
  }
608
878
  } finally {
609
879
  reader.releaseLock();
610
880
  }
611
881
  } else {
612
- const result = (await response.json()) as {
613
- choices?: { message?: { content?: string; reasoning_content?: string } }[];
614
- usage?: { prompt_tokens: number; completion_tokens: number };
615
- };
616
- const msg = result.choices?.[0]?.message;
617
- const content = msg?.content || msg?.reasoning_content || "";
618
- const usage = result.usage;
882
+ // Non-streaming response
883
+ const result = await response.json();
884
+ let content: string;
885
+ let message: unknown | undefined;
886
+ let usage: { inputTokens: number; outputTokens: number } | undefined;
887
+
888
+ if (isResponsesApi) {
889
+ // Responses API: extract text from output[].content[].text
890
+ content = "";
891
+ const output = result.output as { type?: string; content?: { type?: string; text?: string }[] }[] | undefined;
892
+ if (Array.isArray(output)) {
893
+ for (const item of output) {
894
+ if (item.type === "message" && Array.isArray(item.content)) {
895
+ for (const part of item.content) {
896
+ if (part.type === "output_text" && part.text) content += part.text;
897
+ }
898
+ }
899
+ }
900
+ }
901
+ // Carry full output array for structured data (function_call items, etc.)
902
+ message = result.output;
903
+ if (result.usage) {
904
+ usage = {
905
+ inputTokens: result.usage.input_tokens ?? result.usage.prompt_tokens ?? 0,
906
+ outputTokens: result.usage.output_tokens ?? result.usage.completion_tokens ?? 0,
907
+ };
908
+ }
909
+ } else {
910
+ // Chat completions format
911
+ const msg = result.choices?.[0]?.message;
912
+ content = msg?.content || msg?.reasoning_content || "";
913
+ // Carry full message object when it has tool_calls or other structured data
914
+ if (msg?.tool_calls || msg?.refusal != null || msg?.function_call) {
915
+ message = msg;
916
+ }
917
+ if (result.usage) {
918
+ usage = {
919
+ inputTokens: result.usage.prompt_tokens,
920
+ outputTokens: result.usage.completion_tokens,
921
+ };
922
+ }
923
+ }
619
924
 
620
925
  this.peerManager.sendTo(from, {
621
926
  type: "model_res",
@@ -626,9 +931,8 @@ export class ModelProxy {
626
931
  payload: {
627
932
  success: true,
628
933
  content,
629
- usage: usage
630
- ? { inputTokens: usage.prompt_tokens, outputTokens: usage.completion_tokens }
631
- : undefined,
934
+ ...(message !== undefined && { message }),
935
+ usage,
632
936
  },
633
937
  } satisfies ModelResponse);
634
938
  }