npm - clawmatrix - Versions diffs - 0.1.13 → 0.1.15 - Mend

clawmatrix 0.1.13 → 0.1.15

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (15) hide show

package/src/model-proxy.ts CHANGED Viewed

@@ -11,12 +11,17 @@ import type {
 import { debug } from "./debug.ts";
 const MODEL_TIMEOUT = 120_000; // 2 minutes
+const MAX_STREAM_BUFFER = 1_048_576; // 1MB — guard against upstream not sending newlines
+type ResponseFormat = "chat" | "responses";
 interface PendingModelReq {
   resolve: (value: unknown) => void;
   reject: (error: Error) => void;
   timer: ReturnType<typeof setTimeout>;
   stream: boolean;
+  responseFormat: ResponseFormat;
+  model?: string;
   controller?: ReadableStreamDefaultController;
   encoder?: TextEncoder;
 }
@@ -36,35 +41,125 @@ export class ModelProxy {
     this.openclawConfig = openclawConfig;
   }
+  /**
+   * Normalize Responses API `input` to OpenAI chat messages for WS transport.
+   *
+   * Converts:
+   *   - string → [{role: "user", content: "..."}]
+   *   - shorthand {role, content: "..."} → pass through
+   *   - full {type: "message", content: [{type: "input_text"}, {type: "input_image"}]} → chat format
+   *   - {type: "function_call_output"} → {role: "tool", ...}
+   *
+   * Returns chat-completions compatible messages (text + image_url content parts).
+   */
+  private static normalizeResponsesInput(input: unknown): unknown[] {
+    if (typeof input === "string") {
+      return [{ role: "user", content: input }];
+    }
+    if (!Array.isArray(input)) return [];
+    const messages: unknown[] = [];
+    for (const item of input) {
+      if (!item || typeof item !== "object") continue;
+      const obj = item as Record<string, unknown>;
+      // function_call_output → tool message
+      if (obj.type === "function_call_output") {
+        messages.push({
+          role: "tool",
+          tool_call_id: obj.call_id,
+          content: typeof obj.output === "string" ? obj.output : JSON.stringify(obj.output),
+        });
+        continue;
+      }
+      const role = typeof obj.role === "string" ? obj.role : "user";
+      // Simple shorthand: {role: "user", content: "hello"}
+      if (typeof obj.content === "string") {
+        messages.push({ role, content: obj.content });
+        continue;
+      }
+      // Full format: {type: "message", role, content: [{type: "input_text"|"input_image"|...}]}
+      if (Array.isArray(obj.content)) {
+        const parts: unknown[] = [];
+        for (const part of obj.content) {
+          if (!part || typeof part !== "object") continue;
+          const p = part as Record<string, unknown>;
+          if (p.type === "input_text" || p.type === "output_text") {
+            // Text content → chat text part
+            if (typeof p.text === "string") {
+              parts.push({ type: "text", text: p.text });
+            }
+          } else if (p.type === "input_image") {
+            // Image content → chat image_url part
+            if (typeof p.image_url === "string") {
+              parts.push({ type: "image_url", image_url: { url: p.image_url } });
+            } else if (p.image_url && typeof p.image_url === "object") {
+              parts.push({ type: "image_url", image_url: p.image_url });
+            }
+          } else if (p.type === "text" && typeof p.text === "string") {
+            // Already chat format
+            parts.push(p);
+          } else if (p.type === "image_url") {
+            // Already chat format
+            parts.push(p);
+          }
+        }
+        if (parts.length === 1 && (parts[0] as Record<string, unknown>).type === "text") {
+          // Single text part → simplify to string content
+          messages.push({ role, content: ((parts[0] as Record<string, unknown>).text as string) });
+        } else if (parts.length > 0) {
+          messages.push({ role, content: parts });
+        }
+        continue;
+      }
+      // Fallback
+      if (typeof obj.text === "string") {
+        messages.push({ role, content: obj.text });
+      }
+    }
+    return messages;
+  }
   /** Resolve API endpoint for a model: explicit config > OpenClaw provider > gateway fallback */
-  private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string }): { url: string; apiKey?: string; direct: boolean } {
+  private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string; api?: string }): { baseUrl: string; apiKey?: string; direct: boolean; api: string } {
+    const defaultApi = "openai-completions";
     // 1. Explicit baseUrl in ClawMatrix model config
     if (model.baseUrl) {
       return {
-        url: `${model.baseUrl.replace(/\/$/, "")}/chat/completions`,
+        baseUrl: model.baseUrl.replace(/\/$/, ""),
         apiKey: model.apiKey,
         direct: true,
+        api: model.api ?? defaultApi,
       };
     }
     // 2. Read from OpenClaw's models.providers[provider]
     const providers = (this.openclawConfig as Record<string, unknown>).models as
-      { providers?: Record<string, { baseUrl?: string; apiKey?: string }> } | undefined;
+      { providers?: Record<string, { baseUrl?: string; apiKey?: string; api?: string }> } | undefined;
     const providerConfig = providers?.providers?.[model.provider];
     if (providerConfig?.baseUrl) {
       return {
-        url: `${providerConfig.baseUrl.replace(/\/$/, "")}/chat/completions`,
+        baseUrl: providerConfig.baseUrl.replace(/\/$/, ""),
         apiKey: typeof providerConfig.apiKey === "string" ? providerConfig.apiKey : undefined,
         direct: true,
+        api: model.api ?? providerConfig.api ?? defaultApi,
       };
     }
-    // 3. Fallback: OpenClaw gateway (goes through agent system — not recommended)
+    // 3. Fallback: OpenClaw gateway
     const { port } = this.gatewayInfo;
     return {
-      url: `http://127.0.0.1:${port}/v1/chat/completions`,
+      baseUrl: `http://127.0.0.1:${port}/v1`,
       apiKey: undefined,
       direct: false,
+      api: model.api ?? defaultApi,
     };
   }
@@ -79,7 +174,12 @@ export class ModelProxy {
         if (p === "/chat/completions" && req.method === "POST") {
           const body = await this.readBody(req);
-          const response = await this.handleChatCompletion(body);
+          const response = await this.handleChatCompletion(body, "openai-completions");
+          debug("proxy", `response status=${response.status}`);
+          this.sendResponse(res, response);
+        } else if (p === "/responses" && req.method === "POST") {
+          const body = await this.readBody(req);
+          const response = await this.handleResponses(body);
           debug("proxy", `response status=${response.status}`);
           this.sendResponse(res, response);
         } else if (p === "/models" && req.method === "GET") {
@@ -152,55 +252,56 @@ export class ModelProxy {
   }
   // ── HTTP handlers ──────────────────────────────────────────────
-  private async handleChatCompletion(rawBody: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
-    let body: {
-      model: string;
-      messages: unknown[];
-      stream?: boolean;
-      temperature?: number;
-      max_tokens?: number;
-    };
-    try {
-      body = JSON.parse(rawBody);
-    } catch {
-      return {
-        status: 400,
-        headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({ error: "Invalid JSON" }),
-      };
-    }
-    const rawModelId = body.model;
-    // Parse "nodeId/model" format: first segment is nodeId, rest is model ID.
-    // OpenClaw sends "providerId/modelId" where providerId = nodeId, so this
-    // naturally handles both OpenClaw calls and direct curl calls.
-    // If no "/" present, treat entire string as model ID and auto-resolve.
-    let nodeId: string | undefined;
-    let modelId: string;
+  /** Resolve model ID → proxyModel + route. Shared by chat completions and responses handlers. */
+  private resolveModelRoute(rawModelId: string): {
+    nodeId: string; modelId: string;
+    proxyModel: (typeof this.config.proxyModels)[number] | undefined;
+    routeNodeId: string;
+  } | { error: { status: number; message: string } } {
     const slashIdx = rawModelId.indexOf("/");
+    let nodeId: string;
+    let modelId: string;
+    let proxyModel: (typeof this.config.proxyModels)[number] | undefined;
     if (slashIdx > 0) {
       nodeId = rawModelId.slice(0, slashIdx);
       modelId = rawModelId.slice(slashIdx + 1);
+      proxyModel = this.config.proxyModels.find((m) => m.id === modelId && m.nodeId === nodeId);
     } else {
       modelId = rawModelId;
+      proxyModel = this.config.proxyModels.find((m) => m.id === modelId);
+      if (!proxyModel) {
+        return { error: { status: 404, message: `Model "${rawModelId}" not found in proxy models` } };
+      }
+      nodeId = proxyModel.nodeId;
     }
-    debug("proxy", `model raw="${rawModelId}" nodeId=${nodeId ?? "auto"} modelId="${modelId}" stream=${body.stream ?? false}`);
-    const proxyModel = this.config.proxyModels.find((m) => m.id === modelId && (!nodeId || m.nodeId === nodeId))
-      ?? this.config.proxyModels.find((m) => m.id === modelId);
-    const route = nodeId
-      ? this.peerManager.router.getRoute(nodeId)
-      : this.peerManager.router.resolveModel(modelId);
-    debug("proxy", `proxyModel=${proxyModel?.id ?? "none"} route=${route?.nodeId ?? "none"} reachable=${route ? this.peerManager.canReach(route.nodeId) : false}`);
+    const route = this.peerManager.router.getRoute(nodeId);
+    debug("proxy", `model raw="${rawModelId}" nodeId=${nodeId} modelId="${modelId}" route=${route?.nodeId ?? "none"}`);
     if (!route) {
-      return {
-        status: 404,
-        headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({ error: { message: `Model "${modelId}" not found in cluster (proxyModels: [${this.config.proxyModels.map(m => m.id).join(", ")}])` } }),
-      };
+      return { error: { status: 404, message: `Node "${nodeId}" not found in cluster` } };
     }
+    if (!this.peerManager.canReach(route.nodeId)) {
+      return { error: { status: 502, message: `Cannot reach model node "${route.nodeId}"` } };
+    }
+    return { nodeId, modelId, proxyModel, routeNodeId: route.nodeId };
+  }
-    // Inject model identity so the LLM knows what it is
+  private async handleChatCompletion(rawBody: string, _api: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
+    let body: { model: string; messages: unknown[]; stream?: boolean; temperature?: number; max_tokens?: number };
+    try {
+      body = JSON.parse(rawBody);
+    } catch {
+      return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
+    }
+    const resolved = this.resolveModelRoute(body.model);
+    if ("error" in resolved) {
+      return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
+    }
+    const { modelId, proxyModel, routeNodeId } = resolved;
     const messages = body.messages;
     if (proxyModel?.description) {
       const first = messages[0] as { role?: string; content?: string } | undefined;
@@ -213,35 +314,66 @@ export class ModelProxy {
     const stream = body.stream ?? false;
     const requestId = crypto.randomUUID();
     const frame: ModelRequest = {
-      type: "model_req",
-      id: requestId,
-      from: this.config.nodeId,
-      to: route.nodeId,
-      timestamp: Date.now(),
-      payload: {
-        model: modelId,
-        messages,
-        temperature: body.temperature,
-        maxTokens: body.max_tokens,
-        stream,
-      },
+      type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
+      payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_tokens, stream },
     };
-    // Pre-check reachability before starting a stream (avoids silent empty response)
-    if (!this.peerManager.canReach(route.nodeId)) {
-      return {
-        status: 502,
-        headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({ error: { message: `Cannot reach model node "${route.nodeId}"` } }),
-      };
+    if (stream) {
+      return this.handleStreamRequest(requestId, routeNodeId, frame, "chat");
+    } else {
+      return this.handleNonStreamRequest(requestId, routeNodeId, frame, "chat");
+    }
+  }
+  private async handleResponses(rawBody: string): Promise<{ status: number; headers: Record<string, string>; body: string | ReadableStream }> {
+    let body: { model: string; input: unknown; stream?: boolean; temperature?: number; max_output_tokens?: number; instructions?: string };
+    try {
+      body = JSON.parse(rawBody);
+    } catch {
+      return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
+    }
+    const resolved = this.resolveModelRoute(body.model);
+    if ("error" in resolved) {
+      return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
+    }
+    const { modelId, proxyModel, routeNodeId } = resolved;
+    // Normalize responses API input → simple chat messages for WS transport.
+    // Responses API items use {type: "message", role, content: [{type: "input_text", text}]}
+    // but WS protocol carries simple {role, content} chat messages.
+    const messages = ModelProxy.normalizeResponsesInput(body.input);
+    // Prepend instructions as system/developer message
+    if (body.instructions) {
+      messages.unshift({ role: "developer", content: body.instructions });
+    }
+    if (proxyModel?.description) {
+      const first = messages[0] as { role?: string; content?: string } | undefined;
+      if (first?.role === "system" && typeof first.content === "string") {
+        first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
+      } else if (first?.role === "developer" && typeof first.content === "string") {
+        first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
+      } else {
+        messages.unshift({ role: "system", content: `[Model: ${proxyModel.description}]` });
+      }
     }
+    const stream = body.stream ?? false;
+    const requestId = crypto.randomUUID();
+    debug("proxy", `responses: stream=${stream} messages=${messages.length} input_type=${typeof body.input}${Array.isArray(body.input) ? `[${body.input.length}]` : ""}`);
+    const frame: ModelRequest = {
+      type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
+      payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
+    };
     if (stream) {
-      return this.handleStreamRequest(requestId, route.nodeId, frame);
+      return this.handleStreamRequest(requestId, routeNodeId, frame, "responses");
     } else {
-      return this.handleNonStreamRequest(requestId, route.nodeId, frame);
+      return this.handleNonStreamRequest(requestId, routeNodeId, frame, "responses");
     }
   }
@@ -249,8 +381,10 @@ export class ModelProxy {
     requestId: string,
     targetNodeId: string,
     frame: ModelRequest,
+    responseFormat: ResponseFormat,
   ): { status: number; headers: Record<string, string>; body: ReadableStream } {
     const encoder = new TextEncoder();
+    const model = frame.payload.model;
     const readable = new ReadableStream({
       start: (controller) => {
@@ -258,63 +392,89 @@ export class ModelProxy {
           this.pending.delete(requestId);
           this.peerManager.router.markFailed(requestId);
           try {
-            const errorChunk = {
-              id: `chatcmpl-${requestId}`,
-              object: "chat.completion.chunk",
-              choices: [{ index: 0, delta: { content: "\n\n[ClawMatrix] Error: model request timed out" }, finish_reason: "stop" }],
-            };
-            controller.enqueue(
-              encoder.encode(`data: ${JSON.stringify(errorChunk)}\n\n`),
-            );
-            controller.enqueue(encoder.encode("data: [DONE]\n\n"));
+            if (responseFormat === "responses") {
+              controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: "\n\n[ClawMatrix] Error: model request timed out" })}\n\n`));
+              this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
+            } else {
+              controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: "\n\n[ClawMatrix] Error: model request timed out" }, finish_reason: "stop" }] })}\n\n`));
+              controller.enqueue(encoder.encode("data: [DONE]\n\n"));
+            }
             controller.close();
-          } catch {
-            // controller may already be closed
-          }
+          } catch { /* controller may already be closed */ }
         }, MODEL_TIMEOUT);
         this.pending.set(requestId, {
-          resolve: () => {},
-          reject: () => {},
-          timer,
-          stream: true,
-          controller,
-          encoder,
+          resolve: () => {}, reject: () => {},
+          timer, stream: true, responseFormat, model,
+          controller, encoder,
         });
+        // Emit setup events for responses API
+        if (responseFormat === "responses") {
+          this.enqueueResponsesStreamSetup(controller, encoder, requestId, model);
+        }
         const sent = this.peerManager.sendTo(targetNodeId, frame);
         if (!sent) {
           this.pending.delete(requestId);
           clearTimeout(timer);
-          const errChunk = {
-            id: `chatcmpl-${requestId}`,
-            object: "chat.completion.chunk",
-            choices: [{ index: 0, delta: { content: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` }, finish_reason: "stop" }],
-          };
-          controller.enqueue(
-            encoder.encode(`data: ${JSON.stringify(errChunk)}\n\n`),
-          );
-          controller.enqueue(encoder.encode("data: [DONE]\n\n"));
-          controller.close();
+          try {
+            if (responseFormat === "responses") {
+              controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` })}\n\n`));
+              this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
+            } else {
+              controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` }, finish_reason: "stop" }] })}\n\n`));
+              controller.enqueue(encoder.encode("data: [DONE]\n\n"));
+            }
+            controller.close();
+          } catch { /* controller may already be closed */ }
         }
       },
     });
     return {
       status: 200,
-      headers: {
-        "Content-Type": "text/event-stream",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-      },
+      headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" },
       body: readable,
     };
   }
+  /** Emit responses API stream setup events (response.created → content_part.added). */
+  private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string) {
+    const respId = `resp_${id}`;
+    const msgId = `msg_${id}`;
+    const now = Math.floor(Date.now() / 1000);
+    const baseResp = { id: respId, object: "response", created_at: now, status: "in_progress", model, output: [] };
+    const msgItem = { type: "message", id: msgId, role: "assistant", content: [], status: "in_progress" };
+    const textPart = { type: "output_text", text: "" };
+    controller.enqueue(encoder.encode(`event: response.created\ndata: ${JSON.stringify({ type: "response.created", response: baseResp })}\n\n`));
+    controller.enqueue(encoder.encode(`event: response.in_progress\ndata: ${JSON.stringify({ type: "response.in_progress", response: baseResp })}\n\n`));
+    controller.enqueue(encoder.encode(`event: response.output_item.added\ndata: ${JSON.stringify({ type: "response.output_item.added", output_index: 0, item: msgItem })}\n\n`));
+    controller.enqueue(encoder.encode(`event: response.content_part.added\ndata: ${JSON.stringify({ type: "response.content_part.added", item_id: msgId, output_index: 0, content_index: 0, part: textPart })}\n\n`));
+  }
+  /** Emit responses API stream completion events (output_text.done → response.completed). */
+  private enqueueResponsesStreamDone(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string, content?: string, usage?: { inputTokens: number; outputTokens: number }) {
+    const respId = `resp_${id}`;
+    const msgId = `msg_${id}`;
+    const now = Math.floor(Date.now() / 1000);
+    const textPart = { type: "output_text", text: content ?? "" };
+    const msgItem = { type: "message", id: msgId, role: "assistant", content: [textPart], status: "completed" };
+    const usageObj = usage ? { input_tokens: usage.inputTokens, output_tokens: usage.outputTokens, total_tokens: usage.inputTokens + usage.outputTokens } : { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
+    const completedResp = { id: respId, object: "response", created_at: now, status: "completed", model, output: [msgItem], usage: usageObj };
+    controller.enqueue(encoder.encode(`event: response.output_text.done\ndata: ${JSON.stringify({ type: "response.output_text.done", item_id: msgId, output_index: 0, content_index: 0, text: content ?? "" })}\n\n`));
+    controller.enqueue(encoder.encode(`event: response.content_part.done\ndata: ${JSON.stringify({ type: "response.content_part.done", item_id: msgId, output_index: 0, content_index: 0, part: textPart })}\n\n`));
+    controller.enqueue(encoder.encode(`event: response.output_item.done\ndata: ${JSON.stringify({ type: "response.output_item.done", output_index: 0, item: msgItem })}\n\n`));
+    controller.enqueue(encoder.encode(`event: response.completed\ndata: ${JSON.stringify({ type: "response.completed", response: completedResp })}\n\n`));
+  }
   private async handleNonStreamRequest(
     requestId: string,
     targetNodeId: string,
     frame: ModelRequest,
+    responseFormat: ResponseFormat,
   ): Promise<{ status: number; headers: Record<string, string>; body: string }> {
     try {
       const result = await new Promise<ModelResponse["payload"]>(
@@ -327,9 +487,7 @@ export class ModelProxy {
           this.pending.set(requestId, {
             resolve: resolve as (v: unknown) => void,
-            reject,
-            timer,
-            stream: false,
+            reject, timer, stream: false, responseFormat,
           });
           const sent = this.peerManager.sendTo(targetNodeId, frame);
@@ -349,6 +507,41 @@ export class ModelProxy {
         };
       }
+      if (responseFormat === "responses") {
+        const msgId = `msg_${requestId}`;
+        const usageObj = result.usage
+          ? { input_tokens: result.usage.inputTokens, output_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
+          : { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
+        // If upstream sent full output array (responses API), use it directly
+        const output = Array.isArray(result.message)
+          ? result.message
+          : [{
+              type: "message", id: msgId, role: "assistant",
+              content: [{ type: "output_text", text: result.content ?? "" }],
+              status: "completed",
+            }];
+        return {
+          status: 200,
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({
+            id: `resp_${requestId}`,
+            object: "response",
+            created_at: Math.floor(Date.now() / 1000),
+            status: "completed",
+            model: frame.payload.model,
+            output,
+            usage: usageObj,
+          }),
+        };
+      }
+      // Chat completions format — use full message object when available (has tool_calls etc.)
+      const msg = result.message as Record<string, unknown> | undefined;
+      const message = msg
+        ? { role: "assistant", ...msg }
+        : { role: "assistant", content: result.content };
+      const finishReason = msg?.tool_calls ? "tool_calls" : "stop";
       return {
         status: 200,
         headers: { "Content-Type": "application/json" },
@@ -357,19 +550,13 @@ export class ModelProxy {
           object: "chat.completion",
           created: Math.floor(Date.now() / 1000),
           model: frame.payload.model,
-          choices: [
-            {
-              index: 0,
-              message: { role: "assistant", content: result.content },
-              finish_reason: "stop",
-            },
-          ],
+          choices: [{
+            index: 0,
+            message,
+            finish_reason: finishReason,
+          }],
           usage: result.usage
-            ? {
-                prompt_tokens: result.usage.inputTokens,
-                completion_tokens: result.usage.outputTokens,
-                total_tokens: result.usage.inputTokens + result.usage.outputTokens,
-              }
+            ? { prompt_tokens: result.usage.inputTokens, completion_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
             : undefined,
         }),
       };
@@ -383,16 +570,35 @@ export class ModelProxy {
   }
   private handleListModels(): { status: number; headers: Record<string, string>; body: string } {
-    const models = this.peerManager.router
-      .getAllPeers()
-      .flatMap((p) =>
-        p.models.map((m) => ({
-          id: m.id,
-          object: "model",
-          created: 0,
-          owned_by: m.provider,
-        })),
-      );
+    // Build from proxyModels config (has full detail) and enrich with
+    // connectivity info from the router so consumers know what's reachable.
+    const reachable = new Set(
+      this.peerManager.router.getAllPeers()
+        .filter((p) => p.connection?.isOpen || p.reachableVia)
+        .map((p) => p.nodeId),
+    );
+    const models = this.config.proxyModels.map((m) => {
+      const entry: Record<string, unknown> = {
+        id: m.id,
+        object: "model",
+        created: 0,
+        owned_by: m.provider ?? "unknown",
+        // Extended fields
+        ...(m.description && { description: m.description }),
+        ...(m.contextWindow && { context_window: m.contextWindow }),
+        ...(m.maxTokens && { max_tokens: m.maxTokens }),
+        ...(m.reasoning !== undefined && { reasoning: m.reasoning }),
+        ...(m.input && { input: m.input }),
+        ...(m.api && { api: m.api }),
+        ...(m.cost && { cost: m.cost }),
+        ...(m.compat && { compat: m.compat }),
+        // Cluster info
+        node_id: m.nodeId,
+        reachable: reachable.has(m.nodeId),
+      };
+      return entry;
+    });
     return {
       status: 200,
@@ -414,19 +620,16 @@ export class ModelProxy {
         clearTimeout(pending.timer);
         this.pending.delete(frame.id);
         try {
-          const errChunk = {
-            id: `chatcmpl-${frame.id}`,
-            object: "chat.completion.chunk",
-            choices: [{ index: 0, delta: { content: `[ClawMatrix] Remote error: ${frame.payload.error}` }, finish_reason: "stop" }],
-          };
-          pending.controller.enqueue(
-            pending.encoder.encode(`data: ${JSON.stringify(errChunk)}\n\n`),
-          );
-          pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
+          const errMsg = `[ClawMatrix] Remote error: ${frame.payload.error}`;
+          if (pending.responseFormat === "responses") {
+            pending.controller.enqueue(pending.encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: errMsg })}\n\n`));
+            this.enqueueResponsesStreamDone(pending.controller, pending.encoder, frame.id, pending.model ?? "", errMsg);
+          } else {
+            pending.controller.enqueue(pending.encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: errMsg }, finish_reason: "stop" }] })}\n\n`));
+            pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
+          }
           pending.controller.close();
-        } catch {
-          // controller may already be closed
-        }
+        } catch { /* controller may already be closed */ }
       }
       return;
     }
@@ -436,6 +639,9 @@ export class ModelProxy {
     pending.resolve(frame.payload);
   }
+  /** Accumulated text per stream request (needed for responses API done events). */
+  private streamText = new Map<string, string>();
   handleModelStream(frame: ModelStreamChunk) {
     debug("stream", `id=${frame.id} done=${frame.payload.done} delta=${JSON.stringify(frame.payload.delta?.slice?.(0, 50) ?? frame.payload.delta)} failed=${this.peerManager.router.isFailed(frame.id)} hasPending=${this.pending.has(frame.id)}`);
     if (this.peerManager.router.isFailed(frame.id)) return;
@@ -443,54 +649,87 @@ export class ModelProxy {
     if (!pending?.stream || !pending.controller || !pending.encoder) return;
     try {
-      if (frame.payload.done) {
-        const finalChunk: Record<string, unknown> = {
-          id: `chatcmpl-${frame.id}`,
-          object: "chat.completion.chunk",
-          choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
-        };
-        if (frame.payload.usage) {
-          finalChunk.usage = {
-            prompt_tokens: frame.payload.usage.inputTokens,
-            completion_tokens: frame.payload.usage.outputTokens,
-            total_tokens: frame.payload.usage.inputTokens + frame.payload.usage.outputTokens,
-          };
-        }
-        pending.controller.enqueue(
-          pending.encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`),
-        );
-        pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
-        pending.controller.close();
-        clearTimeout(pending.timer);
-        this.pending.delete(frame.id);
+      if (pending.responseFormat === "responses") {
+        this.handleModelStreamResponses(frame, pending);
       } else {
-        const chunk = {
-          id: `chatcmpl-${frame.id}`,
-          object: "chat.completion.chunk",
-          choices: [
-            {
-              index: 0,
-              delta: { content: frame.payload.delta },
-              finish_reason: null,
-            },
-          ],
-        };
-        pending.controller.enqueue(
-          pending.encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`),
-        );
+        this.handleModelStreamChat(frame, pending);
       }
     } catch {
       clearTimeout(pending.timer);
       this.pending.delete(frame.id);
+      this.streamText.delete(frame.id);
     }
   }
+  private handleModelStreamChat(frame: ModelStreamChunk, pending: PendingModelReq) {
+    if (frame.payload.done) {
+      const finalChunk: Record<string, unknown> = {
+        id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk",
+        choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
+      };
+      if (frame.payload.usage) {
+        finalChunk.usage = { prompt_tokens: frame.payload.usage.inputTokens, completion_tokens: frame.payload.usage.outputTokens, total_tokens: frame.payload.usage.inputTokens + frame.payload.usage.outputTokens };
+      }
+      pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
+      pending.controller!.enqueue(pending.encoder!.encode("data: [DONE]\n\n"));
+      pending.controller!.close();
+      clearTimeout(pending.timer);
+      this.pending.delete(frame.id);
+    } else {
+      // Use full deltaObj when available (carries tool_calls etc.), otherwise simple text delta
+      const delta = frame.payload.deltaObj ?? { content: frame.payload.delta };
+      const chunk = { id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta, finish_reason: null }] };
+      pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(chunk)}\n\n`));
+    }
+  }
+  private handleModelStreamResponses(frame: ModelStreamChunk, pending: PendingModelReq) {
+    if (frame.payload.done) {
+      const fullText = this.streamText.get(frame.id) ?? "";
+      this.streamText.delete(frame.id);
+      this.enqueueResponsesStreamDone(pending.controller!, pending.encoder!, frame.id, pending.model ?? "", fullText, frame.payload.usage);
+      pending.controller!.close();
+      clearTimeout(pending.timer);
+      this.pending.delete(frame.id);
+    } else {
+      // Accumulate text for done event
+      this.streamText.set(frame.id, (this.streamText.get(frame.id) ?? "") + frame.payload.delta);
+      const evt = { type: "response.output_text.delta", item_id: `msg_${frame.id}`, output_index: 0, content_index: 0, delta: frame.payload.delta };
+      pending.controller!.enqueue(pending.encoder!.encode(`event: response.output_text.delta\ndata: ${JSON.stringify(evt)}\n\n`));
+    }
+  }
+  private sendStreamDelta(to: string, id: string, delta: string, deltaObj?: unknown) {
+    this.peerManager.sendTo(to, {
+      type: "model_stream",
+      id,
+      from: this.config.nodeId,
+      to,
+      timestamp: Date.now(),
+      payload: { delta, ...(deltaObj !== undefined && { deltaObj }), done: false },
+    } satisfies ModelStreamChunk);
+  }
+  private sendStreamDone(to: string, id: string, usage?: { inputTokens: number; outputTokens: number }) {
+    this.peerManager.sendTo(to, {
+      type: "model_stream",
+      id,
+      from: this.config.nodeId,
+      to,
+      timestamp: Date.now(),
+      payload: { delta: "", done: true, usage },
+    } satisfies ModelStreamChunk);
+  }
   /** Handle model_req locally: call the model API directly or fall back to OpenClaw gateway. */
   async handleModelRequest(frame: ModelRequest): Promise<void> {
     const { id, from, payload } = frame;
-    debug("model_req", `handling model="${payload.model}" from=${from} stream=${payload.stream}`);
+    debug("model_req", `handling model="${payload.model}" provider=${payload.provider ?? "any"} from=${from} stream=${payload.stream}`);
-    const model = this.config.models.find((m) => m.id === payload.model);
+    const model = payload.provider
+      ? this.config.models.find((m) => m.id === payload.model && m.provider === payload.provider)
+        ?? this.config.models.find((m) => m.id === payload.model)
+      : this.config.models.find((m) => m.id === payload.model);
     if (!model) {
       this.peerManager.sendTo(from, {
         type: "model_res",
@@ -505,28 +744,42 @@ export class ModelProxy {
     try {
       const endpoint = this.resolveModelEndpoint(model);
+      const isResponsesApi = endpoint.api === "openai-responses" || endpoint.api === "openai-codex-responses";
+      const path = isResponsesApi ? "/responses" : "/chat/completions";
+      const url = `${endpoint.baseUrl}${path}`;
       const headers: Record<string, string> = { "Content-Type": "application/json" };
       if (endpoint.direct) {
         if (endpoint.apiKey) headers["Authorization"] = `Bearer ${endpoint.apiKey}`;
-        debug("model_req", `direct API call to ${endpoint.url}`);
+        debug("model_req", `direct API call to ${url} (api=${endpoint.api})`);
       } else {
         const { authHeader } = this.gatewayInfo;
         if (authHeader) headers["Authorization"] = authHeader;
-        debug("model_req", `gateway fallback to ${endpoint.url} (not recommended)`);
+        debug("model_req", `gateway fallback to ${url}`);
       }
-      const response = await fetch(endpoint.url, {
+      const modelField = endpoint.direct ? model.id : `${model.provider}/${model.id}`;
+      const requestBody = isResponsesApi
+        ? {
+            model: modelField,
+            input: payload.messages,
+            stream: payload.stream,
+            temperature: payload.temperature,
+            max_output_tokens: payload.maxTokens,
+          }
+        : {
+            model: modelField,
+            messages: payload.messages,
+            temperature: payload.temperature,
+            max_tokens: payload.maxTokens,
+            stream: payload.stream,
+            ...(payload.stream ? { stream_options: { include_usage: true } } : {}),
+          };
+      const response = await fetch(url, {
         method: "POST",
         headers,
-        body: JSON.stringify({
-          model: endpoint.direct ? model.id : `${model.provider}/${model.id}`,
-          messages: payload.messages,
-          temperature: payload.temperature,
-          max_tokens: payload.maxTokens,
-          stream: payload.stream,
-          ...(payload.stream ? { stream_options: { include_usage: true } } : {}),
-        }),
+        body: JSON.stringify(requestBody),
       });
       if (!response.ok) {
@@ -549,73 +802,122 @@ export class ModelProxy {
             if (done) break;
             buffer += decoder.decode(value, { stream: true });
+            if (buffer.length > MAX_STREAM_BUFFER) {
+              throw new Error("Stream buffer exceeded 1MB — upstream may be malformed");
+            }
             const lines = buffer.split("\n");
             buffer = lines.pop()!;
+            // Track SSE event type for responses API
+            let currentEvent = "";
             for (const line of lines) {
+              if (line.startsWith("event: ")) {
+                currentEvent = line.slice(7).trim();
+                continue;
+              }
               if (!line.startsWith("data: ")) continue;
               const data = line.slice(6).trim();
               if (data === "[DONE]") {
-                this.peerManager.sendTo(from, {
-                  type: "model_stream",
-                  id,
-                  from: this.config.nodeId,
-                  to: from,
-                  timestamp: Date.now(),
-                  payload: { delta: "", done: true, usage: lastUsage },
-                } satisfies ModelStreamChunk);
+                this.sendStreamDone(from, id, lastUsage);
                 streamDone = true;
                 break;
               }
               try {
                 const parsed = JSON.parse(data);
-                if (parsed.usage) {
-                  lastUsage = {
-                    inputTokens: parsed.usage.prompt_tokens,
-                    outputTokens: parsed.usage.completion_tokens,
-                  };
-                }
-                const d = parsed.choices?.[0]?.delta;
-                const delta = d?.content || d?.reasoning_content || "";
-                if (delta) {
-                  this.peerManager.sendTo(from, {
-                    type: "model_stream",
-                    id,
-                    from: this.config.nodeId,
-                    to: from,
-                    timestamp: Date.now(),
-                    payload: { delta, done: false },
-                  } satisfies ModelStreamChunk);
+                if (isResponsesApi) {
+                  const evtType = currentEvent || parsed.type;
+                  if (evtType === "response.output_text.delta") {
+                    const delta = parsed.delta || "";
+                    if (delta) {
+                      this.sendStreamDelta(from, id, delta);
+                    }
+                  } else if (evtType === "response.completed") {
+                    const usage = parsed.response?.usage;
+                    if (usage) {
+                      lastUsage = {
+                        inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
+                        outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
+                      };
+                    }
+                    this.sendStreamDone(from, id, lastUsage);
+                    streamDone = true;
+                    break;
+                  }
+                } else {
+                  // Chat completions format
+                  if (parsed.usage) {
+                    lastUsage = {
+                      inputTokens: parsed.usage.prompt_tokens,
+                      outputTokens: parsed.usage.completion_tokens,
+                    };
+                  }
+                  const d = parsed.choices?.[0]?.delta;
+                  const delta = d?.content || d?.reasoning_content || "";
+                  // Pass full delta object when it contains tool_calls or other structured data
+                  const hasStructured = d?.tool_calls || d?.refusal != null;
+                  if (delta || hasStructured) {
+                    this.sendStreamDelta(from, id, delta, hasStructured ? d : undefined);
+                  }
                 }
               } catch {
                 // skip malformed chunks
               }
+              currentEvent = "";
             }
           }
-          // If the upstream closed without sending [DONE], send a completion
-          // frame so the requesting side doesn't hang until MODEL_TIMEOUT.
+          // If the upstream closed without sending [DONE] or response.completed,
+          // send a completion frame so the requesting side doesn't hang.
           if (!streamDone) {
-            this.peerManager.sendTo(from, {
-              type: "model_stream",
-              id,
-              from: this.config.nodeId,
-              to: from,
-              timestamp: Date.now(),
-              payload: { delta: "", done: true, usage: lastUsage },
-            } satisfies ModelStreamChunk);
+            this.sendStreamDone(from, id, lastUsage);
           }
         } finally {
           reader.releaseLock();
         }
       } else {
-        const result = (await response.json()) as {
-          choices?: { message?: { content?: string; reasoning_content?: string } }[];
-          usage?: { prompt_tokens: number; completion_tokens: number };
-        };
-        const msg = result.choices?.[0]?.message;
-        const content = msg?.content || msg?.reasoning_content || "";
-        const usage = result.usage;
+        // Non-streaming response
+        const result = await response.json();
+        let content: string;
+        let message: unknown | undefined;
+        let usage: { inputTokens: number; outputTokens: number } | undefined;
+        if (isResponsesApi) {
+          // Responses API: extract text from output[].content[].text
+          content = "";
+          const output = result.output as { type?: string; content?: { type?: string; text?: string }[] }[] | undefined;
+          if (Array.isArray(output)) {
+            for (const item of output) {
+              if (item.type === "message" && Array.isArray(item.content)) {
+                for (const part of item.content) {
+                  if (part.type === "output_text" && part.text) content += part.text;
+                }
+              }
+            }
+          }
+          // Carry full output array for structured data (function_call items, etc.)
+          message = result.output;
+          if (result.usage) {
+            usage = {
+              inputTokens: result.usage.input_tokens ?? result.usage.prompt_tokens ?? 0,
+              outputTokens: result.usage.output_tokens ?? result.usage.completion_tokens ?? 0,
+            };
+          }
+        } else {
+          // Chat completions format
+          const msg = result.choices?.[0]?.message;
+          content = msg?.content || msg?.reasoning_content || "";
+          // Carry full message object when it has tool_calls or other structured data
+          if (msg?.tool_calls || msg?.refusal != null || msg?.function_call) {
+            message = msg;
+          }
+          if (result.usage) {
+            usage = {
+              inputTokens: result.usage.prompt_tokens,
+              outputTokens: result.usage.completion_tokens,
+            };
+          }
+        }
         this.peerManager.sendTo(from, {
           type: "model_res",
@@ -626,9 +928,8 @@ export class ModelProxy {
           payload: {
             success: true,
             content,
-            usage: usage
-              ? { inputTokens: usage.prompt_tokens, outputTokens: usage.completion_tokens }
-              : undefined,
+            ...(message !== undefined && { message }),
+            usage,
           },
         } satisfies ModelResponse);
       }