clawmatrix 0.1.22 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,16 @@ import type {
11
11
  import { debug } from "./debug.ts";
12
12
  import { readBody } from "./http-utils.ts";
13
13
 
14
- const MODEL_TIMEOUT = 120_000; // 2 minutes
14
+ const DEFAULT_MODEL_TIMEOUT = 120_000; // 2 minutes
15
+
16
+ /** Normalize usage from OpenAI-compatible APIs (supports both field naming conventions). */
17
+ function parseUsage(usage: Record<string, number> | undefined): { inputTokens: number; outputTokens: number } | undefined {
18
+ if (!usage) return undefined;
19
+ return {
20
+ inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
21
+ outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
22
+ };
23
+ }
15
24
  const MAX_STREAM_BUFFER = 1_048_576; // 1MB — guard against upstream not sending newlines
16
25
 
17
26
  type ResponseFormat = "chat" | "responses";
@@ -53,12 +62,54 @@ export class ModelProxy {
53
62
  private httpServer: Server | null = null;
54
63
  private gatewayInfo: GatewayInfo;
55
64
  private openclawConfig: OpenClawConfig;
65
+ private readonly modelTimeout: number;
66
+
67
+ /** Dynamically discovered proxy models from peer capabilities (auto-discovery). */
68
+ private discoveredModels: import("./config.ts").ProxyModel[] = [];
69
+
70
+ /** Cache of models that need a different API format than configured (detected at runtime).
71
+ * Entries expire after 10 minutes so upstream upgrades are eventually detected. */
72
+ private modelApiCache = new Map<string, { api: string; ts: number }>();
73
+ private static readonly MODEL_API_CACHE_TTL = 600_000; // 10 minutes
74
+ private cacheCleanupTimer: ReturnType<typeof setInterval> | null = null;
56
75
 
57
76
  constructor(config: ClawMatrixConfig, peerManager: PeerManager, gatewayInfo: GatewayInfo, openclawConfig: OpenClawConfig) {
58
77
  this.config = config;
59
78
  this.peerManager = peerManager;
60
79
  this.gatewayInfo = gatewayInfo;
61
80
  this.openclawConfig = openclawConfig;
81
+ this.modelTimeout = config.modelTimeout ?? DEFAULT_MODEL_TIMEOUT;
82
+ }
83
+
84
+ /** All proxy models: static config + dynamically discovered from peers. */
85
+ get allProxyModels(): import("./config.ts").ProxyModel[] {
86
+ if (this.discoveredModels.length === 0) return this.config.proxyModels;
87
+ return [...this.config.proxyModels, ...this.discoveredModels];
88
+ }
89
+
90
+ /** Rebuild discovered models from current peer capabilities.
91
+ * Skips models already present in static config.proxyModels. */
92
+ updateDiscoveredModels(peers: import("./router.ts").RouteEntry[]) {
93
+ const staticKeys = new Set(this.config.proxyModels.map((m) => `${m.nodeId}/${m.id}`));
94
+ const next: import("./config.ts").ProxyModel[] = [];
95
+ for (const peer of peers) {
96
+ for (const m of peer.models) {
97
+ const key = `${peer.nodeId}/${m.id}`;
98
+ if (staticKeys.has(key)) continue;
99
+ next.push({
100
+ id: m.id,
101
+ nodeId: peer.nodeId,
102
+ provider: m.provider,
103
+ description: m.description,
104
+ input: m.input,
105
+ compat: m.compat as import("./config.ts").ProxyModel["compat"],
106
+ });
107
+ }
108
+ }
109
+ this.discoveredModels = next;
110
+ if (next.length > 0) {
111
+ debug("proxy", `Auto-discovered ${next.length} model(s) from peers: ${next.map((m) => `${m.nodeId}/${m.id}`).join(", ")}`);
112
+ }
62
113
  }
63
114
 
64
115
  /**
@@ -212,8 +263,18 @@ export class ModelProxy {
212
263
  return items;
213
264
  }
214
265
 
215
- /** Resolve API endpoint for a model: explicit config > OpenClaw provider > gateway fallback */
216
- private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string; api?: string }): { baseUrl: string; apiKey?: string; direct: boolean; api: string } {
266
+ /**
267
+ * Resolve API endpoint for a model: explicit config > OpenClaw provider > null.
268
+ *
269
+ * ⚠️ 重要:绝对不能 fallback 到 OpenClaw gateway 的 /v1/chat/completions!
270
+ * OpenClaw gateway 的 /v1/chat/completions 会走 Agent 系统,每次请求都会创建
271
+ * 一个新的 Agent session(带记忆、system prompt 等)。这会导致:
272
+ * - 远程节点(如 iPhone)的每次 model_req 都在本地产生一个多余的 OpenClaw 会话
273
+ * - 模型响应被 OpenClaw Agent 的 system prompt 和记忆污染,结果不正确
274
+ *
275
+ * 如果找不到直连 API 端点,必须返回 null 让调用方报错,而不是静默 fallback。
276
+ */
277
+ private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string; api?: string }): { baseUrl: string; apiKey?: string; direct: boolean; api: string } | null {
217
278
  const defaultApi = "openai-completions";
218
279
 
219
280
  // 1. Explicit baseUrl in ClawMatrix model config
@@ -227,9 +288,11 @@ export class ModelProxy {
227
288
  }
228
289
 
229
290
  // 2. Read from OpenClaw's models.providers[provider]
230
- const providers = (this.openclawConfig as Record<string, unknown>).models as
231
- { providers?: Record<string, { baseUrl?: string; apiKey?: string; api?: string }> } | undefined;
232
- const providerConfig = providers?.providers?.[model.provider];
291
+ const modelsConfig = (this.openclawConfig as Record<string, unknown>).models;
292
+ const providers = (modelsConfig && typeof modelsConfig === "object")
293
+ ? (modelsConfig as { providers?: Record<string, { baseUrl?: string; apiKey?: string; api?: string }> }).providers
294
+ : undefined;
295
+ const providerConfig = providers?.[model.provider];
233
296
  if (providerConfig?.baseUrl) {
234
297
  return {
235
298
  baseUrl: providerConfig.baseUrl.replace(/\/$/, ""),
@@ -239,18 +302,20 @@ export class ModelProxy {
239
302
  };
240
303
  }
241
304
 
242
- // 3. Fallback: OpenClaw gateway
243
- const { port } = this.gatewayInfo;
244
- return {
245
- baseUrl: `http://127.0.0.1:${port}/v1`,
246
- apiKey: undefined,
247
- direct: false,
248
- api: model.api ?? defaultApi,
249
- };
305
+ // 找不到直连端点时返回 null(见上方注释,不能 fallback 到 gateway)
306
+ return null;
250
307
  }
251
308
 
252
309
  /** Start the local HTTP proxy server for OpenAI-compatible requests. */
253
310
  start() {
311
+ // Periodically prune expired model API cache entries
312
+ this.cacheCleanupTimer = setInterval(() => {
313
+ const now = Date.now();
314
+ for (const [id, entry] of this.modelApiCache) {
315
+ if (now - entry.ts > ModelProxy.MODEL_API_CACHE_TTL) this.modelApiCache.delete(id);
316
+ }
317
+ }, ModelProxy.MODEL_API_CACHE_TTL);
318
+
254
319
  this.httpServer = createServer(async (req, res) => {
255
320
  try {
256
321
  const url = new URL(req.url ?? "/", `http://${req.headers.host ?? "localhost"}`);
@@ -292,6 +357,10 @@ export class ModelProxy {
292
357
  }
293
358
 
294
359
  stop() {
360
+ if (this.cacheCleanupTimer) {
361
+ clearInterval(this.cacheCleanupTimer);
362
+ this.cacheCleanupTimer = null;
363
+ }
295
364
  if (this.httpServer) {
296
365
  this.httpServer.close();
297
366
  this.httpServer = null;
@@ -303,8 +372,17 @@ export class ModelProxy {
303
372
  this.pending.clear();
304
373
  this.streamText.clear();
305
374
  this.streamSetupSent.clear();
375
+ this.modelApiCache.clear();
306
376
  }
307
377
 
378
+ /** Clean up all tracking state for a request (pending, streamText, streamSetupSent). */
379
+ private cleanupRequest(id: string, stableStreamId?: string) {
380
+ const pending = this.pending.get(id);
381
+ if (pending) clearTimeout(pending.timer);
382
+ this.pending.delete(id);
383
+ this.streamText.delete(id);
384
+ if (stableStreamId) this.streamSetupSent.delete(stableStreamId);
385
+ }
308
386
 
309
387
  private sendResponse(res: import("node:http").ServerResponse, response: ProxyResponse) {
310
388
  res.writeHead(response.status, response.headers);
@@ -313,18 +391,34 @@ export class ModelProxy {
313
391
  } else {
314
392
  // Stream response
315
393
  const reader = response.body.getReader();
394
+ let finished = false;
395
+
396
+ // Clean up stream when client disconnects mid-stream
397
+ res.on("close", () => {
398
+ if (!finished) {
399
+ finished = true;
400
+ reader.cancel().catch(() => {});
401
+ }
402
+ });
403
+
316
404
  const pump = (): void => {
317
405
  reader.read().then(({ done, value }) => {
318
- if (done) {
406
+ if (done || finished) {
407
+ finished = true;
319
408
  reader.releaseLock();
320
- res.end();
409
+ if (!res.writableEnded) res.end();
321
410
  return;
322
411
  }
323
- res.write(value);
324
- pump();
412
+ const ok = res.write(value);
413
+ if (ok) {
414
+ pump();
415
+ } else {
416
+ res.once("drain", pump);
417
+ }
325
418
  }).catch(() => {
419
+ finished = true;
326
420
  reader.releaseLock();
327
- res.end();
421
+ if (!res.writableEnded) res.end();
328
422
  });
329
423
  };
330
424
  pump();
@@ -340,16 +434,17 @@ export class ModelProxy {
340
434
  } | { error: { status: number; message: string } } {
341
435
  const slashIdx = rawModelId.indexOf("/");
342
436
  let modelId: string;
343
- let matchingModels: (typeof this.config.proxyModels)[number][];
437
+ const proxyModels = this.allProxyModels;
438
+ let matchingModels: (typeof proxyModels)[number][];
344
439
 
345
440
  if (slashIdx > 0) {
346
441
  const nodeId = rawModelId.slice(0, slashIdx);
347
442
  modelId = rawModelId.slice(slashIdx + 1);
348
443
  // Explicit node/model — only target that specific node, no failover to others
349
- matchingModels = this.config.proxyModels.filter((m) => m.id === modelId && m.nodeId === nodeId);
444
+ matchingModels = proxyModels.filter((m) => m.id === modelId && m.nodeId === nodeId);
350
445
  } else {
351
446
  modelId = rawModelId;
352
- matchingModels = this.config.proxyModels.filter((m) => m.id === modelId);
447
+ matchingModels = proxyModels.filter((m) => m.id === modelId);
353
448
  }
354
449
 
355
450
  if (matchingModels.length === 0) {
@@ -368,6 +463,16 @@ export class ModelProxy {
368
463
  }
369
464
  }
370
465
 
466
+ // Sort candidates by latency (lowest first) for optimal first-try and failover order
467
+ candidates.sort((a, b) => {
468
+ const routeA = this.peerManager.router.getRoute(a.routeNodeId);
469
+ const routeB = this.peerManager.router.getRoute(b.routeNodeId);
470
+ const aDirect = routeA?.connection ? 0 : 1;
471
+ const bDirect = routeB?.connection ? 0 : 1;
472
+ if (aDirect !== bDirect) return aDirect - bDirect;
473
+ return (routeA?.latencyMs ?? 0) - (routeB?.latencyMs ?? 0);
474
+ });
475
+
371
476
  debug("proxy", `model raw="${rawModelId}" modelId="${modelId}" candidates=${candidates.map((c) => c.routeNodeId).join(",") || "none"}`);
372
477
  if (candidates.length === 0) {
373
478
  return { error: { status: 502, message: `No reachable node for model "${rawModelId}"` } };
@@ -377,7 +482,7 @@ export class ModelProxy {
377
482
  }
378
483
 
379
484
  private async handleChatCompletion(rawBody: string, _api: string): Promise<ProxyResponse> {
380
- let body: { model: string; messages: unknown[]; stream?: boolean; temperature?: number; max_tokens?: number };
485
+ let body: { model: string; messages: unknown[]; stream?: boolean; temperature?: number; max_tokens?: number; tools?: unknown[]; tool_choice?: unknown };
381
486
  try {
382
487
  body = JSON.parse(rawBody);
383
488
  } catch {
@@ -410,7 +515,7 @@ export class ModelProxy {
410
515
  }
411
516
  return {
412
517
  type: "model_req", id, from: this.config.nodeId, to: candidate.routeNodeId, timestamp: Date.now(),
413
- payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_tokens, stream },
518
+ payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages, tools: body.tools, toolChoice: body.tool_choice, temperature: body.temperature, maxTokens: body.max_tokens, stream },
414
519
  };
415
520
  };
416
521
  const frame = buildFrame(first, requestId);
@@ -423,7 +528,7 @@ export class ModelProxy {
423
528
  }
424
529
 
425
530
  private async handleResponses(rawBody: string): Promise<ProxyResponse> {
426
- let body: { model: string; input: unknown; stream?: boolean; temperature?: number; max_output_tokens?: number; instructions?: string };
531
+ let body: { model: string; input: unknown; stream?: boolean; temperature?: number; max_output_tokens?: number; instructions?: string; tools?: unknown[]; tool_choice?: unknown };
427
532
  try {
428
533
  body = JSON.parse(rawBody);
429
534
  } catch {
@@ -467,7 +572,7 @@ export class ModelProxy {
467
572
  }
468
573
  return {
469
574
  type: "model_req", id, from: this.config.nodeId, to: candidate.routeNodeId, timestamp: Date.now(),
470
- payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages: inputItems, inputFormat: "responses", temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
575
+ payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages: inputItems, inputFormat: "responses", tools: body.tools, toolChoice: body.tool_choice, temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
471
576
  };
472
577
  };
473
578
  const frame = buildFrame(first, requestId);
@@ -490,10 +595,21 @@ export class ModelProxy {
490
595
  const encoder = new TextEncoder();
491
596
  const model = frame.payload.model;
492
597
 
598
+ let streamController: ReadableStreamDefaultController;
493
599
  const readable = new ReadableStream({
494
600
  start: (controller) => {
601
+ streamController = controller;
495
602
  this.startStreamAttempt(requestId, targetNodeId, frame, responseFormat, controller, encoder, model, failoverCandidates, buildFrame);
496
603
  },
604
+ cancel: () => {
605
+ // Client disconnected — find and clean up the pending request using this controller
606
+ for (const [id, p] of this.pending) {
607
+ if (p.controller === streamController) {
608
+ this.cleanupRequest(id, p.stableStreamId);
609
+ break;
610
+ }
611
+ }
612
+ },
497
613
  });
498
614
 
499
615
  // Emit setup events for responses API (once, before any attempts)
@@ -524,11 +640,10 @@ export class ModelProxy {
524
640
  const stableId = streamId ?? requestId;
525
641
 
526
642
  const timer = setTimeout(() => {
527
- this.pending.delete(requestId);
528
- this.streamText.delete(requestId);
643
+ this.cleanupRequest(requestId);
529
644
  this.peerManager.router.markFailed(requestId);
530
645
  this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `model request to "${targetNodeId}" timed out`);
531
- }, MODEL_TIMEOUT);
646
+ }, this.modelTimeout);
532
647
 
533
648
  this.pending.set(requestId, {
534
649
  resolve: () => {}, reject: () => {},
@@ -542,14 +657,14 @@ export class ModelProxy {
542
657
 
543
658
  // Emit setup events for responses API (only once per stream, keyed by stableId)
544
659
  if (responseFormat === "responses" && !this.streamSetupSent.has(stableId)) {
545
- this.enqueueResponsesStreamSetup(controller, encoder, stableId, model);
660
+ const hasTools = Array.isArray(frame.payload.tools) && frame.payload.tools.length > 0;
661
+ this.enqueueResponsesStreamSetup(controller, encoder, stableId, model, hasTools);
546
662
  this.streamSetupSent.add(stableId);
547
663
  }
548
664
 
549
665
  const sent = this.peerManager.sendTo(targetNodeId, frame);
550
666
  if (!sent) {
551
- this.pending.delete(requestId);
552
- clearTimeout(timer);
667
+ this.cleanupRequest(requestId);
553
668
  this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `cannot reach node "${targetNodeId}"`);
554
669
  }
555
670
  }
@@ -593,18 +708,23 @@ export class ModelProxy {
593
708
  }
594
709
 
595
710
  /** Emit responses API stream setup events (response.created → content_part.added). */
596
- private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string) {
711
+ private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string, hasTools = false) {
597
712
  const respId = `resp_${id}`;
598
- const msgId = `msg_${id}`;
599
713
  const now = Math.floor(Date.now() / 1000);
600
714
  const baseResp = { id: respId, object: "response", created_at: now, status: "in_progress", model, output: [] };
601
- const msgItem = { type: "message", id: msgId, role: "assistant", content: [], status: "in_progress" };
602
- const textPart = { type: "output_text", text: "" };
603
715
 
604
716
  controller.enqueue(encoder.encode(`event: response.created\ndata: ${JSON.stringify({ type: "response.created", response: baseResp })}\n\n`));
605
717
  controller.enqueue(encoder.encode(`event: response.in_progress\ndata: ${JSON.stringify({ type: "response.in_progress", response: baseResp })}\n\n`));
606
- controller.enqueue(encoder.encode(`event: response.output_item.added\ndata: ${JSON.stringify({ type: "response.output_item.added", output_index: 0, item: msgItem })}\n\n`));
607
- controller.enqueue(encoder.encode(`event: response.content_part.added\ndata: ${JSON.stringify({ type: "response.content_part.added", item_id: msgId, output_index: 0, content_index: 0, part: textPart })}\n\n`));
718
+
719
+ // When tools are present, skip pre-fabricated output_item/content_part events;
720
+ // the real events (including function_call items) will be forwarded from the remote.
721
+ if (!hasTools) {
722
+ const msgId = `msg_${id}`;
723
+ const msgItem = { type: "message", id: msgId, role: "assistant", content: [], status: "in_progress" };
724
+ const textPart = { type: "output_text", text: "" };
725
+ controller.enqueue(encoder.encode(`event: response.output_item.added\ndata: ${JSON.stringify({ type: "response.output_item.added", output_index: 0, item: msgItem })}\n\n`));
726
+ controller.enqueue(encoder.encode(`event: response.content_part.added\ndata: ${JSON.stringify({ type: "response.content_part.added", item_id: msgId, output_index: 0, content_index: 0, part: textPart })}\n\n`));
727
+ }
608
728
  }
609
729
 
610
730
  /** Emit responses API stream completion events (output_text.done → response.completed). */
@@ -697,7 +817,7 @@ export class ModelProxy {
697
817
  this.pending.delete(requestId);
698
818
  this.peerManager.router.markFailed(requestId);
699
819
  reject(new Error(`Model request to "${targetNodeId}" timed out`));
700
- }, MODEL_TIMEOUT);
820
+ }, this.modelTimeout);
701
821
 
702
822
  this.pending.set(requestId, {
703
823
  resolve: resolve as (v: unknown) => void,
@@ -781,7 +901,7 @@ export class ModelProxy {
781
901
  .map((p) => p.nodeId),
782
902
  );
783
903
 
784
- const models = this.config.proxyModels.map((m) => {
904
+ const models = this.allProxyModels.map((m) => {
785
905
  const entry: Record<string, unknown> = {
786
906
  id: m.id,
787
907
  object: "model",
@@ -820,18 +940,17 @@ export class ModelProxy {
820
940
  // process the request and sent model_res instead of model_stream).
821
941
  if (pending.stream) {
822
942
  if (!frame.payload.success && pending.controller && pending.encoder) {
823
- clearTimeout(pending.timer);
824
- this.pending.delete(frame.id);
825
- this.streamText.delete(frame.id);
943
+ const stableId = pending.stableStreamId ?? frame.id;
826
944
  // Try failover if no content has been sent yet
827
945
  if (!pending.hasContent && pending.failoverCandidates?.length && pending.buildFrame) {
946
+ this.cleanupRequest(frame.id);
828
947
  this.tryStreamFailover(
829
- pending.stableStreamId ?? frame.id, pending.responseFormat, pending.controller, pending.encoder,
948
+ stableId, pending.responseFormat, pending.controller, pending.encoder,
830
949
  pending.model ?? "", pending.failoverCandidates, pending.buildFrame,
831
950
  `remote error: ${frame.payload.error}`,
832
951
  );
833
952
  } else {
834
- const stableId = pending.stableStreamId ?? frame.id;
953
+ this.cleanupRequest(frame.id, stableId);
835
954
  try {
836
955
  const errMsg = `[ClawMatrix] Remote error: ${frame.payload.error}`;
837
956
  if (pending.responseFormat === "responses") {
@@ -843,14 +962,12 @@ export class ModelProxy {
843
962
  }
844
963
  pending.controller.close();
845
964
  } catch { /* controller may already be closed */ }
846
- this.streamSetupSent.delete(stableId);
847
965
  }
848
966
  }
849
967
  return;
850
968
  }
851
969
 
852
- clearTimeout(pending.timer);
853
- this.pending.delete(frame.id);
970
+ this.cleanupRequest(frame.id);
854
971
  pending.resolve(frame.payload);
855
972
  }
856
973
 
@@ -870,9 +987,8 @@ export class ModelProxy {
870
987
  this.handleModelStreamChat(frame, pending);
871
988
  }
872
989
  } catch {
873
- clearTimeout(pending.timer);
874
- this.pending.delete(frame.id);
875
- this.streamText.delete(frame.id);
990
+ this.cleanupRequest(frame.id, pending.stableStreamId);
991
+ try { pending.controller?.close(); } catch { /* already closed */ }
876
992
  }
877
993
  }
878
994
 
@@ -889,12 +1005,13 @@ export class ModelProxy {
889
1005
  pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
890
1006
  pending.controller!.enqueue(pending.encoder!.encode("data: [DONE]\n\n"));
891
1007
  pending.controller!.close();
892
- clearTimeout(pending.timer);
893
- this.pending.delete(frame.id);
894
- this.streamSetupSent.delete(stableId);
1008
+ this.cleanupRequest(frame.id, stableId);
895
1009
  } else {
896
1010
  // Use full deltaObj when available (carries tool_calls etc.), otherwise simple text delta
897
- const delta = frame.payload.deltaObj ?? { content: frame.payload.delta };
1011
+ const delta = frame.payload.deltaObj ?? {
1012
+ content: frame.payload.delta,
1013
+ ...(frame.payload.reasoningDelta && { reasoning_content: frame.payload.reasoningDelta }),
1014
+ };
898
1015
  const chunkStableId = pending.stableStreamId ?? frame.id;
899
1016
  const chunk = { id: `chatcmpl-${chunkStableId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta, finish_reason: null }] };
900
1017
  pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(chunk)}\n\n`));
@@ -907,13 +1024,27 @@ export class ModelProxy {
907
1024
  const stableId = pending.stableStreamId ?? frame.id;
908
1025
  const fullText = this.streamText.get(frame.id) ?? "";
909
1026
  this.streamText.delete(frame.id);
910
- this.enqueueResponsesStreamDone(pending.controller!, pending.encoder!, stableId, pending.model ?? "", fullText, frame.payload.usage);
1027
+
1028
+ // If the remote forwarded the full response.completed event, emit it directly
1029
+ const doneObj = frame.payload.deltaObj as { event?: string; data?: unknown } | undefined;
1030
+ if (doneObj?.event === "response.completed" && doneObj.data) {
1031
+ pending.controller!.enqueue(pending.encoder!.encode(`event: response.completed\ndata: ${JSON.stringify(doneObj.data)}\n\n`));
1032
+ } else {
1033
+ // Fallback: reconstruct text-only completion
1034
+ this.enqueueResponsesStreamDone(pending.controller!, pending.encoder!, stableId, pending.model ?? "", fullText, frame.payload.usage);
1035
+ }
911
1036
  pending.controller!.close();
912
- clearTimeout(pending.timer);
913
- this.pending.delete(frame.id);
914
- this.streamSetupSent.delete(stableId);
1037
+ this.cleanupRequest(frame.id, stableId);
915
1038
  } else {
916
- // Accumulate text for done event
1039
+ // Forward structured Responses API events (function_call, output_item, etc.)
1040
+ const obj = frame.payload.deltaObj as { event?: string; data?: unknown } | undefined;
1041
+ if (obj?.event && obj.data) {
1042
+ pending.controller!.enqueue(pending.encoder!.encode(`event: ${obj.event}\ndata: ${JSON.stringify(obj.data)}\n\n`));
1043
+ pending.hasContent = true;
1044
+ return;
1045
+ }
1046
+
1047
+ // Text delta
917
1048
  this.streamText.set(frame.id, (this.streamText.get(frame.id) ?? "") + (frame.payload.delta ?? ""));
918
1049
  const respStableId = pending.stableStreamId ?? frame.id;
919
1050
  const evt = { type: "response.output_text.delta", item_id: `msg_${respStableId}`, output_index: 0, content_index: 0, delta: frame.payload.delta };
@@ -922,25 +1053,30 @@ export class ModelProxy {
922
1053
  }
923
1054
  }
924
1055
 
925
- private sendStreamDelta(to: string, id: string, delta: string, deltaObj?: unknown) {
1056
+ private sendStreamDelta(to: string, id: string, delta: string, deltaObj?: unknown, reasoningDelta?: string) {
926
1057
  this.peerManager.sendTo(to, {
927
1058
  type: "model_stream",
928
1059
  id,
929
1060
  from: this.config.nodeId,
930
1061
  to,
931
1062
  timestamp: Date.now(),
932
- payload: { delta, ...(deltaObj !== undefined && { deltaObj }), done: false },
1063
+ payload: {
1064
+ delta,
1065
+ ...(reasoningDelta && { reasoningDelta }),
1066
+ ...(deltaObj !== undefined && { deltaObj }),
1067
+ done: false,
1068
+ },
933
1069
  } satisfies ModelStreamChunk);
934
1070
  }
935
1071
 
936
- private sendStreamDone(to: string, id: string, usage?: { inputTokens: number; outputTokens: number }) {
1072
+ private sendStreamDone(to: string, id: string, usage?: { inputTokens: number; outputTokens: number }, deltaObj?: unknown) {
937
1073
  this.peerManager.sendTo(to, {
938
1074
  type: "model_stream",
939
1075
  id,
940
1076
  from: this.config.nodeId,
941
1077
  to,
942
1078
  timestamp: Date.now(),
943
- payload: { delta: "", done: true, usage },
1079
+ payload: { delta: "", done: true, usage, ...(deltaObj !== undefined && { deltaObj }) },
944
1080
  } satisfies ModelStreamChunk);
945
1081
  }
946
1082
 
@@ -967,21 +1103,27 @@ export class ModelProxy {
967
1103
 
968
1104
  try {
969
1105
  const endpoint = this.resolveModelEndpoint(model);
970
- const isResponsesApi = endpoint.api === "openai-responses" || endpoint.api === "openai-codex-responses";
1106
+ if (!endpoint) {
1107
+ this.peerManager.sendTo(from, {
1108
+ type: "model_res", id, from: this.config.nodeId, to: from, timestamp: Date.now(),
1109
+ payload: { success: false, error: `No direct API endpoint configured for model "${payload.model}" (provider: ${model.provider}). Configure baseUrl/apiKey in ClawMatrix model config or OpenClaw provider config.` },
1110
+ } satisfies ModelResponse);
1111
+ return;
1112
+ }
1113
+ // Use payload.api override from requesting side, or cached API from previous auto-detection
1114
+ const cached = this.modelApiCache.get(model.id);
1115
+ const cachedApi = (cached && Date.now() - cached.ts < ModelProxy.MODEL_API_CACHE_TTL) ? cached.api : undefined;
1116
+ if (cached && !cachedApi) this.modelApiCache.delete(model.id); // expired
1117
+ const effectiveApi = payload.api ?? cachedApi ?? endpoint.api;
1118
+ const isResponsesApi = effectiveApi === "openai-responses" || effectiveApi === "openai-codex-responses";
971
1119
  const path = isResponsesApi ? "/responses" : "/chat/completions";
972
1120
  const url = `${endpoint.baseUrl}${path}`;
973
1121
  const headers: Record<string, string> = { "Content-Type": "application/json" };
974
1122
 
975
- if (endpoint.direct) {
976
- if (endpoint.apiKey) headers["Authorization"] = `Bearer ${endpoint.apiKey}`;
977
- debug("model_req", `direct API call to ${url} (api=${endpoint.api})`);
978
- } else {
979
- const { authHeader } = this.gatewayInfo;
980
- if (authHeader) headers["Authorization"] = authHeader;
981
- debug("model_req", `gateway fallback to ${url}`);
982
- }
1123
+ if (endpoint.apiKey) headers["Authorization"] = `Bearer ${endpoint.apiKey}`;
1124
+ debug("model_req", `direct API call to ${url} (api=${endpoint.api})`);
983
1125
 
984
- const modelField = endpoint.direct ? model.id : `${model.provider}/${model.id}`;
1126
+ const modelField = model.id;
985
1127
  const srcFormat = payload.inputFormat ?? "chat";
986
1128
 
987
1129
  // Convert messages between formats if source and target API differ
@@ -996,6 +1138,8 @@ export class ModelProxy {
996
1138
  stream: payload.stream,
997
1139
  temperature: payload.temperature,
998
1140
  max_output_tokens: payload.maxTokens,
1141
+ ...(payload.tools && { tools: payload.tools }),
1142
+ ...(payload.toolChoice !== undefined && { tool_choice: payload.toolChoice }),
999
1143
  };
1000
1144
  } else {
1001
1145
  const messages = srcFormat === "chat"
@@ -1008,6 +1152,8 @@ export class ModelProxy {
1008
1152
  max_tokens: payload.maxTokens,
1009
1153
  stream: payload.stream,
1010
1154
  ...(payload.stream ? { stream_options: { include_usage: true } } : {}),
1155
+ ...(payload.tools && { tools: payload.tools }),
1156
+ ...(payload.toolChoice !== undefined && { tool_choice: payload.toolChoice }),
1011
1157
  };
1012
1158
  }
1013
1159
 
@@ -1031,6 +1177,8 @@ export class ModelProxy {
1031
1177
  let buffer = "";
1032
1178
  let lastUsage: { inputTokens: number; outputTokens: number } | undefined;
1033
1179
  let streamDone = false;
1180
+ let contentSent = false;
1181
+ let completedEvent: unknown = undefined;
1034
1182
 
1035
1183
  while (!streamDone) {
1036
1184
  const { done, value } = await reader.read();
@@ -1053,7 +1201,6 @@ export class ModelProxy {
1053
1201
  if (!line.startsWith("data: ")) continue;
1054
1202
  const data = line.slice(6).trim();
1055
1203
  if (data === "[DONE]") {
1056
- this.sendStreamDone(from, id, lastUsage);
1057
1204
  streamDone = true;
1058
1205
  break;
1059
1206
  }
@@ -1067,33 +1214,38 @@ export class ModelProxy {
1067
1214
  const delta = parsed.delta || "";
1068
1215
  if (delta) {
1069
1216
  this.sendStreamDelta(from, id, delta);
1217
+ contentSent = true;
1070
1218
  }
1219
+ } else if (
1220
+ evtType === "response.output_item.added" ||
1221
+ evtType === "response.output_item.done" ||
1222
+ evtType === "response.content_part.added" ||
1223
+ evtType === "response.content_part.done" ||
1224
+ evtType === "response.output_text.done" ||
1225
+ evtType === "response.function_call_arguments.delta" ||
1226
+ evtType === "response.function_call_arguments.done"
1227
+ ) {
1228
+ // Forward structured Responses API events via deltaObj
1229
+ this.sendStreamDelta(from, id, "", { event: evtType, data: parsed });
1230
+ contentSent = true;
1071
1231
  } else if (evtType === "response.completed") {
1072
1232
  const usage = parsed.response?.usage;
1073
- if (usage) {
1074
- lastUsage = {
1075
- inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
1076
- outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
1077
- };
1078
- }
1079
- this.sendStreamDone(from, id, lastUsage);
1233
+ lastUsage = parseUsage(usage) ?? lastUsage;
1234
+ completedEvent = { event: evtType, data: parsed };
1080
1235
  streamDone = true;
1081
1236
  break;
1082
1237
  }
1083
1238
  } else {
1084
1239
  // Chat completions format
1085
- if (parsed.usage) {
1086
- lastUsage = {
1087
- inputTokens: parsed.usage.prompt_tokens,
1088
- outputTokens: parsed.usage.completion_tokens,
1089
- };
1090
- }
1240
+ lastUsage = parseUsage(parsed.usage) ?? lastUsage;
1091
1241
  const d = parsed.choices?.[0]?.delta;
1092
- const delta = d?.content || d?.reasoning_content || "";
1242
+ const delta = d?.content || "";
1243
+ const reasoningDelta = d?.reasoning_content || "";
1093
1244
  // Pass full delta object when it contains tool_calls or other structured data
1094
1245
  const hasStructured = d?.tool_calls || d?.refusal != null;
1095
- if (delta || hasStructured) {
1096
- this.sendStreamDelta(from, id, delta, hasStructured ? d : undefined);
1246
+ if (delta || reasoningDelta || hasStructured) {
1247
+ this.sendStreamDelta(from, id, delta, hasStructured ? d : undefined, reasoningDelta || undefined);
1248
+ contentSent = true;
1097
1249
  }
1098
1250
  }
1099
1251
  } catch {
@@ -1102,9 +1254,30 @@ export class ModelProxy {
1102
1254
  currentEvent = "";
1103
1255
  }
1104
1256
  }
1105
- // If the upstream closed without sending [DONE] or response.completed,
1106
- // send a completion frame so the requesting side doesn't hang.
1107
- if (!streamDone) {
1257
+
1258
+ // Responses API stream produced no content — fall back to chat completions
1259
+ if (isResponsesApi && !contentSent && !cachedApi) {
1260
+ debug("model_req", `responses API stream produced no content for "${model.id}", retrying with chat completions`);
1261
+ const chatResult = await this.retryWithChatCompletions(endpoint, modelField, payload, headers);
1262
+ if (chatResult) {
1263
+ this.modelApiCache.set(model.id, { api: "openai-completions", ts: Date.now() });
1264
+ debug("model_req", `cached "${model.id}" as openai-completions (stream fallback)`);
1265
+ if (chatResult.content) {
1266
+ this.sendStreamDelta(from, id, chatResult.content);
1267
+ }
1268
+ this.sendStreamDone(from, id, chatResult.usage);
1269
+ } else if (completedEvent) {
1270
+ this.sendStreamDone(from, id, lastUsage, completedEvent);
1271
+ } else {
1272
+ this.sendStreamDone(from, id, lastUsage);
1273
+ }
1274
+ } else if (completedEvent) {
1275
+ this.sendStreamDone(from, id, lastUsage, completedEvent);
1276
+ } else if (!streamDone) {
1277
+ // Upstream closed without sending [DONE] or response.completed
1278
+ this.sendStreamDone(from, id, lastUsage);
1279
+ } else {
1280
+ // Chat completions [DONE] received
1108
1281
  this.sendStreamDone(from, id, lastUsage);
1109
1282
  }
1110
1283
  } finally {
@@ -1112,12 +1285,33 @@ export class ModelProxy {
1112
1285
  }
1113
1286
  } else {
1114
1287
  // Non-streaming response
1115
- const result = await response.json();
1288
+ const responseText = await response.text();
1289
+ let result: Record<string, unknown>;
1290
+ let chatFallbackResult: Awaited<ReturnType<ModelProxy["retryWithChatCompletions"]>> = null;
1291
+ try {
1292
+ result = JSON.parse(responseText);
1293
+ } catch {
1294
+ // Upstream returned non-JSON (e.g. SSE in non-stream mode) — try chat completions fallback
1295
+ if (!cachedApi && isResponsesApi) {
1296
+ debug("model_req", `responses API returned non-JSON for "${model.id}", retrying with chat completions`);
1297
+ chatFallbackResult = await this.retryWithChatCompletions(endpoint, modelField, payload, headers);
1298
+ if (chatFallbackResult) {
1299
+ this.modelApiCache.set(model.id, { api: "openai-completions", ts: Date.now() });
1300
+ debug("model_req", `cached "${model.id}" as openai-completions (non-JSON fallback)`);
1301
+ }
1302
+ }
1303
+ if (!chatFallbackResult) throw new Error(`Upstream returned non-JSON: ${responseText.slice(0, 100)}`);
1304
+ result = {}; // unused — chatFallbackResult takes precedence
1305
+ }
1116
1306
  let content: string;
1307
+ let reasoning = "";
1117
1308
  let message: unknown | undefined;
1118
1309
  let usage: { inputTokens: number; outputTokens: number } | undefined;
1119
1310
 
1120
- if (isResponsesApi) {
1311
+ if (chatFallbackResult) {
1312
+ ({ content, message, usage } = chatFallbackResult);
1313
+ reasoning = chatFallbackResult.reasoning ?? "";
1314
+ } else if (isResponsesApi) {
1121
1315
  // Responses API: extract text from output[].content[].text
1122
1316
  content = "";
1123
1317
  const output = result.output as { type?: string; content?: { type?: string; text?: string }[] }[] | undefined;
@@ -1130,28 +1324,40 @@ export class ModelProxy {
1130
1324
  }
1131
1325
  }
1132
1326
  }
1133
- // Carry full output array for structured data (function_call items, etc.)
1134
- message = result.output;
1135
- if (result.usage) {
1136
- usage = {
1137
- inputTokens: result.usage.input_tokens ?? result.usage.prompt_tokens ?? 0,
1138
- outputTokens: result.usage.output_tokens ?? result.usage.completion_tokens ?? 0,
1139
- };
1327
+
1328
+ // Auto-detect: if Responses API returned empty output but produced tokens,
1329
+ // the upstream adapter likely doesn't support Responses API properly.
1330
+ // Retry with chat completions and cache the result.
1331
+ const parsedUsage = parseUsage(result.usage as Record<string, number> | undefined);
1332
+ const hasMessage = Array.isArray(output) && output.some((o: { type?: string }) => o.type === "message");
1333
+ if (!hasMessage && (parsedUsage?.outputTokens ?? 0) > 0 && !cachedApi) {
1334
+ debug("model_req", `responses API returned empty output for "${model.id}" (output_tokens=${parsedUsage!.outputTokens}), retrying with chat completions`);
1335
+ const chatResult = await this.retryWithChatCompletions(endpoint, modelField, payload, headers);
1336
+ if (chatResult) {
1337
+ this.modelApiCache.set(model.id, { api: "openai-completions", ts: Date.now() });
1338
+ debug("model_req", `cached "${model.id}" as openai-completions`);
1339
+ ({ content, message, usage } = chatResult);
1340
+ } else {
1341
+ message = result.output;
1342
+ usage = parsedUsage;
1343
+ }
1344
+ } else {
1345
+ // Carry full output array for structured data (function_call items, etc.)
1346
+ message = result.output;
1347
+ usage = parsedUsage;
1140
1348
  }
1141
1349
  } else {
1142
1350
  // Chat completions format
1143
1351
  const msg = result.choices?.[0]?.message;
1144
- content = msg?.content || msg?.reasoning_content || "";
1352
+ content = msg?.content || "";
1353
+ reasoning = msg?.reasoning_content || "";
1145
1354
  // Carry full message object when it has tool_calls or other structured data
1146
1355
  if (msg?.tool_calls || msg?.refusal != null || msg?.function_call) {
1147
1356
  message = msg;
1148
1357
  }
1149
- if (result.usage) {
1150
- usage = {
1151
- inputTokens: result.usage.prompt_tokens,
1152
- outputTokens: result.usage.completion_tokens,
1153
- };
1154
- }
1358
+ usage = parseUsage(result.usage);
1359
+ // If no content but has reasoning, use reasoning as content fallback
1360
+ if (!content && reasoning) content = reasoning;
1155
1361
  }
1156
1362
 
1157
1363
  this.peerManager.sendTo(from, {
@@ -1163,6 +1369,7 @@ export class ModelProxy {
1163
1369
  payload: {
1164
1370
  success: true,
1165
1371
  content,
1372
+ ...(reasoning && { reasoning }),
1166
1373
  ...(message !== undefined && { message }),
1167
1374
  usage,
1168
1375
  },
@@ -1182,4 +1389,45 @@ export class ModelProxy {
1182
1389
  } satisfies ModelResponse);
1183
1390
  }
1184
1391
  }
1392
+
1393
/**
 * Retry a model request against the chat completions endpoint (fallback from the Responses API).
 *
 * Sends a single non-streaming POST to `${endpoint.baseUrl}/chat/completions`, reusing the
 * caller's headers. Messages are forwarded unchanged when `payload.inputFormat` is "chat"
 * (or unset); otherwise they are first converted with `ModelProxy.normalizeResponsesInput`.
 *
 * @param endpoint - Upstream endpoint description; only `baseUrl` is read here.
 * @param modelField - Value sent as the `model` field of the request body.
 * @param payload - Original request payload (messages, sampling options, tools).
 * @param headers - HTTP headers to send with the retry.
 * @returns The extracted content / reasoning / structured message / usage, or `null` when the
 *   retry did not yield a usable completion (network error, timeout, non-2xx status, or a body
 *   without `choices[0].message`). Errors are caught and logged via `debug`, so the returned
 *   promise resolves to `null` instead of rejecting.
 */
private async retryWithChatCompletions(
  endpoint: { baseUrl: string; apiKey?: string; direct: boolean; api: string },
  modelField: string,
  payload: ModelRequest["payload"],
  headers: Record<string, string>,
): Promise<{ content: string; reasoning?: string; message?: unknown; usage?: { inputTokens: number; outputTokens: number } } | null> {
  type ChatMessage = {
    content?: string | null;
    reasoning_content?: string | null;
    tool_calls?: unknown;
    refusal?: unknown;
    function_call?: unknown;
  };
  type ChatCompletionBody = {
    choices?: { message?: ChatMessage }[];
    usage?: Record<string, number>;
  };

  try {
    const srcFormat = payload.inputFormat ?? "chat";
    const messages = srcFormat === "chat"
      ? payload.messages
      : ModelProxy.normalizeResponsesInput(payload.messages);
    const chatBody: Record<string, unknown> = {
      model: modelField,
      messages,
      temperature: payload.temperature,
      max_tokens: payload.maxTokens,
      stream: false,
      ...(payload.tools && { tools: payload.tools }),
      ...(payload.toolChoice !== undefined && { tool_choice: payload.toolChoice }),
    };

    // Bound the retry so a stalled upstream cannot hang the fallback forever.
    // NOTE(review): assumes this.modelTimeout is in milliseconds, like DEFAULT_MODEL_TIMEOUT — confirm.
    // Out-of-range values (NaN, <= 0, > uint32) skip the timeout rather than make fetch setup throw.
    const timeoutMs = Math.floor(this.modelTimeout);
    const signal = timeoutMs > 0 && timeoutMs <= 0xffff_ffff
      ? AbortSignal.timeout(timeoutMs)
      : undefined;

    const chatUrl = `${endpoint.baseUrl}/chat/completions`;
    const chatResp = await fetch(chatUrl, {
      method: "POST",
      headers,
      body: JSON.stringify(chatBody),
      ...(signal && { signal }),
    });

    if (!chatResp.ok) {
      debug("model_req", `retryWithChatCompletions got HTTP ${chatResp.status} for "${modelField}"`);
      // Release the unread body so the underlying connection is not held open.
      await chatResp.body?.cancel().catch(() => undefined);
      return null;
    }

    const chatResult = (await chatResp.json()) as ChatCompletionBody | null;
    const msg = chatResult?.choices?.[0]?.message;
    if (!msg) {
      // A 2xx body without a message (e.g. an error object) is not a successful fallback;
      // returning null keeps callers from caching this model as chat-completions-only.
      debug("model_req", `retryWithChatCompletions got no message for "${modelField}"`);
      return null;
    }

    const content = msg.content || "";
    const reasoningContent = msg.reasoning_content || "";
    // Carry the full message object only when it holds structured data.
    const message = (msg.tool_calls || msg.refusal != null || msg.function_call) ? msg : undefined;
    const usage = parseUsage(chatResult?.usage);
    // If there is no content but there is reasoning, reasoning doubles as the content.
    return { content: content || reasoningContent, reasoning: reasoningContent || undefined, message, usage };
  } catch (err) {
    debug("model_req", `retryWithChatCompletions failed for "${modelField}": ${err instanceof Error ? err.message : String(err)}`);
    return null;
  }
}
1185
1433
  }