clawmatrix 0.1.22 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/package.json +4 -2
- package/src/acp-proxy.ts +2073 -0
- package/src/audit.ts +42 -0
- package/src/auth.ts +2 -3
- package/src/cli.ts +76 -2
- package/src/cluster-service.ts +243 -3
- package/src/compat.ts +84 -3
- package/src/config.ts +117 -4
- package/src/connection.ts +290 -85
- package/src/crypto.ts +179 -0
- package/src/debug.ts +15 -2
- package/src/e2e/helpers.ts +318 -0
- package/src/handoff.ts +132 -87
- package/src/identity.ts +95 -0
- package/src/index.ts +539 -45
- package/src/knowledge-sync.ts +777 -205
- package/src/local-tools.ts +9 -2
- package/src/model-proxy.ts +358 -110
- package/src/peer-approval.ts +628 -0
- package/src/peer-manager.ts +270 -38
- package/src/rate-limiter.ts +88 -0
- package/src/router.ts +32 -10
- package/src/sentinel-manager.ts +142 -0
- package/src/sentinel.ts +618 -0
- package/src/task-activity.ts +74 -0
- package/src/terminal.ts +566 -0
- package/src/tool-proxy.ts +127 -3
- package/src/tools/cluster-acp.ts +237 -0
- package/src/tools/cluster-batch.ts +76 -0
- package/src/tools/cluster-diagnostic.ts +174 -0
- package/src/tools/cluster-edit.ts +70 -0
- package/src/tools/cluster-peers.ts +59 -14
- package/src/tools/cluster-terminal.ts +232 -0
- package/src/tools/cluster-tool.ts +26 -11
- package/src/types.ts +477 -3
- package/src/web.ts +2 -2
package/src/model-proxy.ts
CHANGED
|
@@ -11,7 +11,16 @@ import type {
|
|
|
11
11
|
import { debug } from "./debug.ts";
|
|
12
12
|
import { readBody } from "./http-utils.ts";
|
|
13
13
|
|
|
14
|
-
const
|
|
14
|
+
const DEFAULT_MODEL_TIMEOUT = 120_000; // 2 minutes
|
|
15
|
+
|
|
16
|
+
/** Normalize usage from OpenAI-compatible APIs (supports both field naming conventions). */
|
|
17
|
+
function parseUsage(usage: Record<string, number> | undefined): { inputTokens: number; outputTokens: number } | undefined {
|
|
18
|
+
if (!usage) return undefined;
|
|
19
|
+
return {
|
|
20
|
+
inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
|
|
21
|
+
outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
|
|
22
|
+
};
|
|
23
|
+
}
|
|
15
24
|
const MAX_STREAM_BUFFER = 1_048_576; // 1MB — guard against upstream not sending newlines
|
|
16
25
|
|
|
17
26
|
type ResponseFormat = "chat" | "responses";
|
|
@@ -53,12 +62,54 @@ export class ModelProxy {
|
|
|
53
62
|
private httpServer: Server | null = null;
|
|
54
63
|
private gatewayInfo: GatewayInfo;
|
|
55
64
|
private openclawConfig: OpenClawConfig;
|
|
65
|
+
private readonly modelTimeout: number;
|
|
66
|
+
|
|
67
|
+
/** Dynamically discovered proxy models from peer capabilities (auto-discovery). */
|
|
68
|
+
private discoveredModels: import("./config.ts").ProxyModel[] = [];
|
|
69
|
+
|
|
70
|
+
/** Cache of models that need a different API format than configured (detected at runtime).
|
|
71
|
+
* Entries expire after 10 minutes so upstream upgrades are eventually detected. */
|
|
72
|
+
private modelApiCache = new Map<string, { api: string; ts: number }>();
|
|
73
|
+
private static readonly MODEL_API_CACHE_TTL = 600_000; // 10 minutes
|
|
74
|
+
private cacheCleanupTimer: ReturnType<typeof setInterval> | null = null;
|
|
56
75
|
|
|
57
76
|
constructor(config: ClawMatrixConfig, peerManager: PeerManager, gatewayInfo: GatewayInfo, openclawConfig: OpenClawConfig) {
|
|
58
77
|
this.config = config;
|
|
59
78
|
this.peerManager = peerManager;
|
|
60
79
|
this.gatewayInfo = gatewayInfo;
|
|
61
80
|
this.openclawConfig = openclawConfig;
|
|
81
|
+
this.modelTimeout = config.modelTimeout ?? DEFAULT_MODEL_TIMEOUT;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/** All proxy models: static config + dynamically discovered from peers. */
|
|
85
|
+
get allProxyModels(): import("./config.ts").ProxyModel[] {
|
|
86
|
+
if (this.discoveredModels.length === 0) return this.config.proxyModels;
|
|
87
|
+
return [...this.config.proxyModels, ...this.discoveredModels];
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/** Rebuild discovered models from current peer capabilities.
|
|
91
|
+
* Skips models already present in static config.proxyModels. */
|
|
92
|
+
updateDiscoveredModels(peers: import("./router.ts").RouteEntry[]) {
|
|
93
|
+
const staticKeys = new Set(this.config.proxyModels.map((m) => `${m.nodeId}/${m.id}`));
|
|
94
|
+
const next: import("./config.ts").ProxyModel[] = [];
|
|
95
|
+
for (const peer of peers) {
|
|
96
|
+
for (const m of peer.models) {
|
|
97
|
+
const key = `${peer.nodeId}/${m.id}`;
|
|
98
|
+
if (staticKeys.has(key)) continue;
|
|
99
|
+
next.push({
|
|
100
|
+
id: m.id,
|
|
101
|
+
nodeId: peer.nodeId,
|
|
102
|
+
provider: m.provider,
|
|
103
|
+
description: m.description,
|
|
104
|
+
input: m.input,
|
|
105
|
+
compat: m.compat as import("./config.ts").ProxyModel["compat"],
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
this.discoveredModels = next;
|
|
110
|
+
if (next.length > 0) {
|
|
111
|
+
debug("proxy", `Auto-discovered ${next.length} model(s) from peers: ${next.map((m) => `${m.nodeId}/${m.id}`).join(", ")}`);
|
|
112
|
+
}
|
|
62
113
|
}
|
|
63
114
|
|
|
64
115
|
/**
|
|
@@ -212,8 +263,18 @@ export class ModelProxy {
|
|
|
212
263
|
return items;
|
|
213
264
|
}
|
|
214
265
|
|
|
215
|
-
/**
|
|
216
|
-
|
|
266
|
+
/**
|
|
267
|
+
* Resolve API endpoint for a model: explicit config > OpenClaw provider > null.
|
|
268
|
+
*
|
|
269
|
+
* ⚠️ 重要:绝对不能 fallback 到 OpenClaw gateway 的 /v1/chat/completions!
|
|
270
|
+
* OpenClaw gateway 的 /v1/chat/completions 会走 Agent 系统,每次请求都会创建
|
|
271
|
+
* 一个新的 Agent session(带记忆、system prompt 等)。这会导致:
|
|
272
|
+
* - 远程节点(如 iPhone)的每次 model_req 都在本地产生一个多余的 OpenClaw 会话
|
|
273
|
+
* - 模型响应被 OpenClaw Agent 的 system prompt 和记忆污染,结果不正确
|
|
274
|
+
*
|
|
275
|
+
* 如果找不到直连 API 端点,必须返回 null 让调用方报错,而不是静默 fallback。
|
|
276
|
+
*/
|
|
277
|
+
private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string; api?: string }): { baseUrl: string; apiKey?: string; direct: boolean; api: string } | null {
|
|
217
278
|
const defaultApi = "openai-completions";
|
|
218
279
|
|
|
219
280
|
// 1. Explicit baseUrl in ClawMatrix model config
|
|
@@ -227,9 +288,11 @@ export class ModelProxy {
|
|
|
227
288
|
}
|
|
228
289
|
|
|
229
290
|
// 2. Read from OpenClaw's models.providers[provider]
|
|
230
|
-
const
|
|
231
|
-
|
|
232
|
-
|
|
291
|
+
const modelsConfig = (this.openclawConfig as Record<string, unknown>).models;
|
|
292
|
+
const providers = (modelsConfig && typeof modelsConfig === "object")
|
|
293
|
+
? (modelsConfig as { providers?: Record<string, { baseUrl?: string; apiKey?: string; api?: string }> }).providers
|
|
294
|
+
: undefined;
|
|
295
|
+
const providerConfig = providers?.[model.provider];
|
|
233
296
|
if (providerConfig?.baseUrl) {
|
|
234
297
|
return {
|
|
235
298
|
baseUrl: providerConfig.baseUrl.replace(/\/$/, ""),
|
|
@@ -239,18 +302,20 @@ export class ModelProxy {
|
|
|
239
302
|
};
|
|
240
303
|
}
|
|
241
304
|
|
|
242
|
-
//
|
|
243
|
-
|
|
244
|
-
return {
|
|
245
|
-
baseUrl: `http://127.0.0.1:${port}/v1`,
|
|
246
|
-
apiKey: undefined,
|
|
247
|
-
direct: false,
|
|
248
|
-
api: model.api ?? defaultApi,
|
|
249
|
-
};
|
|
305
|
+
// 找不到直连端点 → 返回 null(见上方注释,不能 fallback 到 gateway)
|
|
306
|
+
return null;
|
|
250
307
|
}
|
|
251
308
|
|
|
252
309
|
/** Start the local HTTP proxy server for OpenAI-compatible requests. */
|
|
253
310
|
start() {
|
|
311
|
+
// Periodically prune expired model API cache entries
|
|
312
|
+
this.cacheCleanupTimer = setInterval(() => {
|
|
313
|
+
const now = Date.now();
|
|
314
|
+
for (const [id, entry] of this.modelApiCache) {
|
|
315
|
+
if (now - entry.ts > ModelProxy.MODEL_API_CACHE_TTL) this.modelApiCache.delete(id);
|
|
316
|
+
}
|
|
317
|
+
}, ModelProxy.MODEL_API_CACHE_TTL);
|
|
318
|
+
|
|
254
319
|
this.httpServer = createServer(async (req, res) => {
|
|
255
320
|
try {
|
|
256
321
|
const url = new URL(req.url ?? "/", `http://${req.headers.host ?? "localhost"}`);
|
|
@@ -292,6 +357,10 @@ export class ModelProxy {
|
|
|
292
357
|
}
|
|
293
358
|
|
|
294
359
|
stop() {
|
|
360
|
+
if (this.cacheCleanupTimer) {
|
|
361
|
+
clearInterval(this.cacheCleanupTimer);
|
|
362
|
+
this.cacheCleanupTimer = null;
|
|
363
|
+
}
|
|
295
364
|
if (this.httpServer) {
|
|
296
365
|
this.httpServer.close();
|
|
297
366
|
this.httpServer = null;
|
|
@@ -303,8 +372,17 @@ export class ModelProxy {
|
|
|
303
372
|
this.pending.clear();
|
|
304
373
|
this.streamText.clear();
|
|
305
374
|
this.streamSetupSent.clear();
|
|
375
|
+
this.modelApiCache.clear();
|
|
306
376
|
}
|
|
307
377
|
|
|
378
|
+
/** Clean up all tracking state for a request (pending, streamText, streamSetupSent). */
|
|
379
|
+
private cleanupRequest(id: string, stableStreamId?: string) {
|
|
380
|
+
const pending = this.pending.get(id);
|
|
381
|
+
if (pending) clearTimeout(pending.timer);
|
|
382
|
+
this.pending.delete(id);
|
|
383
|
+
this.streamText.delete(id);
|
|
384
|
+
if (stableStreamId) this.streamSetupSent.delete(stableStreamId);
|
|
385
|
+
}
|
|
308
386
|
|
|
309
387
|
private sendResponse(res: import("node:http").ServerResponse, response: ProxyResponse) {
|
|
310
388
|
res.writeHead(response.status, response.headers);
|
|
@@ -313,18 +391,34 @@ export class ModelProxy {
|
|
|
313
391
|
} else {
|
|
314
392
|
// Stream response
|
|
315
393
|
const reader = response.body.getReader();
|
|
394
|
+
let finished = false;
|
|
395
|
+
|
|
396
|
+
// Clean up stream when client disconnects mid-stream
|
|
397
|
+
res.on("close", () => {
|
|
398
|
+
if (!finished) {
|
|
399
|
+
finished = true;
|
|
400
|
+
reader.cancel().catch(() => {});
|
|
401
|
+
}
|
|
402
|
+
});
|
|
403
|
+
|
|
316
404
|
const pump = (): void => {
|
|
317
405
|
reader.read().then(({ done, value }) => {
|
|
318
|
-
if (done) {
|
|
406
|
+
if (done || finished) {
|
|
407
|
+
finished = true;
|
|
319
408
|
reader.releaseLock();
|
|
320
|
-
res.end();
|
|
409
|
+
if (!res.writableEnded) res.end();
|
|
321
410
|
return;
|
|
322
411
|
}
|
|
323
|
-
res.write(value);
|
|
324
|
-
|
|
412
|
+
const ok = res.write(value);
|
|
413
|
+
if (ok) {
|
|
414
|
+
pump();
|
|
415
|
+
} else {
|
|
416
|
+
res.once("drain", pump);
|
|
417
|
+
}
|
|
325
418
|
}).catch(() => {
|
|
419
|
+
finished = true;
|
|
326
420
|
reader.releaseLock();
|
|
327
|
-
res.end();
|
|
421
|
+
if (!res.writableEnded) res.end();
|
|
328
422
|
});
|
|
329
423
|
};
|
|
330
424
|
pump();
|
|
@@ -340,16 +434,17 @@ export class ModelProxy {
|
|
|
340
434
|
} | { error: { status: number; message: string } } {
|
|
341
435
|
const slashIdx = rawModelId.indexOf("/");
|
|
342
436
|
let modelId: string;
|
|
343
|
-
|
|
437
|
+
const proxyModels = this.allProxyModels;
|
|
438
|
+
let matchingModels: (typeof proxyModels)[number][];
|
|
344
439
|
|
|
345
440
|
if (slashIdx > 0) {
|
|
346
441
|
const nodeId = rawModelId.slice(0, slashIdx);
|
|
347
442
|
modelId = rawModelId.slice(slashIdx + 1);
|
|
348
443
|
// Explicit node/model — only target that specific node, no failover to others
|
|
349
|
-
matchingModels =
|
|
444
|
+
matchingModels = proxyModels.filter((m) => m.id === modelId && m.nodeId === nodeId);
|
|
350
445
|
} else {
|
|
351
446
|
modelId = rawModelId;
|
|
352
|
-
matchingModels =
|
|
447
|
+
matchingModels = proxyModels.filter((m) => m.id === modelId);
|
|
353
448
|
}
|
|
354
449
|
|
|
355
450
|
if (matchingModels.length === 0) {
|
|
@@ -368,6 +463,16 @@ export class ModelProxy {
|
|
|
368
463
|
}
|
|
369
464
|
}
|
|
370
465
|
|
|
466
|
+
// Sort candidates by latency (lowest first) for optimal first-try and failover order
|
|
467
|
+
candidates.sort((a, b) => {
|
|
468
|
+
const routeA = this.peerManager.router.getRoute(a.routeNodeId);
|
|
469
|
+
const routeB = this.peerManager.router.getRoute(b.routeNodeId);
|
|
470
|
+
const aDirect = routeA?.connection ? 0 : 1;
|
|
471
|
+
const bDirect = routeB?.connection ? 0 : 1;
|
|
472
|
+
if (aDirect !== bDirect) return aDirect - bDirect;
|
|
473
|
+
return (routeA?.latencyMs ?? 0) - (routeB?.latencyMs ?? 0);
|
|
474
|
+
});
|
|
475
|
+
|
|
371
476
|
debug("proxy", `model raw="${rawModelId}" modelId="${modelId}" candidates=${candidates.map((c) => c.routeNodeId).join(",") || "none"}`);
|
|
372
477
|
if (candidates.length === 0) {
|
|
373
478
|
return { error: { status: 502, message: `No reachable node for model "${rawModelId}"` } };
|
|
@@ -377,7 +482,7 @@ export class ModelProxy {
|
|
|
377
482
|
}
|
|
378
483
|
|
|
379
484
|
private async handleChatCompletion(rawBody: string, _api: string): Promise<ProxyResponse> {
|
|
380
|
-
let body: { model: string; messages: unknown[]; stream?: boolean; temperature?: number; max_tokens?: number };
|
|
485
|
+
let body: { model: string; messages: unknown[]; stream?: boolean; temperature?: number; max_tokens?: number; tools?: unknown[]; tool_choice?: unknown };
|
|
381
486
|
try {
|
|
382
487
|
body = JSON.parse(rawBody);
|
|
383
488
|
} catch {
|
|
@@ -410,7 +515,7 @@ export class ModelProxy {
|
|
|
410
515
|
}
|
|
411
516
|
return {
|
|
412
517
|
type: "model_req", id, from: this.config.nodeId, to: candidate.routeNodeId, timestamp: Date.now(),
|
|
413
|
-
payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_tokens, stream },
|
|
518
|
+
payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages, tools: body.tools, toolChoice: body.tool_choice, temperature: body.temperature, maxTokens: body.max_tokens, stream },
|
|
414
519
|
};
|
|
415
520
|
};
|
|
416
521
|
const frame = buildFrame(first, requestId);
|
|
@@ -423,7 +528,7 @@ export class ModelProxy {
|
|
|
423
528
|
}
|
|
424
529
|
|
|
425
530
|
private async handleResponses(rawBody: string): Promise<ProxyResponse> {
|
|
426
|
-
let body: { model: string; input: unknown; stream?: boolean; temperature?: number; max_output_tokens?: number; instructions?: string };
|
|
531
|
+
let body: { model: string; input: unknown; stream?: boolean; temperature?: number; max_output_tokens?: number; instructions?: string; tools?: unknown[]; tool_choice?: unknown };
|
|
427
532
|
try {
|
|
428
533
|
body = JSON.parse(rawBody);
|
|
429
534
|
} catch {
|
|
@@ -467,7 +572,7 @@ export class ModelProxy {
|
|
|
467
572
|
}
|
|
468
573
|
return {
|
|
469
574
|
type: "model_req", id, from: this.config.nodeId, to: candidate.routeNodeId, timestamp: Date.now(),
|
|
470
|
-
payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages: inputItems, inputFormat: "responses", temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
|
|
575
|
+
payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages: inputItems, inputFormat: "responses", tools: body.tools, toolChoice: body.tool_choice, temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
|
|
471
576
|
};
|
|
472
577
|
};
|
|
473
578
|
const frame = buildFrame(first, requestId);
|
|
@@ -490,10 +595,21 @@ export class ModelProxy {
|
|
|
490
595
|
const encoder = new TextEncoder();
|
|
491
596
|
const model = frame.payload.model;
|
|
492
597
|
|
|
598
|
+
let streamController: ReadableStreamDefaultController;
|
|
493
599
|
const readable = new ReadableStream({
|
|
494
600
|
start: (controller) => {
|
|
601
|
+
streamController = controller;
|
|
495
602
|
this.startStreamAttempt(requestId, targetNodeId, frame, responseFormat, controller, encoder, model, failoverCandidates, buildFrame);
|
|
496
603
|
},
|
|
604
|
+
cancel: () => {
|
|
605
|
+
// Client disconnected — find and clean up the pending request using this controller
|
|
606
|
+
for (const [id, p] of this.pending) {
|
|
607
|
+
if (p.controller === streamController) {
|
|
608
|
+
this.cleanupRequest(id, p.stableStreamId);
|
|
609
|
+
break;
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
},
|
|
497
613
|
});
|
|
498
614
|
|
|
499
615
|
// Emit setup events for responses API (once, before any attempts)
|
|
@@ -524,11 +640,10 @@ export class ModelProxy {
|
|
|
524
640
|
const stableId = streamId ?? requestId;
|
|
525
641
|
|
|
526
642
|
const timer = setTimeout(() => {
|
|
527
|
-
this.
|
|
528
|
-
this.streamText.delete(requestId);
|
|
643
|
+
this.cleanupRequest(requestId);
|
|
529
644
|
this.peerManager.router.markFailed(requestId);
|
|
530
645
|
this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `model request to "${targetNodeId}" timed out`);
|
|
531
|
-
},
|
|
646
|
+
}, this.modelTimeout);
|
|
532
647
|
|
|
533
648
|
this.pending.set(requestId, {
|
|
534
649
|
resolve: () => {}, reject: () => {},
|
|
@@ -542,14 +657,14 @@ export class ModelProxy {
|
|
|
542
657
|
|
|
543
658
|
// Emit setup events for responses API (only once per stream, keyed by stableId)
|
|
544
659
|
if (responseFormat === "responses" && !this.streamSetupSent.has(stableId)) {
|
|
545
|
-
|
|
660
|
+
const hasTools = Array.isArray(frame.payload.tools) && frame.payload.tools.length > 0;
|
|
661
|
+
this.enqueueResponsesStreamSetup(controller, encoder, stableId, model, hasTools);
|
|
546
662
|
this.streamSetupSent.add(stableId);
|
|
547
663
|
}
|
|
548
664
|
|
|
549
665
|
const sent = this.peerManager.sendTo(targetNodeId, frame);
|
|
550
666
|
if (!sent) {
|
|
551
|
-
this.
|
|
552
|
-
clearTimeout(timer);
|
|
667
|
+
this.cleanupRequest(requestId);
|
|
553
668
|
this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `cannot reach node "${targetNodeId}"`);
|
|
554
669
|
}
|
|
555
670
|
}
|
|
@@ -593,18 +708,23 @@ export class ModelProxy {
|
|
|
593
708
|
}
|
|
594
709
|
|
|
595
710
|
/** Emit responses API stream setup events (response.created → content_part.added). */
|
|
596
|
-
private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string) {
|
|
711
|
+
private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string, hasTools = false) {
|
|
597
712
|
const respId = `resp_${id}`;
|
|
598
|
-
const msgId = `msg_${id}`;
|
|
599
713
|
const now = Math.floor(Date.now() / 1000);
|
|
600
714
|
const baseResp = { id: respId, object: "response", created_at: now, status: "in_progress", model, output: [] };
|
|
601
|
-
const msgItem = { type: "message", id: msgId, role: "assistant", content: [], status: "in_progress" };
|
|
602
|
-
const textPart = { type: "output_text", text: "" };
|
|
603
715
|
|
|
604
716
|
controller.enqueue(encoder.encode(`event: response.created\ndata: ${JSON.stringify({ type: "response.created", response: baseResp })}\n\n`));
|
|
605
717
|
controller.enqueue(encoder.encode(`event: response.in_progress\ndata: ${JSON.stringify({ type: "response.in_progress", response: baseResp })}\n\n`));
|
|
606
|
-
|
|
607
|
-
|
|
718
|
+
|
|
719
|
+
// When tools are present, skip pre-fabricated output_item/content_part events —
|
|
720
|
+
// the real events (including function_call items) will be forwarded from the remote.
|
|
721
|
+
if (!hasTools) {
|
|
722
|
+
const msgId = `msg_${id}`;
|
|
723
|
+
const msgItem = { type: "message", id: msgId, role: "assistant", content: [], status: "in_progress" };
|
|
724
|
+
const textPart = { type: "output_text", text: "" };
|
|
725
|
+
controller.enqueue(encoder.encode(`event: response.output_item.added\ndata: ${JSON.stringify({ type: "response.output_item.added", output_index: 0, item: msgItem })}\n\n`));
|
|
726
|
+
controller.enqueue(encoder.encode(`event: response.content_part.added\ndata: ${JSON.stringify({ type: "response.content_part.added", item_id: msgId, output_index: 0, content_index: 0, part: textPart })}\n\n`));
|
|
727
|
+
}
|
|
608
728
|
}
|
|
609
729
|
|
|
610
730
|
/** Emit responses API stream completion events (output_text.done → response.completed). */
|
|
@@ -697,7 +817,7 @@ export class ModelProxy {
|
|
|
697
817
|
this.pending.delete(requestId);
|
|
698
818
|
this.peerManager.router.markFailed(requestId);
|
|
699
819
|
reject(new Error(`Model request to "${targetNodeId}" timed out`));
|
|
700
|
-
},
|
|
820
|
+
}, this.modelTimeout);
|
|
701
821
|
|
|
702
822
|
this.pending.set(requestId, {
|
|
703
823
|
resolve: resolve as (v: unknown) => void,
|
|
@@ -781,7 +901,7 @@ export class ModelProxy {
|
|
|
781
901
|
.map((p) => p.nodeId),
|
|
782
902
|
);
|
|
783
903
|
|
|
784
|
-
const models = this.
|
|
904
|
+
const models = this.allProxyModels.map((m) => {
|
|
785
905
|
const entry: Record<string, unknown> = {
|
|
786
906
|
id: m.id,
|
|
787
907
|
object: "model",
|
|
@@ -820,18 +940,17 @@ export class ModelProxy {
|
|
|
820
940
|
// process the request and sent model_res instead of model_stream).
|
|
821
941
|
if (pending.stream) {
|
|
822
942
|
if (!frame.payload.success && pending.controller && pending.encoder) {
|
|
823
|
-
|
|
824
|
-
this.pending.delete(frame.id);
|
|
825
|
-
this.streamText.delete(frame.id);
|
|
943
|
+
const stableId = pending.stableStreamId ?? frame.id;
|
|
826
944
|
// Try failover if no content has been sent yet
|
|
827
945
|
if (!pending.hasContent && pending.failoverCandidates?.length && pending.buildFrame) {
|
|
946
|
+
this.cleanupRequest(frame.id);
|
|
828
947
|
this.tryStreamFailover(
|
|
829
|
-
|
|
948
|
+
stableId, pending.responseFormat, pending.controller, pending.encoder,
|
|
830
949
|
pending.model ?? "", pending.failoverCandidates, pending.buildFrame,
|
|
831
950
|
`remote error: ${frame.payload.error}`,
|
|
832
951
|
);
|
|
833
952
|
} else {
|
|
834
|
-
|
|
953
|
+
this.cleanupRequest(frame.id, stableId);
|
|
835
954
|
try {
|
|
836
955
|
const errMsg = `[ClawMatrix] Remote error: ${frame.payload.error}`;
|
|
837
956
|
if (pending.responseFormat === "responses") {
|
|
@@ -843,14 +962,12 @@ export class ModelProxy {
|
|
|
843
962
|
}
|
|
844
963
|
pending.controller.close();
|
|
845
964
|
} catch { /* controller may already be closed */ }
|
|
846
|
-
this.streamSetupSent.delete(stableId);
|
|
847
965
|
}
|
|
848
966
|
}
|
|
849
967
|
return;
|
|
850
968
|
}
|
|
851
969
|
|
|
852
|
-
|
|
853
|
-
this.pending.delete(frame.id);
|
|
970
|
+
this.cleanupRequest(frame.id);
|
|
854
971
|
pending.resolve(frame.payload);
|
|
855
972
|
}
|
|
856
973
|
|
|
@@ -870,9 +987,8 @@ export class ModelProxy {
|
|
|
870
987
|
this.handleModelStreamChat(frame, pending);
|
|
871
988
|
}
|
|
872
989
|
} catch {
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
this.streamText.delete(frame.id);
|
|
990
|
+
this.cleanupRequest(frame.id, pending.stableStreamId);
|
|
991
|
+
try { pending.controller?.close(); } catch { /* already closed */ }
|
|
876
992
|
}
|
|
877
993
|
}
|
|
878
994
|
|
|
@@ -889,12 +1005,13 @@ export class ModelProxy {
|
|
|
889
1005
|
pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
|
|
890
1006
|
pending.controller!.enqueue(pending.encoder!.encode("data: [DONE]\n\n"));
|
|
891
1007
|
pending.controller!.close();
|
|
892
|
-
|
|
893
|
-
this.pending.delete(frame.id);
|
|
894
|
-
this.streamSetupSent.delete(stableId);
|
|
1008
|
+
this.cleanupRequest(frame.id, stableId);
|
|
895
1009
|
} else {
|
|
896
1010
|
// Use full deltaObj when available (carries tool_calls etc.), otherwise simple text delta
|
|
897
|
-
const delta = frame.payload.deltaObj ?? {
|
|
1011
|
+
const delta = frame.payload.deltaObj ?? {
|
|
1012
|
+
content: frame.payload.delta,
|
|
1013
|
+
...(frame.payload.reasoningDelta && { reasoning_content: frame.payload.reasoningDelta }),
|
|
1014
|
+
};
|
|
898
1015
|
const chunkStableId = pending.stableStreamId ?? frame.id;
|
|
899
1016
|
const chunk = { id: `chatcmpl-${chunkStableId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta, finish_reason: null }] };
|
|
900
1017
|
pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(chunk)}\n\n`));
|
|
@@ -907,13 +1024,27 @@ export class ModelProxy {
|
|
|
907
1024
|
const stableId = pending.stableStreamId ?? frame.id;
|
|
908
1025
|
const fullText = this.streamText.get(frame.id) ?? "";
|
|
909
1026
|
this.streamText.delete(frame.id);
|
|
910
|
-
|
|
1027
|
+
|
|
1028
|
+
// If the remote forwarded the full response.completed event, emit it directly
|
|
1029
|
+
const doneObj = frame.payload.deltaObj as { event?: string; data?: unknown } | undefined;
|
|
1030
|
+
if (doneObj?.event === "response.completed" && doneObj.data) {
|
|
1031
|
+
pending.controller!.enqueue(pending.encoder!.encode(`event: response.completed\ndata: ${JSON.stringify(doneObj.data)}\n\n`));
|
|
1032
|
+
} else {
|
|
1033
|
+
// Fallback: reconstruct text-only completion
|
|
1034
|
+
this.enqueueResponsesStreamDone(pending.controller!, pending.encoder!, stableId, pending.model ?? "", fullText, frame.payload.usage);
|
|
1035
|
+
}
|
|
911
1036
|
pending.controller!.close();
|
|
912
|
-
|
|
913
|
-
this.pending.delete(frame.id);
|
|
914
|
-
this.streamSetupSent.delete(stableId);
|
|
1037
|
+
this.cleanupRequest(frame.id, stableId);
|
|
915
1038
|
} else {
|
|
916
|
-
//
|
|
1039
|
+
// Forward structured Responses API events (function_call, output_item, etc.)
|
|
1040
|
+
const obj = frame.payload.deltaObj as { event?: string; data?: unknown } | undefined;
|
|
1041
|
+
if (obj?.event && obj.data) {
|
|
1042
|
+
pending.controller!.enqueue(pending.encoder!.encode(`event: ${obj.event}\ndata: ${JSON.stringify(obj.data)}\n\n`));
|
|
1043
|
+
pending.hasContent = true;
|
|
1044
|
+
return;
|
|
1045
|
+
}
|
|
1046
|
+
|
|
1047
|
+
// Text delta
|
|
917
1048
|
this.streamText.set(frame.id, (this.streamText.get(frame.id) ?? "") + (frame.payload.delta ?? ""));
|
|
918
1049
|
const respStableId = pending.stableStreamId ?? frame.id;
|
|
919
1050
|
const evt = { type: "response.output_text.delta", item_id: `msg_${respStableId}`, output_index: 0, content_index: 0, delta: frame.payload.delta };
|
|
@@ -922,25 +1053,30 @@ export class ModelProxy {
|
|
|
922
1053
|
}
|
|
923
1054
|
}
|
|
924
1055
|
|
|
925
|
-
private sendStreamDelta(to: string, id: string, delta: string, deltaObj?: unknown) {
|
|
1056
|
+
private sendStreamDelta(to: string, id: string, delta: string, deltaObj?: unknown, reasoningDelta?: string) {
|
|
926
1057
|
this.peerManager.sendTo(to, {
|
|
927
1058
|
type: "model_stream",
|
|
928
1059
|
id,
|
|
929
1060
|
from: this.config.nodeId,
|
|
930
1061
|
to,
|
|
931
1062
|
timestamp: Date.now(),
|
|
932
|
-
payload: {
|
|
1063
|
+
payload: {
|
|
1064
|
+
delta,
|
|
1065
|
+
...(reasoningDelta && { reasoningDelta }),
|
|
1066
|
+
...(deltaObj !== undefined && { deltaObj }),
|
|
1067
|
+
done: false,
|
|
1068
|
+
},
|
|
933
1069
|
} satisfies ModelStreamChunk);
|
|
934
1070
|
}
|
|
935
1071
|
|
|
936
|
-
private sendStreamDone(to: string, id: string, usage?: { inputTokens: number; outputTokens: number }) {
|
|
1072
|
+
private sendStreamDone(to: string, id: string, usage?: { inputTokens: number; outputTokens: number }, deltaObj?: unknown) {
|
|
937
1073
|
this.peerManager.sendTo(to, {
|
|
938
1074
|
type: "model_stream",
|
|
939
1075
|
id,
|
|
940
1076
|
from: this.config.nodeId,
|
|
941
1077
|
to,
|
|
942
1078
|
timestamp: Date.now(),
|
|
943
|
-
payload: { delta: "", done: true, usage },
|
|
1079
|
+
payload: { delta: "", done: true, usage, ...(deltaObj !== undefined && { deltaObj }) },
|
|
944
1080
|
} satisfies ModelStreamChunk);
|
|
945
1081
|
}
|
|
946
1082
|
|
|
@@ -967,21 +1103,27 @@ export class ModelProxy {
|
|
|
967
1103
|
|
|
968
1104
|
try {
|
|
969
1105
|
const endpoint = this.resolveModelEndpoint(model);
|
|
970
|
-
|
|
1106
|
+
if (!endpoint) {
|
|
1107
|
+
this.peerManager.sendTo(from, {
|
|
1108
|
+
type: "model_res", id, from: this.config.nodeId, to: from, timestamp: Date.now(),
|
|
1109
|
+
payload: { success: false, error: `No direct API endpoint configured for model "${payload.model}" (provider: ${model.provider}). Configure baseUrl/apiKey in ClawMatrix model config or OpenClaw provider config.` },
|
|
1110
|
+
} satisfies ModelResponse);
|
|
1111
|
+
return;
|
|
1112
|
+
}
|
|
1113
|
+
// Use payload.api override from requesting side, or cached API from previous auto-detection
|
|
1114
|
+
const cached = this.modelApiCache.get(model.id);
|
|
1115
|
+
const cachedApi = (cached && Date.now() - cached.ts < ModelProxy.MODEL_API_CACHE_TTL) ? cached.api : undefined;
|
|
1116
|
+
if (cached && !cachedApi) this.modelApiCache.delete(model.id); // expired
|
|
1117
|
+
const effectiveApi = payload.api ?? cachedApi ?? endpoint.api;
|
|
1118
|
+
const isResponsesApi = effectiveApi === "openai-responses" || effectiveApi === "openai-codex-responses";
|
|
971
1119
|
const path = isResponsesApi ? "/responses" : "/chat/completions";
|
|
972
1120
|
const url = `${endpoint.baseUrl}${path}`;
|
|
973
1121
|
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
|
974
1122
|
|
|
975
|
-
if (endpoint.
|
|
976
|
-
|
|
977
|
-
debug("model_req", `direct API call to ${url} (api=${endpoint.api})`);
|
|
978
|
-
} else {
|
|
979
|
-
const { authHeader } = this.gatewayInfo;
|
|
980
|
-
if (authHeader) headers["Authorization"] = authHeader;
|
|
981
|
-
debug("model_req", `gateway fallback to ${url}`);
|
|
982
|
-
}
|
|
1123
|
+
if (endpoint.apiKey) headers["Authorization"] = `Bearer ${endpoint.apiKey}`;
|
|
1124
|
+
debug("model_req", `direct API call to ${url} (api=${endpoint.api})`);
|
|
983
1125
|
|
|
984
|
-
const modelField =
|
|
1126
|
+
const modelField = model.id;
|
|
985
1127
|
const srcFormat = payload.inputFormat ?? "chat";
|
|
986
1128
|
|
|
987
1129
|
// Convert messages between formats if source and target API differ
|
|
@@ -996,6 +1138,8 @@ export class ModelProxy {
|
|
|
996
1138
|
stream: payload.stream,
|
|
997
1139
|
temperature: payload.temperature,
|
|
998
1140
|
max_output_tokens: payload.maxTokens,
|
|
1141
|
+
...(payload.tools && { tools: payload.tools }),
|
|
1142
|
+
...(payload.toolChoice !== undefined && { tool_choice: payload.toolChoice }),
|
|
999
1143
|
};
|
|
1000
1144
|
} else {
|
|
1001
1145
|
const messages = srcFormat === "chat"
|
|
@@ -1008,6 +1152,8 @@ export class ModelProxy {
|
|
|
1008
1152
|
max_tokens: payload.maxTokens,
|
|
1009
1153
|
stream: payload.stream,
|
|
1010
1154
|
...(payload.stream ? { stream_options: { include_usage: true } } : {}),
|
|
1155
|
+
...(payload.tools && { tools: payload.tools }),
|
|
1156
|
+
...(payload.toolChoice !== undefined && { tool_choice: payload.toolChoice }),
|
|
1011
1157
|
};
|
|
1012
1158
|
}
|
|
1013
1159
|
|
|
@@ -1031,6 +1177,8 @@ export class ModelProxy {
|
|
|
1031
1177
|
let buffer = "";
|
|
1032
1178
|
let lastUsage: { inputTokens: number; outputTokens: number } | undefined;
|
|
1033
1179
|
let streamDone = false;
|
|
1180
|
+
let contentSent = false;
|
|
1181
|
+
let completedEvent: unknown = undefined;
|
|
1034
1182
|
|
|
1035
1183
|
while (!streamDone) {
|
|
1036
1184
|
const { done, value } = await reader.read();
|
|
@@ -1053,7 +1201,6 @@ export class ModelProxy {
|
|
|
1053
1201
|
if (!line.startsWith("data: ")) continue;
|
|
1054
1202
|
const data = line.slice(6).trim();
|
|
1055
1203
|
if (data === "[DONE]") {
|
|
1056
|
-
this.sendStreamDone(from, id, lastUsage);
|
|
1057
1204
|
streamDone = true;
|
|
1058
1205
|
break;
|
|
1059
1206
|
}
|
|
@@ -1067,33 +1214,38 @@ export class ModelProxy {
|
|
|
1067
1214
|
const delta = parsed.delta || "";
|
|
1068
1215
|
if (delta) {
|
|
1069
1216
|
this.sendStreamDelta(from, id, delta);
|
|
1217
|
+
contentSent = true;
|
|
1070
1218
|
}
|
|
1219
|
+
} else if (
|
|
1220
|
+
evtType === "response.output_item.added" ||
|
|
1221
|
+
evtType === "response.output_item.done" ||
|
|
1222
|
+
evtType === "response.content_part.added" ||
|
|
1223
|
+
evtType === "response.content_part.done" ||
|
|
1224
|
+
evtType === "response.output_text.done" ||
|
|
1225
|
+
evtType === "response.function_call_arguments.delta" ||
|
|
1226
|
+
evtType === "response.function_call_arguments.done"
|
|
1227
|
+
) {
|
|
1228
|
+
// Forward structured Responses API events via deltaObj
|
|
1229
|
+
this.sendStreamDelta(from, id, "", { event: evtType, data: parsed });
|
|
1230
|
+
contentSent = true;
|
|
1071
1231
|
} else if (evtType === "response.completed") {
|
|
1072
1232
|
const usage = parsed.response?.usage;
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
|
|
1076
|
-
outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
|
|
1077
|
-
};
|
|
1078
|
-
}
|
|
1079
|
-
this.sendStreamDone(from, id, lastUsage);
|
|
1233
|
+
lastUsage = parseUsage(usage) ?? lastUsage;
|
|
1234
|
+
completedEvent = { event: evtType, data: parsed };
|
|
1080
1235
|
streamDone = true;
|
|
1081
1236
|
break;
|
|
1082
1237
|
}
|
|
1083
1238
|
} else {
|
|
1084
1239
|
// Chat completions format
|
|
1085
|
-
|
|
1086
|
-
lastUsage = {
|
|
1087
|
-
inputTokens: parsed.usage.prompt_tokens,
|
|
1088
|
-
outputTokens: parsed.usage.completion_tokens,
|
|
1089
|
-
};
|
|
1090
|
-
}
|
|
1240
|
+
lastUsage = parseUsage(parsed.usage) ?? lastUsage;
|
|
1091
1241
|
const d = parsed.choices?.[0]?.delta;
|
|
1092
|
-
const delta = d?.content ||
|
|
1242
|
+
const delta = d?.content || "";
|
|
1243
|
+
const reasoningDelta = d?.reasoning_content || "";
|
|
1093
1244
|
// Pass full delta object when it contains tool_calls or other structured data
|
|
1094
1245
|
const hasStructured = d?.tool_calls || d?.refusal != null;
|
|
1095
|
-
if (delta || hasStructured) {
|
|
1096
|
-
this.sendStreamDelta(from, id, delta, hasStructured ? d : undefined);
|
|
1246
|
+
if (delta || reasoningDelta || hasStructured) {
|
|
1247
|
+
this.sendStreamDelta(from, id, delta, hasStructured ? d : undefined, reasoningDelta || undefined);
|
|
1248
|
+
contentSent = true;
|
|
1097
1249
|
}
|
|
1098
1250
|
}
|
|
1099
1251
|
} catch {
|
|
@@ -1102,9 +1254,30 @@ export class ModelProxy {
|
|
|
1102
1254
|
currentEvent = "";
|
|
1103
1255
|
}
|
|
1104
1256
|
}
|
|
1105
|
-
|
|
1106
|
-
//
|
|
1107
|
-
if (!
|
|
1257
|
+
|
|
1258
|
+
// Responses API stream produced no content — fall back to chat completions
|
|
1259
|
+
if (isResponsesApi && !contentSent && !cachedApi) {
|
|
1260
|
+
debug("model_req", `responses API stream produced no content for "${model.id}", retrying with chat completions`);
|
|
1261
|
+
const chatResult = await this.retryWithChatCompletions(endpoint, modelField, payload, headers);
|
|
1262
|
+
if (chatResult) {
|
|
1263
|
+
this.modelApiCache.set(model.id, { api: "openai-completions", ts: Date.now() });
|
|
1264
|
+
debug("model_req", `cached "${model.id}" as openai-completions (stream fallback)`);
|
|
1265
|
+
if (chatResult.content) {
|
|
1266
|
+
this.sendStreamDelta(from, id, chatResult.content);
|
|
1267
|
+
}
|
|
1268
|
+
this.sendStreamDone(from, id, chatResult.usage);
|
|
1269
|
+
} else if (completedEvent) {
|
|
1270
|
+
this.sendStreamDone(from, id, lastUsage, completedEvent);
|
|
1271
|
+
} else {
|
|
1272
|
+
this.sendStreamDone(from, id, lastUsage);
|
|
1273
|
+
}
|
|
1274
|
+
} else if (completedEvent) {
|
|
1275
|
+
this.sendStreamDone(from, id, lastUsage, completedEvent);
|
|
1276
|
+
} else if (!streamDone) {
|
|
1277
|
+
// Upstream closed without sending [DONE] or response.completed
|
|
1278
|
+
this.sendStreamDone(from, id, lastUsage);
|
|
1279
|
+
} else {
|
|
1280
|
+
// Chat completions [DONE] received
|
|
1108
1281
|
this.sendStreamDone(from, id, lastUsage);
|
|
1109
1282
|
}
|
|
1110
1283
|
} finally {
|
|
@@ -1112,12 +1285,33 @@ export class ModelProxy {
|
|
|
1112
1285
|
}
|
|
1113
1286
|
} else {
|
|
1114
1287
|
// Non-streaming response
|
|
1115
|
-
const
|
|
1288
|
+
const responseText = await response.text();
|
|
1289
|
+
let result: Record<string, unknown>;
|
|
1290
|
+
let chatFallbackResult: Awaited<ReturnType<ModelProxy["retryWithChatCompletions"]>> = null;
|
|
1291
|
+
try {
|
|
1292
|
+
result = JSON.parse(responseText);
|
|
1293
|
+
} catch {
|
|
1294
|
+
// Upstream returned non-JSON (e.g. SSE in non-stream mode) — try chat completions fallback
|
|
1295
|
+
if (!cachedApi && isResponsesApi) {
|
|
1296
|
+
debug("model_req", `responses API returned non-JSON for "${model.id}", retrying with chat completions`);
|
|
1297
|
+
chatFallbackResult = await this.retryWithChatCompletions(endpoint, modelField, payload, headers);
|
|
1298
|
+
if (chatFallbackResult) {
|
|
1299
|
+
this.modelApiCache.set(model.id, { api: "openai-completions", ts: Date.now() });
|
|
1300
|
+
debug("model_req", `cached "${model.id}" as openai-completions (non-JSON fallback)`);
|
|
1301
|
+
}
|
|
1302
|
+
}
|
|
1303
|
+
if (!chatFallbackResult) throw new Error(`Upstream returned non-JSON: ${responseText.slice(0, 100)}`);
|
|
1304
|
+
result = {}; // unused — chatFallbackResult takes precedence
|
|
1305
|
+
}
|
|
1116
1306
|
let content: string;
|
|
1307
|
+
let reasoning = "";
|
|
1117
1308
|
let message: unknown | undefined;
|
|
1118
1309
|
let usage: { inputTokens: number; outputTokens: number } | undefined;
|
|
1119
1310
|
|
|
1120
|
-
if (
|
|
1311
|
+
if (chatFallbackResult) {
|
|
1312
|
+
({ content, message, usage } = chatFallbackResult);
|
|
1313
|
+
reasoning = chatFallbackResult.reasoning ?? "";
|
|
1314
|
+
} else if (isResponsesApi) {
|
|
1121
1315
|
// Responses API: extract text from output[].content[].text
|
|
1122
1316
|
content = "";
|
|
1123
1317
|
const output = result.output as { type?: string; content?: { type?: string; text?: string }[] }[] | undefined;
|
|
@@ -1130,28 +1324,40 @@ export class ModelProxy {
|
|
|
1130
1324
|
}
|
|
1131
1325
|
}
|
|
1132
1326
|
}
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1327
|
+
|
|
1328
|
+
// Auto-detect: if Responses API returned empty output but produced tokens,
|
|
1329
|
+
// the upstream adapter likely doesn't support Responses API properly.
|
|
1330
|
+
// Retry with chat completions and cache the result.
|
|
1331
|
+
const parsedUsage = parseUsage(result.usage as Record<string, number> | undefined);
|
|
1332
|
+
const hasMessage = Array.isArray(output) && output.some((o: { type?: string }) => o.type === "message");
|
|
1333
|
+
if (!hasMessage && (parsedUsage?.outputTokens ?? 0) > 0 && !cachedApi) {
|
|
1334
|
+
debug("model_req", `responses API returned empty output for "${model.id}" (output_tokens=${parsedUsage!.outputTokens}), retrying with chat completions`);
|
|
1335
|
+
const chatResult = await this.retryWithChatCompletions(endpoint, modelField, payload, headers);
|
|
1336
|
+
if (chatResult) {
|
|
1337
|
+
this.modelApiCache.set(model.id, { api: "openai-completions", ts: Date.now() });
|
|
1338
|
+
debug("model_req", `cached "${model.id}" as openai-completions`);
|
|
1339
|
+
({ content, message, usage } = chatResult);
|
|
1340
|
+
} else {
|
|
1341
|
+
message = result.output;
|
|
1342
|
+
usage = parsedUsage;
|
|
1343
|
+
}
|
|
1344
|
+
} else {
|
|
1345
|
+
// Carry full output array for structured data (function_call items, etc.)
|
|
1346
|
+
message = result.output;
|
|
1347
|
+
usage = parsedUsage;
|
|
1140
1348
|
}
|
|
1141
1349
|
} else {
|
|
1142
1350
|
// Chat completions format
|
|
1143
1351
|
const msg = result.choices?.[0]?.message;
|
|
1144
|
-
content = msg?.content ||
|
|
1352
|
+
content = msg?.content || "";
|
|
1353
|
+
reasoning = msg?.reasoning_content || "";
|
|
1145
1354
|
// Carry full message object when it has tool_calls or other structured data
|
|
1146
1355
|
if (msg?.tool_calls || msg?.refusal != null || msg?.function_call) {
|
|
1147
1356
|
message = msg;
|
|
1148
1357
|
}
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
outputTokens: result.usage.completion_tokens,
|
|
1153
|
-
};
|
|
1154
|
-
}
|
|
1358
|
+
usage = parseUsage(result.usage);
|
|
1359
|
+
// If no content but has reasoning, use reasoning as content fallback
|
|
1360
|
+
if (!content && reasoning) content = reasoning;
|
|
1155
1361
|
}
|
|
1156
1362
|
|
|
1157
1363
|
this.peerManager.sendTo(from, {
|
|
@@ -1163,6 +1369,7 @@ export class ModelProxy {
|
|
|
1163
1369
|
payload: {
|
|
1164
1370
|
success: true,
|
|
1165
1371
|
content,
|
|
1372
|
+
...(reasoning && { reasoning }),
|
|
1166
1373
|
...(message !== undefined && { message }),
|
|
1167
1374
|
usage,
|
|
1168
1375
|
},
|
|
@@ -1182,4 +1389,45 @@ export class ModelProxy {
|
|
|
1182
1389
|
} satisfies ModelResponse);
|
|
1183
1390
|
}
|
|
1184
1391
|
}
|
|
1392
|
+
|
|
1393
|
+
/** Retry a model request using chat completions format (fallback from Responses API). */
|
|
1394
|
+
private async retryWithChatCompletions(
|
|
1395
|
+
endpoint: { baseUrl: string; apiKey?: string; direct: boolean; api: string },
|
|
1396
|
+
modelField: string,
|
|
1397
|
+
payload: ModelRequest["payload"],
|
|
1398
|
+
headers: Record<string, string>,
|
|
1399
|
+
): Promise<{ content: string; reasoning?: string; message?: unknown; usage?: { inputTokens: number; outputTokens: number } } | null> {
|
|
1400
|
+
try {
|
|
1401
|
+
const srcFormat = payload.inputFormat ?? "chat";
|
|
1402
|
+
const messages = srcFormat === "chat"
|
|
1403
|
+
? payload.messages
|
|
1404
|
+
: ModelProxy.normalizeResponsesInput(payload.messages);
|
|
1405
|
+
const chatBody: Record<string, unknown> = {
|
|
1406
|
+
model: modelField,
|
|
1407
|
+
messages,
|
|
1408
|
+
temperature: payload.temperature,
|
|
1409
|
+
max_tokens: payload.maxTokens,
|
|
1410
|
+
stream: false,
|
|
1411
|
+
...(payload.tools && { tools: payload.tools }),
|
|
1412
|
+
...(payload.toolChoice !== undefined && { tool_choice: payload.toolChoice }),
|
|
1413
|
+
};
|
|
1414
|
+
const chatUrl = `${endpoint.baseUrl}/chat/completions`;
|
|
1415
|
+
const chatResp = await fetch(chatUrl, {
|
|
1416
|
+
method: "POST",
|
|
1417
|
+
headers,
|
|
1418
|
+
body: JSON.stringify(chatBody),
|
|
1419
|
+
});
|
|
1420
|
+
if (!chatResp.ok) return null;
|
|
1421
|
+
const chatResult = await chatResp.json();
|
|
1422
|
+
const msg = chatResult.choices?.[0]?.message;
|
|
1423
|
+
const content = msg?.content || "";
|
|
1424
|
+
const reasoningContent = msg?.reasoning_content || "";
|
|
1425
|
+
const message = (msg?.tool_calls || msg?.refusal != null || msg?.function_call) ? msg : undefined;
|
|
1426
|
+
const usage = parseUsage(chatResult.usage);
|
|
1427
|
+
return { content: content || reasoningContent, reasoning: reasoningContent || undefined, message, usage };
|
|
1428
|
+
} catch (err) {
|
|
1429
|
+
debug("model_req", `retryWithChatCompletions failed for "${modelField}": ${err instanceof Error ? err.message : String(err)}`);
|
|
1430
|
+
return null;
|
|
1431
|
+
}
|
|
1432
|
+
}
|
|
1185
1433
|
}
|