clawmatrix 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,11 @@ interface ProxyResponse {
22
22
  body: string | ReadableStream;
23
23
  }
24
24
 
25
+ interface FailoverCandidate {
26
+ proxyModel: import("./config.ts").ProxyModel | undefined;
27
+ routeNodeId: string;
28
+ }
29
+
25
30
  interface PendingModelReq {
26
31
  resolve: (value: unknown) => void;
27
32
  reject: (error: Error) => void;
@@ -31,6 +36,14 @@ interface PendingModelReq {
31
36
  model?: string;
32
37
  controller?: ReadableStreamDefaultController;
33
38
  encoder?: TextEncoder;
39
+ /** Whether real content (not just setup events) has been sent to the stream. */
40
+ hasContent?: boolean;
41
+ /** Remaining failover candidates (excludes the currently attempted node). */
42
+ failoverCandidates?: FailoverCandidate[];
43
+ /** Factory to build a new frame for the next failover candidate. */
44
+ buildFrame?: (candidate: FailoverCandidate, newId: string) => ModelRequest;
45
+ /** Stable ID for the entire stream (for setup events & final close). */
46
+ stableStreamId?: string;
34
47
  }
35
48
 
36
49
  export class ModelProxy {
@@ -80,6 +93,26 @@ export class ModelProxy {
80
93
  continue;
81
94
  }
82
95
 
96
+ // function_call → merge into assistant message with tool_calls
97
+ if (obj.type === "function_call") {
98
+ const toolCall = {
99
+ id: obj.call_id ?? obj.id,
100
+ type: "function",
101
+ function: {
102
+ name: obj.name,
103
+ arguments: typeof obj.arguments === "string" ? obj.arguments : JSON.stringify(obj.arguments),
104
+ },
105
+ };
106
+ // Merge consecutive function_calls into one assistant message
107
+ const last = messages[messages.length - 1] as { role?: string; tool_calls?: unknown[] } | undefined;
108
+ if (last?.role === "assistant" && Array.isArray(last.tool_calls)) {
109
+ last.tool_calls.push(toolCall);
110
+ } else {
111
+ messages.push({ role: "assistant", content: null, tool_calls: [toolCall] });
112
+ }
113
+ continue;
114
+ }
115
+
83
116
  const role = typeof obj.role === "string" ? obj.role : "user";
84
117
 
85
118
  // Simple shorthand: {role: "user", content: "hello"}
@@ -133,6 +166,52 @@ export class ModelProxy {
133
166
  return messages;
134
167
  }
135
168
 
169
+ /**
170
+ * Convert chat-format messages back to Responses API input items.
171
+ *
172
+ * Reverses normalizeResponsesInput:
173
+ * - {role: "tool", tool_call_id, content} → {type: "function_call_output", call_id, output}
174
+ * - {role: "assistant", tool_calls: [...]} → {type: "function_call", ...} items
175
+ * - {role: "developer"|"user"|"assistant"|"system", content} → pass through
176
+ */
177
+ private static chatToResponsesInput(messages: unknown[]): unknown[] {
178
+ const items: unknown[] = [];
179
+ for (const msg of messages) {
180
+ if (!msg || typeof msg !== "object") continue;
181
+ const obj = msg as Record<string, unknown>;
182
+
183
+ if (obj.role === "tool") {
184
+ items.push({
185
+ type: "function_call_output",
186
+ call_id: obj.tool_call_id,
187
+ output: typeof obj.content === "string" ? obj.content : JSON.stringify(obj.content),
188
+ });
189
+ } else if (obj.role === "assistant" && Array.isArray(obj.tool_calls)) {
190
+ // Emit text content first if present
191
+ if (typeof obj.content === "string" && obj.content) {
192
+ items.push({ role: "assistant", content: obj.content });
193
+ }
194
+ // Convert each tool_call to a function_call item
195
+ for (const tc of obj.tool_calls) {
196
+ if (!tc || typeof tc !== "object") continue;
197
+ const call = tc as Record<string, unknown>;
198
+ const fn = call.function as Record<string, unknown> | undefined;
199
+ if (fn) {
200
+ items.push({
201
+ type: "function_call",
202
+ call_id: call.id,
203
+ name: fn.name,
204
+ arguments: typeof fn.arguments === "string" ? fn.arguments : JSON.stringify(fn.arguments),
205
+ });
206
+ }
207
+ }
208
+ } else {
209
+ items.push(obj);
210
+ }
211
+ }
212
+ return items;
213
+ }
214
+
136
215
  /** Resolve API endpoint for a model: explicit config > OpenClaw provider > gateway fallback */
137
216
  private resolveModelEndpoint(model: { id: string; provider: string; baseUrl?: string; apiKey?: string; api?: string }): { baseUrl: string; apiKey?: string; direct: boolean; api: string } {
138
217
  const defaultApi = "openai-completions";
@@ -223,6 +302,7 @@ export class ModelProxy {
223
302
  }
224
303
  this.pending.clear();
225
304
  this.streamText.clear();
305
+ this.streamSetupSent.clear();
226
306
  }
227
307
 
228
308
 
@@ -253,39 +333,47 @@ export class ModelProxy {
253
333
 
254
334
  // ── HTTP handlers ──────────────────────────────────────────────
255
335
 
256
- /** Resolve model ID → proxyModel + route. Shared by chat completions and responses handlers. */
257
- private resolveModelRoute(rawModelId: string): {
258
- nodeId: string; modelId: string;
259
- proxyModel: (typeof this.config.proxyModels)[number] | undefined;
260
- routeNodeId: string;
336
+ /** Resolve model ID → all reachable candidates, ordered for failover. */
337
+ private resolveModelCandidates(rawModelId: string): {
338
+ modelId: string;
339
+ candidates: FailoverCandidate[];
261
340
  } | { error: { status: number; message: string } } {
262
341
  const slashIdx = rawModelId.indexOf("/");
263
- let nodeId: string;
264
342
  let modelId: string;
265
- let proxyModel: (typeof this.config.proxyModels)[number] | undefined;
343
+ let matchingModels: (typeof this.config.proxyModels)[number][];
266
344
 
267
345
  if (slashIdx > 0) {
268
- nodeId = rawModelId.slice(0, slashIdx);
346
+ const nodeId = rawModelId.slice(0, slashIdx);
269
347
  modelId = rawModelId.slice(slashIdx + 1);
270
- proxyModel = this.config.proxyModels.find((m) => m.id === modelId && m.nodeId === nodeId);
348
+ // Explicit node/model only target that specific node, no failover to others
349
+ matchingModels = this.config.proxyModels.filter((m) => m.id === modelId && m.nodeId === nodeId);
271
350
  } else {
272
351
  modelId = rawModelId;
273
- proxyModel = this.config.proxyModels.find((m) => m.id === modelId);
274
- if (!proxyModel) {
275
- return { error: { status: 404, message: `Model "${rawModelId}" not found in proxy models` } };
276
- }
277
- nodeId = proxyModel.nodeId;
352
+ matchingModels = this.config.proxyModels.filter((m) => m.id === modelId);
278
353
  }
279
354
 
280
- const route = this.peerManager.router.getRoute(nodeId);
281
- debug("proxy", `model raw="${rawModelId}" nodeId=${nodeId} modelId="${modelId}" route=${route?.nodeId ?? "none"}`);
282
- if (!route) {
283
- return { error: { status: 404, message: `Node "${nodeId}" not found in cluster` } };
355
+ if (matchingModels.length === 0) {
356
+ return { error: { status: 404, message: `Model "${rawModelId}" not found in proxy models` } };
284
357
  }
285
- if (!this.peerManager.canReach(route.nodeId)) {
286
- return { error: { status: 502, message: `Cannot reach model node "${route.nodeId}"` } };
358
+
359
+ // Build candidates from all reachable nodes
360
+ const candidates: FailoverCandidate[] = [];
361
+ const seenNodes = new Set<string>();
362
+ for (const pm of matchingModels) {
363
+ if (seenNodes.has(pm.nodeId)) continue;
364
+ seenNodes.add(pm.nodeId);
365
+ const route = this.peerManager.router.getRoute(pm.nodeId);
366
+ if (route && this.peerManager.canReach(route.nodeId)) {
367
+ candidates.push({ proxyModel: pm, routeNodeId: route.nodeId });
368
+ }
369
+ }
370
+
371
+ debug("proxy", `model raw="${rawModelId}" modelId="${modelId}" candidates=${candidates.map((c) => c.routeNodeId).join(",") || "none"}`);
372
+ if (candidates.length === 0) {
373
+ return { error: { status: 502, message: `No reachable node for model "${rawModelId}"` } };
287
374
  }
288
- return { nodeId, modelId, proxyModel, routeNodeId: route.nodeId };
375
+
376
+ return { modelId, candidates };
289
377
  }
290
378
 
291
379
  private async handleChatCompletion(rawBody: string, _api: string): Promise<ProxyResponse> {
@@ -296,35 +384,41 @@ export class ModelProxy {
296
384
  return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
297
385
  }
298
386
 
299
- const resolved = this.resolveModelRoute(body.model);
387
+ const resolved = this.resolveModelCandidates(body.model);
300
388
  if ("error" in resolved) {
301
389
  return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
302
390
  }
303
391
 
304
- const { modelId, proxyModel, routeNodeId } = resolved;
305
- const messages = body.messages;
306
- debug("proxy", `messages count=${messages?.length ?? 0} roles=${(messages ?? []).map((m: unknown) => (m as Record<string, unknown>)?.role).join(",")}`);
307
-
308
- if (proxyModel?.description) {
309
- const first = messages[0] as { role?: string; content?: string } | undefined;
310
- if (first?.role === "system" && typeof first.content === "string") {
311
- first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
312
- } else {
313
- messages.unshift({ role: "system", content: `[Model: ${proxyModel.description}]` });
314
- }
315
- }
392
+ const { modelId, candidates } = resolved;
393
+ const first = candidates[0]!;
394
+ const rest = candidates.slice(1);
395
+ const baseMessages = body.messages;
396
+ debug("proxy", `messages count=${baseMessages?.length ?? 0} roles=${(baseMessages ?? []).map((m: unknown) => (m as Record<string, unknown>)?.role).join(",")}`);
316
397
 
317
398
  const stream = body.stream ?? false;
318
399
  const requestId = crypto.randomUUID();
319
- const frame: ModelRequest = {
320
- type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
321
- payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_tokens, stream },
400
+ const buildFrame = (candidate: FailoverCandidate, id: string): ModelRequest => {
401
+ // Clone messages so each candidate gets its own description prefix
402
+ const messages = baseMessages.map((m: unknown) => (m && typeof m === "object" ? { ...(m as object) } : m));
403
+ if (candidate.proxyModel?.description) {
404
+ const firstMsg = messages[0] as { role?: string; content?: string } | undefined;
405
+ if (firstMsg?.role === "system" && typeof firstMsg.content === "string") {
406
+ firstMsg.content = `[Model: ${candidate.proxyModel.description}]\n${firstMsg.content}`;
407
+ } else {
408
+ messages.unshift({ role: "system", content: `[Model: ${candidate.proxyModel.description}]` });
409
+ }
410
+ }
411
+ return {
412
+ type: "model_req", id, from: this.config.nodeId, to: candidate.routeNodeId, timestamp: Date.now(),
413
+ payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_tokens, stream },
414
+ };
322
415
  };
416
+ const frame = buildFrame(first, requestId);
323
417
 
324
418
  if (stream) {
325
- return this.handleStreamRequest(requestId, routeNodeId, frame, "chat");
419
+ return this.handleStreamRequest(requestId, first.routeNodeId, frame, "chat", rest, buildFrame);
326
420
  } else {
327
- return this.handleNonStreamRequest(requestId, routeNodeId, frame, "chat");
421
+ return this.handleNonStreamRequest(requestId, first.routeNodeId, frame, "chat", rest, buildFrame);
328
422
  }
329
423
  }
330
424
 
@@ -336,46 +430,52 @@ export class ModelProxy {
336
430
  return { status: 400, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: "Invalid JSON" }) };
337
431
  }
338
432
 
339
- const resolved = this.resolveModelRoute(body.model);
433
+ const resolved = this.resolveModelCandidates(body.model);
340
434
  if ("error" in resolved) {
341
435
  return { status: resolved.error.status, headers: { "Content-Type": "application/json" }, body: JSON.stringify({ error: { message: resolved.error.message } }) };
342
436
  }
343
437
 
344
- const { modelId, proxyModel, routeNodeId } = resolved;
438
+ const { modelId, candidates } = resolved;
439
+ const first = candidates[0]!;
440
+ const rest = candidates.slice(1);
345
441
 
346
- // Normalize responses API input → simple chat messages for WS transport.
347
- // Responses API items use {type: "message", role, content: [{type: "input_text", text}]}
348
- // but WS protocol carries simple {role, content} chat messages.
349
- const messages = ModelProxy.normalizeResponsesInput(body.input);
442
+ // Pass original Responses API input items through WS without normalizing.
443
+ // The remote side will convert formats as needed based on its own API type.
444
+ // Note: requires all cluster nodes to be on the same plugin version (inputFormat field).
445
+ const baseItems = Array.isArray(body.input) ? [...body.input] : (typeof body.input === "string" ? [{ role: "user", content: body.input }] : []);
350
446
 
351
447
  // Prepend instructions as system/developer message
352
448
  if (body.instructions) {
353
- messages.unshift({ role: "developer", content: body.instructions });
354
- }
355
-
356
- if (proxyModel?.description) {
357
- const first = messages[0] as { role?: string; content?: string } | undefined;
358
- if (first?.role === "system" && typeof first.content === "string") {
359
- first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
360
- } else if (first?.role === "developer" && typeof first.content === "string") {
361
- first.content = `[Model: ${proxyModel.description}]\n${first.content}`;
362
- } else {
363
- messages.unshift({ role: "system", content: `[Model: ${proxyModel.description}]` });
364
- }
449
+ baseItems.unshift({ role: "developer", content: body.instructions });
365
450
  }
366
451
 
367
452
  const stream = body.stream ?? false;
368
453
  const requestId = crypto.randomUUID();
369
- debug("proxy", `responses: stream=${stream} messages=${messages.length} input_type=${typeof body.input}${Array.isArray(body.input) ? `[${body.input.length}]` : ""}`);
370
- const frame: ModelRequest = {
371
- type: "model_req", id: requestId, from: this.config.nodeId, to: routeNodeId, timestamp: Date.now(),
372
- payload: { model: modelId, provider: proxyModel?.provider, api: proxyModel?.api, messages, temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
454
+ debug("proxy", `responses: stream=${stream} messages=${baseItems.length} input_type=${typeof body.input}${Array.isArray(body.input) ? `[${body.input.length}]` : ""}`);
455
+ const buildFrame = (candidate: FailoverCandidate, id: string): ModelRequest => {
456
+ // Clone items so each candidate gets its own description prefix
457
+ const inputItems = baseItems.map((item: unknown) => (item && typeof item === "object" ? { ...(item as object) } : item));
458
+ if (candidate.proxyModel?.description) {
459
+ const firstItem = inputItems[0] as { role?: string; content?: string } | undefined;
460
+ if (firstItem?.role === "system" && typeof firstItem.content === "string") {
461
+ firstItem.content = `[Model: ${candidate.proxyModel.description}]\n${firstItem.content}`;
462
+ } else if (firstItem?.role === "developer" && typeof firstItem.content === "string") {
463
+ firstItem.content = `[Model: ${candidate.proxyModel.description}]\n${firstItem.content}`;
464
+ } else {
465
+ inputItems.unshift({ role: "system", content: `[Model: ${candidate.proxyModel.description}]` });
466
+ }
467
+ }
468
+ return {
469
+ type: "model_req", id, from: this.config.nodeId, to: candidate.routeNodeId, timestamp: Date.now(),
470
+ payload: { model: modelId, provider: candidate.proxyModel?.provider, api: candidate.proxyModel?.api, messages: inputItems, inputFormat: "responses", temperature: body.temperature, maxTokens: body.max_output_tokens, stream },
471
+ };
373
472
  };
473
+ const frame = buildFrame(first, requestId);
374
474
 
375
475
  if (stream) {
376
- return this.handleStreamRequest(requestId, routeNodeId, frame, "responses");
476
+ return this.handleStreamRequest(requestId, first.routeNodeId, frame, "responses", rest, buildFrame);
377
477
  } else {
378
- return this.handleNonStreamRequest(requestId, routeNodeId, frame, "responses");
478
+ return this.handleNonStreamRequest(requestId, first.routeNodeId, frame, "responses", rest, buildFrame);
379
479
  }
380
480
  }
381
481
 
@@ -384,57 +484,22 @@ export class ModelProxy {
384
484
  targetNodeId: string,
385
485
  frame: ModelRequest,
386
486
  responseFormat: ResponseFormat,
487
+ failoverCandidates: FailoverCandidate[] = [],
488
+ buildFrame?: (candidate: FailoverCandidate, newId: string) => ModelRequest,
387
489
  ): ProxyResponse & { body: ReadableStream } {
388
490
  const encoder = new TextEncoder();
389
491
  const model = frame.payload.model;
390
492
 
391
493
  const readable = new ReadableStream({
392
494
  start: (controller) => {
393
- const timer = setTimeout(() => {
394
- this.pending.delete(requestId);
395
- this.streamText.delete(requestId);
396
- this.peerManager.router.markFailed(requestId);
397
- try {
398
- if (responseFormat === "responses") {
399
- controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: "\n\n[ClawMatrix] Error: model request timed out" })}\n\n`));
400
- this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
401
- } else {
402
- controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: "\n\n[ClawMatrix] Error: model request timed out" }, finish_reason: "stop" }] })}\n\n`));
403
- controller.enqueue(encoder.encode("data: [DONE]\n\n"));
404
- }
405
- controller.close();
406
- } catch { /* controller may already be closed */ }
407
- }, MODEL_TIMEOUT);
408
-
409
- this.pending.set(requestId, {
410
- resolve: () => {}, reject: () => {},
411
- timer, stream: true, responseFormat, model,
412
- controller, encoder,
413
- });
414
-
415
- // Emit setup events for responses API
416
- if (responseFormat === "responses") {
417
- this.enqueueResponsesStreamSetup(controller, encoder, requestId, model);
418
- }
419
-
420
- const sent = this.peerManager.sendTo(targetNodeId, frame);
421
- if (!sent) {
422
- this.pending.delete(requestId);
423
- clearTimeout(timer);
424
- try {
425
- if (responseFormat === "responses") {
426
- controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` })}\n\n`));
427
- this.enqueueResponsesStreamDone(controller, encoder, requestId, model);
428
- } else {
429
- controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: `[ClawMatrix] Cannot reach model node "${targetNodeId}"` }, finish_reason: "stop" }] })}\n\n`));
430
- controller.enqueue(encoder.encode("data: [DONE]\n\n"));
431
- }
432
- controller.close();
433
- } catch { /* controller may already be closed */ }
434
- }
495
+ this.startStreamAttempt(requestId, targetNodeId, frame, responseFormat, controller, encoder, model, failoverCandidates, buildFrame);
435
496
  },
436
497
  });
437
498
 
499
+ // Emit setup events for responses API (once, before any attempts)
500
+ // Note: we enqueue after creating the ReadableStream but the controller
501
+ // is available synchronously in the `start` callback.
502
+
438
503
  return {
439
504
  status: 200,
440
505
  headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" },
@@ -442,6 +507,91 @@ export class ModelProxy {
442
507
  };
443
508
  }
444
509
 
510
+ /** Start (or retry) a stream attempt to a specific node. */
511
+ private startStreamAttempt(
512
+ requestId: string,
513
+ targetNodeId: string,
514
+ frame: ModelRequest,
515
+ responseFormat: ResponseFormat,
516
+ controller: ReadableStreamDefaultController,
517
+ encoder: TextEncoder,
518
+ model: string,
519
+ failoverCandidates: FailoverCandidate[],
520
+ buildFrame?: (candidate: FailoverCandidate, newId: string) => ModelRequest,
521
+ /** Stable ID for the entire stream (used for setup events & final close). Defaults to requestId on first attempt. */
522
+ streamId?: string,
523
+ ) {
524
+ const stableId = streamId ?? requestId;
525
+
526
+ const timer = setTimeout(() => {
527
+ this.pending.delete(requestId);
528
+ this.streamText.delete(requestId);
529
+ this.peerManager.router.markFailed(requestId);
530
+ this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `model request to "${targetNodeId}" timed out`);
531
+ }, MODEL_TIMEOUT);
532
+
533
+ this.pending.set(requestId, {
534
+ resolve: () => {}, reject: () => {},
535
+ timer, stream: true, responseFormat, model,
536
+ controller, encoder,
537
+ hasContent: false,
538
+ failoverCandidates,
539
+ buildFrame,
540
+ stableStreamId: stableId,
541
+ });
542
+
543
+ // Emit setup events for responses API (only once per stream, keyed by stableId)
544
+ if (responseFormat === "responses" && !this.streamSetupSent.has(stableId)) {
545
+ this.enqueueResponsesStreamSetup(controller, encoder, stableId, model);
546
+ this.streamSetupSent.add(stableId);
547
+ }
548
+
549
+ const sent = this.peerManager.sendTo(targetNodeId, frame);
550
+ if (!sent) {
551
+ this.pending.delete(requestId);
552
+ clearTimeout(timer);
553
+ this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `cannot reach node "${targetNodeId}"`);
554
+ }
555
+ }
556
+
557
+ /** Track which stream requests have already sent responses API setup events. */
558
+ private streamSetupSent = new Set<string>();
559
+
560
+ /** Attempt failover to next candidate, or close stream with error. */
561
+ private tryStreamFailover(
562
+ stableStreamId: string,
563
+ responseFormat: ResponseFormat,
564
+ controller: ReadableStreamDefaultController,
565
+ encoder: TextEncoder,
566
+ model: string,
567
+ candidates: FailoverCandidate[],
568
+ buildFrame?: (candidate: FailoverCandidate, newId: string) => ModelRequest,
569
+ reason?: string,
570
+ ) {
571
+ if (candidates.length > 0 && buildFrame) {
572
+ const next = candidates[0]!;
573
+ const remaining = candidates.slice(1);
574
+ const newId = crypto.randomUUID();
575
+ const newFrame = buildFrame(next, newId);
576
+ debug("proxy", `failover: ${reason} → trying ${next.routeNodeId} (${remaining.length} left)`);
577
+ this.startStreamAttempt(newId, next.routeNodeId, newFrame, responseFormat, controller, encoder, model, remaining, buildFrame, stableStreamId);
578
+ } else {
579
+ debug("proxy", `failover exhausted: ${reason}`);
580
+ try {
581
+ const errMsg = `\n\n[ClawMatrix] Error: ${reason ?? "all candidates failed"}`;
582
+ if (responseFormat === "responses") {
583
+ controller.enqueue(encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: errMsg })}\n\n`));
584
+ this.enqueueResponsesStreamDone(controller, encoder, stableStreamId, model, errMsg);
585
+ } else {
586
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${stableStreamId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: errMsg }, finish_reason: "stop" }] })}\n\n`));
587
+ controller.enqueue(encoder.encode("data: [DONE]\n\n"));
588
+ }
589
+ controller.close();
590
+ } catch { /* controller may already be closed */ }
591
+ this.streamSetupSent.delete(stableStreamId);
592
+ }
593
+ }
594
+
445
595
  /** Emit responses API stream setup events (response.created → content_part.added). */
446
596
  private enqueueResponsesStreamSetup(controller: ReadableStreamDefaultController, encoder: TextEncoder, id: string, model: string) {
447
597
  const respId = `resp_${id}`;
@@ -478,98 +628,148 @@ export class ModelProxy {
478
628
  targetNodeId: string,
479
629
  frame: ModelRequest,
480
630
  responseFormat: ResponseFormat,
631
+ failoverCandidates: FailoverCandidate[] = [],
632
+ buildFrame?: (candidate: FailoverCandidate, newId: string) => ModelRequest,
481
633
  ): Promise<ProxyResponse & { body: string }> {
482
- try {
483
- const result = await new Promise<ModelResponse["payload"]>(
484
- (resolve, reject) => {
485
- const timer = setTimeout(() => {
486
- this.pending.delete(requestId);
487
- this.peerManager.router.markFailed(requestId);
488
- reject(new Error("Model request timed out"));
489
- }, MODEL_TIMEOUT);
490
-
491
- this.pending.set(requestId, {
492
- resolve: resolve as (v: unknown) => void,
493
- reject, timer, stream: false, responseFormat,
494
- });
495
-
496
- const sent = this.peerManager.sendTo(targetNodeId, frame);
497
- if (!sent) {
498
- this.pending.delete(requestId);
499
- clearTimeout(timer);
500
- reject(new Error("Cannot reach model node"));
634
+ let currentId = requestId;
635
+ let currentTarget = targetNodeId;
636
+ let currentFrame = frame;
637
+ let remaining = failoverCandidates;
638
+ const maxAttempts = failoverCandidates.length + 1;
639
+
640
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
641
+ try {
642
+ const result = await this.sendNonStreamAndWait(currentId, currentTarget, currentFrame, responseFormat);
643
+
644
+ if (!result.success) {
645
+ // Upstream error try failover if available
646
+ if (remaining.length > 0 && buildFrame) {
647
+ const next = remaining[0]!;
648
+ debug("proxy", `failover: remote error "${result.error}" → trying ${next.routeNodeId} (${remaining.length - 1} left)`);
649
+ currentId = crypto.randomUUID();
650
+ currentFrame = buildFrame(next, currentId);
651
+ currentTarget = next.routeNodeId;
652
+ remaining = remaining.slice(1);
653
+ continue;
501
654
  }
502
- },
503
- );
655
+ return {
656
+ status: 502,
657
+ headers: { "Content-Type": "application/json" },
658
+ body: JSON.stringify({ error: { message: result.error } }),
659
+ };
660
+ }
504
661
 
505
- if (!result.success) {
662
+ return this.formatNonStreamResult(result, currentId, currentFrame, responseFormat);
663
+ } catch (err) {
664
+ // Timeout or send failure — try failover
665
+ if (remaining.length > 0 && buildFrame) {
666
+ const next = remaining[0]!;
667
+ debug("proxy", `failover: ${err instanceof Error ? err.message : String(err)} → trying ${next.routeNodeId} (${remaining.length - 1} left)`);
668
+ currentId = crypto.randomUUID();
669
+ currentFrame = buildFrame(next, currentId);
670
+ currentTarget = next.routeNodeId;
671
+ remaining = remaining.slice(1);
672
+ continue;
673
+ }
506
674
  return {
507
675
  status: 502,
508
676
  headers: { "Content-Type": "application/json" },
509
- body: JSON.stringify({ error: { message: result.error } }),
677
+ body: JSON.stringify({ error: { message: err instanceof Error ? err.message : String(err) } }),
510
678
  };
511
679
  }
680
+ }
681
+ // Safety: should not reach here, but return error if loop exhausts
682
+ return {
683
+ status: 502,
684
+ headers: { "Content-Type": "application/json" },
685
+ body: JSON.stringify({ error: { message: "All failover candidates exhausted" } }),
686
+ };
687
+ }
512
688
 
513
- if (responseFormat === "responses") {
514
- const msgId = `msg_${requestId}`;
515
- const usageObj = result.usage
516
- ? { input_tokens: result.usage.inputTokens, output_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
517
- : { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
518
- // If upstream sent full output array (responses API), use it directly
519
- const output = Array.isArray(result.message)
520
- ? result.message
521
- : [{
522
- type: "message", id: msgId, role: "assistant",
523
- content: [{ type: "output_text", text: result.content ?? "" }],
524
- status: "completed",
525
- }];
526
- return {
527
- status: 200,
528
- headers: { "Content-Type": "application/json" },
529
- body: JSON.stringify({
530
- id: `resp_${requestId}`,
531
- object: "response",
532
- created_at: Math.floor(Date.now() / 1000),
533
- status: "completed",
534
- model: frame.payload.model,
535
- output,
536
- usage: usageObj,
537
- }),
538
- };
539
- }
689
+ private sendNonStreamAndWait(
690
+ requestId: string,
691
+ targetNodeId: string,
692
+ frame: ModelRequest,
693
+ responseFormat: ResponseFormat,
694
+ ): Promise<ModelResponse["payload"]> {
695
+ return new Promise<ModelResponse["payload"]>((resolve, reject) => {
696
+ const timer = setTimeout(() => {
697
+ this.pending.delete(requestId);
698
+ this.peerManager.router.markFailed(requestId);
699
+ reject(new Error(`Model request to "${targetNodeId}" timed out`));
700
+ }, MODEL_TIMEOUT);
701
+
702
+ this.pending.set(requestId, {
703
+ resolve: resolve as (v: unknown) => void,
704
+ reject, timer, stream: false, responseFormat,
705
+ });
540
706
 
541
- // Chat completions format — use full message object when available (has tool_calls etc.)
542
- const msg = result.message as Record<string, unknown> | undefined;
543
- const message = msg
544
- ? { role: "assistant", ...msg }
545
- : { role: "assistant", content: result.content };
546
- const finishReason = msg?.tool_calls ? "tool_calls" : "stop";
707
+ const sent = this.peerManager.sendTo(targetNodeId, frame);
708
+ if (!sent) {
709
+ this.pending.delete(requestId);
710
+ clearTimeout(timer);
711
+ reject(new Error(`Cannot reach model node "${targetNodeId}"`));
712
+ }
713
+ });
714
+ }
547
715
 
716
+ private formatNonStreamResult(
717
+ result: ModelResponse["payload"],
718
+ requestId: string,
719
+ frame: ModelRequest,
720
+ responseFormat: ResponseFormat,
721
+ ): ProxyResponse & { body: string } {
722
+ if (responseFormat === "responses") {
723
+ const msgId = `msg_${requestId}`;
724
+ const usageObj = result.usage
725
+ ? { input_tokens: result.usage.inputTokens, output_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
726
+ : { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
727
+ const output = Array.isArray(result.message)
728
+ ? result.message
729
+ : [{
730
+ type: "message", id: msgId, role: "assistant",
731
+ content: [{ type: "output_text", text: result.content ?? "" }],
732
+ status: "completed",
733
+ }];
548
734
  return {
549
735
  status: 200,
550
736
  headers: { "Content-Type": "application/json" },
551
737
  body: JSON.stringify({
552
- id: `chatcmpl-${requestId}`,
553
- object: "chat.completion",
554
- created: Math.floor(Date.now() / 1000),
738
+ id: `resp_${requestId}`,
739
+ object: "response",
740
+ created_at: Math.floor(Date.now() / 1000),
741
+ status: "completed",
555
742
  model: frame.payload.model,
556
- choices: [{
557
- index: 0,
558
- message,
559
- finish_reason: finishReason,
560
- }],
561
- usage: result.usage
562
- ? { prompt_tokens: result.usage.inputTokens, completion_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
563
- : undefined,
743
+ output,
744
+ usage: usageObj,
564
745
  }),
565
746
  };
566
- } catch (err) {
567
- return {
568
- status: 502,
569
- headers: { "Content-Type": "application/json" },
570
- body: JSON.stringify({ error: { message: err instanceof Error ? err.message : String(err) } }),
571
- };
572
747
  }
748
+
749
+ const msg = result.message as Record<string, unknown> | undefined;
750
+ const message = msg
751
+ ? { role: "assistant", ...msg }
752
+ : { role: "assistant", content: result.content };
753
+ const finishReason = msg?.tool_calls ? "tool_calls" : "stop";
754
+
755
+ return {
756
+ status: 200,
757
+ headers: { "Content-Type": "application/json" },
758
+ body: JSON.stringify({
759
+ id: `chatcmpl-${requestId}`,
760
+ object: "chat.completion",
761
+ created: Math.floor(Date.now() / 1000),
762
+ model: frame.payload.model,
763
+ choices: [{
764
+ index: 0,
765
+ message,
766
+ finish_reason: finishReason,
767
+ }],
768
+ usage: result.usage
769
+ ? { prompt_tokens: result.usage.inputTokens, completion_tokens: result.usage.outputTokens, total_tokens: result.usage.inputTokens + result.usage.outputTokens }
770
+ : undefined,
771
+ }),
772
+ };
573
773
  }
574
774
 
575
775
  private handleListModels(): ProxyResponse & { body: string } {
@@ -622,17 +822,29 @@ export class ModelProxy {
622
822
  if (!frame.payload.success && pending.controller && pending.encoder) {
623
823
  clearTimeout(pending.timer);
624
824
  this.pending.delete(frame.id);
625
- try {
626
- const errMsg = `[ClawMatrix] Remote error: ${frame.payload.error}`;
627
- if (pending.responseFormat === "responses") {
628
- pending.controller.enqueue(pending.encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: errMsg })}\n\n`));
629
- this.enqueueResponsesStreamDone(pending.controller, pending.encoder, frame.id, pending.model ?? "", errMsg);
630
- } else {
631
- pending.controller.enqueue(pending.encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: errMsg }, finish_reason: "stop" }] })}\n\n`));
632
- pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
633
- }
634
- pending.controller.close();
635
- } catch { /* controller may already be closed */ }
825
+ this.streamText.delete(frame.id);
826
+ // Try failover if no content has been sent yet
827
+ if (!pending.hasContent && pending.failoverCandidates?.length && pending.buildFrame) {
828
+ this.tryStreamFailover(
829
+ pending.stableStreamId ?? frame.id, pending.responseFormat, pending.controller, pending.encoder,
830
+ pending.model ?? "", pending.failoverCandidates, pending.buildFrame,
831
+ `remote error: ${frame.payload.error}`,
832
+ );
833
+ } else {
834
+ const stableId = pending.stableStreamId ?? frame.id;
835
+ try {
836
+ const errMsg = `[ClawMatrix] Remote error: ${frame.payload.error}`;
837
+ if (pending.responseFormat === "responses") {
838
+ pending.controller.enqueue(pending.encoder.encode(`event: response.output_text.delta\ndata: ${JSON.stringify({ type: "response.output_text.delta", delta: errMsg })}\n\n`));
839
+ this.enqueueResponsesStreamDone(pending.controller, pending.encoder, stableId, pending.model ?? "", errMsg);
840
+ } else {
841
+ pending.controller.enqueue(pending.encoder.encode(`data: ${JSON.stringify({ id: `chatcmpl-${stableId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta: { content: errMsg }, finish_reason: "stop" }] })}\n\n`));
842
+ pending.controller.enqueue(pending.encoder.encode("data: [DONE]\n\n"));
843
+ }
844
+ pending.controller.close();
845
+ } catch { /* controller may already be closed */ }
846
+ this.streamSetupSent.delete(stableId);
847
+ }
636
848
  }
637
849
  return;
638
850
  }
@@ -666,8 +878,9 @@ export class ModelProxy {
666
878
 
667
879
  private handleModelStreamChat(frame: ModelStreamChunk, pending: PendingModelReq) {
668
880
  if (frame.payload.done) {
881
+ const stableId = pending.stableStreamId ?? frame.id;
669
882
  const finalChunk: Record<string, unknown> = {
670
- id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk",
883
+ id: `chatcmpl-${stableId}`, object: "chat.completion.chunk",
671
884
  choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
672
885
  };
673
886
  if (frame.payload.usage) {
@@ -678,27 +891,34 @@ export class ModelProxy {
678
891
  pending.controller!.close();
679
892
  clearTimeout(pending.timer);
680
893
  this.pending.delete(frame.id);
894
+ this.streamSetupSent.delete(stableId);
681
895
  } else {
682
896
  // Use full deltaObj when available (carries tool_calls etc.), otherwise simple text delta
683
897
  const delta = frame.payload.deltaObj ?? { content: frame.payload.delta };
684
- const chunk = { id: `chatcmpl-${frame.id}`, object: "chat.completion.chunk", choices: [{ index: 0, delta, finish_reason: null }] };
898
+ const chunkStableId = pending.stableStreamId ?? frame.id;
899
+ const chunk = { id: `chatcmpl-${chunkStableId}`, object: "chat.completion.chunk", choices: [{ index: 0, delta, finish_reason: null }] };
685
900
  pending.controller!.enqueue(pending.encoder!.encode(`data: ${JSON.stringify(chunk)}\n\n`));
901
+ pending.hasContent = true;
686
902
  }
687
903
  }
688
904
 
689
905
  private handleModelStreamResponses(frame: ModelStreamChunk, pending: PendingModelReq) {
690
906
  if (frame.payload.done) {
907
+ const stableId = pending.stableStreamId ?? frame.id;
691
908
  const fullText = this.streamText.get(frame.id) ?? "";
692
909
  this.streamText.delete(frame.id);
693
- this.enqueueResponsesStreamDone(pending.controller!, pending.encoder!, frame.id, pending.model ?? "", fullText, frame.payload.usage);
910
+ this.enqueueResponsesStreamDone(pending.controller!, pending.encoder!, stableId, pending.model ?? "", fullText, frame.payload.usage);
694
911
  pending.controller!.close();
695
912
  clearTimeout(pending.timer);
696
913
  this.pending.delete(frame.id);
914
+ this.streamSetupSent.delete(stableId);
697
915
  } else {
698
916
  // Accumulate text for done event
699
- this.streamText.set(frame.id, (this.streamText.get(frame.id) ?? "") + frame.payload.delta);
700
- const evt = { type: "response.output_text.delta", item_id: `msg_${frame.id}`, output_index: 0, content_index: 0, delta: frame.payload.delta };
917
+ this.streamText.set(frame.id, (this.streamText.get(frame.id) ?? "") + (frame.payload.delta ?? ""));
918
+ const respStableId = pending.stableStreamId ?? frame.id;
919
+ const evt = { type: "response.output_text.delta", item_id: `msg_${respStableId}`, output_index: 0, content_index: 0, delta: frame.payload.delta };
701
920
  pending.controller!.enqueue(pending.encoder!.encode(`event: response.output_text.delta\ndata: ${JSON.stringify(evt)}\n\n`));
921
+ pending.hasContent = true;
702
922
  }
703
923
  }
704
924
 
@@ -762,22 +982,34 @@ export class ModelProxy {
762
982
  }
763
983
 
764
984
  const modelField = endpoint.direct ? model.id : `${model.provider}/${model.id}`;
765
- const requestBody = isResponsesApi
766
- ? {
767
- model: modelField,
768
- input: payload.messages,
769
- stream: payload.stream,
770
- temperature: payload.temperature,
771
- max_output_tokens: payload.maxTokens,
772
- }
773
- : {
774
- model: modelField,
775
- messages: payload.messages,
776
- temperature: payload.temperature,
777
- max_tokens: payload.maxTokens,
778
- stream: payload.stream,
779
- ...(payload.stream ? { stream_options: { include_usage: true } } : {}),
780
- };
985
+ const srcFormat = payload.inputFormat ?? "chat";
986
+
987
+ // Convert messages between formats if source and target API differ
988
+ let requestBody: Record<string, unknown>;
989
+ if (isResponsesApi) {
990
+ const input = srcFormat === "responses"
991
+ ? payload.messages // already Responses API format, pass through
992
+ : ModelProxy.chatToResponsesInput(payload.messages); // chat → responses
993
+ requestBody = {
994
+ model: modelField,
995
+ input,
996
+ stream: payload.stream,
997
+ temperature: payload.temperature,
998
+ max_output_tokens: payload.maxTokens,
999
+ };
1000
+ } else {
1001
+ const messages = srcFormat === "chat"
1002
+ ? payload.messages // already chat format, pass through
1003
+ : ModelProxy.normalizeResponsesInput(payload.messages); // responses → chat
1004
+ requestBody = {
1005
+ model: modelField,
1006
+ messages,
1007
+ temperature: payload.temperature,
1008
+ max_tokens: payload.maxTokens,
1009
+ stream: payload.stream,
1010
+ ...(payload.stream ? { stream_options: { include_usage: true } } : {}),
1011
+ };
1012
+ }
781
1013
 
782
1014
  const response = await fetch(url, {
783
1015
  method: "POST",