clawmatrix 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,11 +8,52 @@ import type {
8
8
  ModelResponse,
9
9
  ModelStreamChunk,
10
10
  } from "./types.ts";
11
+ import { nanoid } from "nanoid";
12
+ import { LRUCache } from "lru-cache";
13
+ import { Semaphore as AsyncSemaphore } from "async-mutex";
14
+ import { getNodeCircuitBreaker, isNodeCircuitOpen, removeNodeCircuitBreaker, resetAllCircuitBreakers } from "./retry.ts";
11
15
  import { debug } from "./debug.ts";
12
16
  import { readBody } from "./http-utils.ts";
13
17
 
14
18
  const DEFAULT_MODEL_TIMEOUT = 120_000; // 2 minutes
15
19
 
20
+ // ── Semaphore for per-node concurrency control (backed by async-mutex) ──
21
+ class Semaphore { // wraps an async-mutex Semaphore and adds a timeout to acquire()
22
+ private sem: AsyncSemaphore; // underlying async-mutex semaphore that holds the permits
23
+
24
+ constructor(max: number) { // max is passed straight through to the AsyncSemaphore constructor
25
+ this.sem = new AsyncSemaphore(max);
26
+ }
27
+
28
+ /** Acquire a permit. Resolves once the underlying semaphore grants a slot; rejects with Error("Semaphore timeout") if timeoutMs elapses first, or with the underlying error if the acquire itself rejects. A caller whose acquire resolved gives the permit back via release(). */
29
+ acquire(timeoutMs: number): Promise<void> {
30
+ return new Promise<void>((resolve, reject) => {
31
+ let settled = false; // ensures the outer promise is settled exactly once (timer vs. acquire race)
32
+ const timer = setTimeout(() => {
33
+ if (!settled) { settled = true; reject(new Error("Semaphore timeout")); }
34
+ }, timeoutMs);
35
+
36
+ this.sem.acquire().then(([, release]) => { // second tuple element is the releaser for this grant
37
+ clearTimeout(timer);
38
+ if (settled) { release(); return; } // timeout already fired → release slot to avoid deadlock
39
+ settled = true;
40
+ resolve();
41
+ }, (err) => { // underlying acquire rejected; presumably also the path taken when drain() cancels waiters — confirm against async-mutex
42
+ clearTimeout(timer);
43
+ if (!settled) { settled = true; reject(err); }
44
+ });
45
+ });
46
+ }
47
+
48
+ release() { // delegates to the underlying semaphore's release()
49
+ this.sem.release();
50
+ }
51
+
52
+ drain() { // delegates to the underlying semaphore's cancel(); presumably rejects queued waiters — confirm against async-mutex docs
53
+ this.sem.cancel();
54
+ }
55
+ }
56
+
16
57
  /** Normalize usage from OpenAI-compatible APIs (supports both field naming conventions). */
17
58
  function parseUsage(usage: Record<string, number> | undefined): { inputTokens: number; outputTokens: number } | undefined {
18
59
  if (!usage) return undefined;
@@ -54,6 +95,8 @@ interface PendingModelReq {
54
95
  buildFrame?: (candidate: FailoverCandidate, newId: string) => ModelRequest;
55
96
  /** Stable ID for the entire stream (for setup events & final close). */
56
97
  stableStreamId?: string;
98
+ /** Release concurrency semaphore when request completes. */
99
+ release?: () => void;
57
100
  }
58
101
 
59
102
  export class ModelProxy {
@@ -65,14 +108,17 @@ export class ModelProxy {
65
108
  private openclawConfig: OpenClawConfig;
66
109
  private readonly modelTimeout: number;
67
110
 
111
+ /** Per-node concurrency control semaphores. */
112
+ private nodeSemaphores = new Map<string, Semaphore>();
113
+ private readonly modelConcurrency: number;
114
+
68
115
  /** Dynamically discovered proxy models from peer capabilities (auto-discovery). */
69
116
  private discoveredModels: import("./config.ts").ProxyModel[] = [];
70
117
 
71
118
  /** Cache of models that need a different API format than configured (detected at runtime).
72
119
  * Entries expire after 10 minutes so upstream upgrades are eventually detected. */
73
- private modelApiCache = new Map<string, { api: string; ts: number }>();
74
120
  private static readonly MODEL_API_CACHE_TTL = 600_000; // 10 minutes
75
- private cacheCleanupTimer: ReturnType<typeof setInterval> | null = null;
121
+ private modelApiCache = new LRUCache<string, string>({ max: 200, ttl: ModelProxy.MODEL_API_CACHE_TTL });
76
122
 
77
123
  constructor(config: ClawMatrixConfig, peerManager: PeerManager, gatewayInfo: GatewayInfo, openclawConfig: OpenClawConfig) {
78
124
  this.config = config;
@@ -80,6 +126,16 @@ export class ModelProxy {
80
126
  this.gatewayInfo = gatewayInfo;
81
127
  this.openclawConfig = openclawConfig;
82
128
  this.modelTimeout = config.modelTimeout ?? DEFAULT_MODEL_TIMEOUT;
129
+ this.modelConcurrency = config.modelConcurrency ?? 5;
130
+ }
131
+
132
+ private getSemaphore(nodeId: string): Semaphore { // returns the semaphore for nodeId, creating it on first use
133
+ let sem = this.nodeSemaphores.get(nodeId);
134
+ if (!sem) { // first request for this node: create a semaphore sized by modelConcurrency and remember it
135
+ sem = new Semaphore(this.modelConcurrency);
136
+ this.nodeSemaphores.set(nodeId, sem);
137
+ }
138
+ return sem;
83
139
  }
84
140
 
85
141
  /** All proxy models: static config + dynamically discovered from peers. */
@@ -309,13 +365,7 @@ export class ModelProxy {
309
365
 
310
366
  /** Start the local HTTP proxy server for OpenAI-compatible requests. */
311
367
  start() {
312
- // Periodically prune expired model API cache entries
313
- this.cacheCleanupTimer = setInterval(() => {
314
- const now = Date.now();
315
- for (const [id, entry] of this.modelApiCache) {
316
- if (now - entry.ts > ModelProxy.MODEL_API_CACHE_TTL) this.modelApiCache.delete(id);
317
- }
318
- }, ModelProxy.MODEL_API_CACHE_TTL);
368
+ // LRU cache handles TTL-based expiration automatically
319
369
 
320
370
  this.httpServer = createServer(async (req, res) => {
321
371
  try {
@@ -366,10 +416,7 @@ export class ModelProxy {
366
416
  }
367
417
 
368
418
  stop() {
369
- if (this.cacheCleanupTimer) {
370
- clearInterval(this.cacheCleanupTimer);
371
- this.cacheCleanupTimer = null;
372
- }
419
+ this.modelApiCache.clear();
373
420
  if (this.httpServer) {
374
421
  // Force-close all keep-alive connections so the port is released immediately
375
422
  const server = this.httpServer as typeof this.httpServer & { closeAllConnections?: () => void };
@@ -387,12 +434,18 @@ export class ModelProxy {
387
434
  this.streamText.clear();
388
435
  this.streamSetupSent.clear();
389
436
  this.modelApiCache.clear();
437
+ for (const sem of this.nodeSemaphores.values()) sem.drain();
438
+ this.nodeSemaphores.clear();
439
+ resetAllCircuitBreakers();
390
440
  }
391
441
 
392
442
  /** Clean up all tracking state for a request (pending, streamText, streamSetupSent). */
393
443
  private cleanupRequest(id: string, stableStreamId?: string) {
394
444
  const pending = this.pending.get(id);
395
- if (pending) clearTimeout(pending.timer);
445
+ if (pending) {
446
+ clearTimeout(pending.timer);
447
+ pending.release?.();
448
+ }
396
449
  this.pending.delete(id);
397
450
  this.streamText.delete(id);
398
451
  if (stableStreamId) this.streamSetupSent.delete(stableStreamId);
@@ -477,8 +530,11 @@ export class ModelProxy {
477
530
  }
478
531
  }
479
532
 
480
- // Sort candidates by latency (lowest first) for optimal first-try and failover order
533
+ // Sort candidates: circuit-open nodes last, then direct before relay, then by latency
481
534
  candidates.sort((a, b) => {
535
+ const aOpen = isNodeCircuitOpen(a.routeNodeId) ? 1 : 0;
536
+ const bOpen = isNodeCircuitOpen(b.routeNodeId) ? 1 : 0;
537
+ if (aOpen !== bOpen) return aOpen - bOpen;
482
538
  const routeA = this.peerManager.router.getRoute(a.routeNodeId);
483
539
  const routeB = this.peerManager.router.getRoute(b.routeNodeId);
484
540
  const aDirect = routeA?.connection ? 0 : 1;
@@ -515,7 +571,7 @@ export class ModelProxy {
515
571
  debug("proxy", `messages count=${baseMessages?.length ?? 0} roles=${(baseMessages ?? []).map((m: unknown) => (m as Record<string, unknown>)?.role).join(",")}`);
516
572
 
517
573
  const stream = body.stream ?? false;
518
- const requestId = crypto.randomUUID();
574
+ const requestId = nanoid();
519
575
  const buildFrame = (candidate: FailoverCandidate, id: string): ModelRequest => {
520
576
  // Clone messages so each candidate gets its own description prefix
521
577
  const messages = baseMessages.map((m: unknown) => (m && typeof m === "object" ? { ...(m as object) } : m));
@@ -569,7 +625,7 @@ export class ModelProxy {
569
625
  }
570
626
 
571
627
  const stream = body.stream ?? false;
572
- const requestId = crypto.randomUUID();
628
+ const requestId = nanoid();
573
629
  debug("proxy", `responses: stream=${stream} messages=${baseItems.length} input_type=${typeof body.input}${Array.isArray(body.input) ? `[${body.input.length}]` : ""}`);
574
630
  const buildFrame = (candidate: FailoverCandidate, id: string): ModelRequest => {
575
631
  // Clone items so each candidate gets its own description prefix
@@ -653,35 +709,47 @@ export class ModelProxy {
653
709
  ) {
654
710
  const stableId = streamId ?? requestId;
655
711
 
656
- const timer = setTimeout(() => {
657
- this.cleanupRequest(requestId);
658
- this.peerManager.router.markFailed(requestId);
659
- this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `model request to "${targetNodeId}" timed out`);
660
- }, this.modelTimeout);
661
-
662
- this.pending.set(requestId, {
663
- resolve: () => {}, reject: () => {},
664
- timer, stream: true, responseFormat, model,
665
- targetNodeId,
666
- controller, encoder,
667
- hasContent: false,
668
- failoverCandidates,
669
- buildFrame,
670
- stableStreamId: stableId,
671
- });
712
+ // Acquire per-node concurrency permit (async, then send)
713
+ const sem = this.getSemaphore(targetNodeId);
714
+ sem.acquire(this.modelTimeout).then(() => {
715
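+ if (this.pending.has(requestId)) { sem.release(); return; } // requestId is already registered in pending → release the permit and bail out. NOTE(review): original comment read "already cleaned up" — confirm the intended condition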
672
716
 
673
- // Emit setup events for responses API (only once per stream, keyed by stableId)
674
- if (responseFormat === "responses" && !this.streamSetupSent.has(stableId)) {
675
- const hasTools = Array.isArray(frame.payload.tools) && frame.payload.tools.length > 0;
676
- this.enqueueResponsesStreamSetup(controller, encoder, stableId, model, hasTools);
677
- this.streamSetupSent.add(stableId);
678
- }
717
+ const release = () => sem.release();
679
718
 
680
- const sent = this.peerManager.sendTo(targetNodeId, frame);
681
- if (!sent) {
682
- this.cleanupRequest(requestId);
683
- this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `cannot reach node "${targetNodeId}"`);
684
- }
719
+ const timer = setTimeout(() => {
720
+ this.cleanupRequest(requestId);
721
+ this.peerManager.router.markFailed(requestId);
722
+ this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `model request to "${targetNodeId}" timed out`);
723
+ }, this.modelTimeout);
724
+
725
+ this.pending.set(requestId, {
726
+ resolve: () => {}, reject: () => {},
727
+ timer, stream: true, responseFormat, model,
728
+ targetNodeId,
729
+ controller, encoder,
730
+ hasContent: false,
731
+ failoverCandidates,
732
+ buildFrame,
733
+ stableStreamId: stableId,
734
+ release,
735
+ });
736
+
737
+ // Emit setup events for responses API (only once per stream, keyed by stableId)
738
+ if (responseFormat === "responses" && !this.streamSetupSent.has(stableId)) {
739
+ const hasTools = Array.isArray(frame.payload.tools) && frame.payload.tools.length > 0;
740
+ this.enqueueResponsesStreamSetup(controller, encoder, stableId, model, hasTools);
741
+ this.streamSetupSent.add(stableId);
742
+ }
743
+
744
+ const sent = this.peerManager.sendTo(targetNodeId, frame);
745
+ if (!sent) {
746
+ this.cleanupRequest(requestId);
747
+ this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `cannot reach node "${targetNodeId}"`);
748
+ }
749
+ }).catch(() => {
750
+ // Semaphore timeout — all slots busy
751
+ this.tryStreamFailover(stableId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, `node "${targetNodeId}" concurrency limit reached`);
752
+ });
685
753
  }
686
754
 
687
755
  /** Track which stream requests have already sent responses API setup events. */
@@ -701,7 +769,7 @@ export class ModelProxy {
701
769
  if (candidates.length > 0 && buildFrame) {
702
770
  const next = candidates[0]!;
703
771
  const remaining = candidates.slice(1);
704
- const newId = crypto.randomUUID();
772
+ const newId = nanoid();
705
773
  const newFrame = buildFrame(next, newId);
706
774
  debug("proxy", `failover: ${reason} → trying ${next.routeNodeId} (${remaining.length} left)`);
707
775
  this.startStreamAttempt(newId, next.routeNodeId, newFrame, responseFormat, controller, encoder, model, remaining, buildFrame, stableStreamId);
@@ -773,8 +841,20 @@ export class ModelProxy {
773
841
  const maxAttempts = failoverCandidates.length + 1;
774
842
 
775
843
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
844
+ // Acquire per-node concurrency permit
845
+ const sem = this.getSemaphore(currentTarget);
776
846
  try {
777
- const result = await this.sendNonStreamAndWait(currentId, currentTarget, currentFrame, responseFormat);
847
+ await sem.acquire(this.modelTimeout);
848
+ } catch {
849
+ return {
850
+ status: 503,
851
+ headers: { "Content-Type": "application/json" },
852
+ body: JSON.stringify({ error: { message: `Node "${currentTarget}" concurrency limit reached` } }),
853
+ };
854
+ }
855
+ try {
856
+ const release = () => sem.release();
857
+ const result = await this.sendNonStreamAndWait(currentId, currentTarget, currentFrame, responseFormat, release);
778
858
 
779
859
  if (!result.success) {
780
860
  // Upstream error — try failover if available
@@ -782,7 +862,7 @@ export class ModelProxy {
782
862
  const next = failoverCandidates[failoverIdx]!;
783
863
  debug("proxy", `failover: remote error "${result.error}" → trying ${next.routeNodeId} (${failoverCandidates.length - failoverIdx - 1} left)`);
784
864
  failoverIdx++;
785
- currentId = crypto.randomUUID();
865
+ currentId = nanoid();
786
866
  currentFrame = buildFrame(next, currentId);
787
867
  currentTarget = next.routeNodeId;
788
868
  continue;
@@ -801,7 +881,7 @@ export class ModelProxy {
801
881
  const next = failoverCandidates[failoverIdx]!;
802
882
  debug("proxy", `failover: ${err instanceof Error ? err.message : String(err)} → trying ${next.routeNodeId} (${failoverCandidates.length - failoverIdx - 1} left)`);
803
883
  failoverIdx++;
804
- currentId = crypto.randomUUID();
884
+ currentId = nanoid();
805
885
  currentFrame = buildFrame(next, currentId);
806
886
  currentTarget = next.routeNodeId;
807
887
  continue;
@@ -826,10 +906,11 @@ export class ModelProxy {
826
906
  targetNodeId: string,
827
907
  frame: ModelRequest,
828
908
  responseFormat: ResponseFormat,
909
+ release?: () => void,
829
910
  ): Promise<ModelResponse["payload"]> {
830
911
  return new Promise<ModelResponse["payload"]>((resolve, reject) => {
831
912
  const timer = setTimeout(() => {
832
- this.pending.delete(requestId);
913
+ this.cleanupRequest(requestId);
833
914
  this.peerManager.router.markFailed(requestId);
834
915
  reject(new Error(`Model request to "${targetNodeId}" timed out`));
835
916
  }, this.modelTimeout);
@@ -837,13 +918,12 @@ export class ModelProxy {
837
918
  this.pending.set(requestId, {
838
919
  resolve: resolve as (v: unknown) => void,
839
920
  reject, timer, stream: false, responseFormat,
840
- targetNodeId,
921
+ targetNodeId, release,
841
922
  });
842
923
 
843
924
  const sent = this.peerManager.sendTo(targetNodeId, frame);
844
925
  if (!sent) {
845
- this.pending.delete(requestId);
846
- clearTimeout(timer);
926
+ this.cleanupRequest(requestId);
847
927
  reject(new Error(`Cannot reach model node "${targetNodeId}"`));
848
928
  }
849
929
  });
@@ -952,6 +1032,16 @@ export class ModelProxy {
952
1032
  const pending = this.pending.get(frame.id);
953
1033
  if (!pending) return;
954
1034
 
1035
+ // Record circuit breaker outcome for the responding node
1036
+ if (pending.targetNodeId) {
1037
+ const cb = getNodeCircuitBreaker(pending.targetNodeId);
1038
+ if (frame.payload.success) {
1039
+ cb.onSuccess();
1040
+ } else {
1041
+ cb.onFailure();
1042
+ }
1043
+ }
1044
+
955
1045
  // For stream requests, handle error responses (the remote node couldn't
956
1046
  // process the request and sent model_res instead of model_stream).
957
1047
  if (pending.stream) {
@@ -999,10 +1089,14 @@ export class ModelProxy {
999
1089
  // Reset activity timer — keeps long-running streams alive and detects
1000
1090
  // stalled connections within modelTimeout of the last received chunk.
1001
1091
  clearTimeout(pending.timer);
1002
- if (!frame.payload.done) {
1092
+ if (frame.payload.done) {
1093
+ // Stream completed successfully — record circuit breaker success
1094
+ if (pending.targetNodeId) getNodeCircuitBreaker(pending.targetNodeId).onSuccess();
1095
+ } else {
1003
1096
  pending.timer = setTimeout(() => {
1004
1097
  // Capture references before cleanup removes pending from the map
1005
- const { stableStreamId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame } = pending;
1098
+ const { stableStreamId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame, targetNodeId } = pending;
1099
+ if (targetNodeId) getNodeCircuitBreaker(targetNodeId).onFailure();
1006
1100
  this.cleanupRequest(frame.id);
1007
1101
  this.peerManager.router.markFailed(frame.id);
1008
1102
  this.tryStreamFailover(
@@ -1160,9 +1254,7 @@ export class ModelProxy {
1160
1254
  return;
1161
1255
  }
1162
1256
  // Use payload.api override from requesting side, or cached API from previous auto-detection
1163
- const cached = this.modelApiCache.get(model.id);
1164
- const cachedApi = (cached && Date.now() - cached.ts < ModelProxy.MODEL_API_CACHE_TTL) ? cached.api : undefined;
1165
- if (cached && !cachedApi) this.modelApiCache.delete(model.id); // expired
1257
+ const cachedApi = this.modelApiCache.get(model.id);
1166
1258
  const effectiveApi = payload.api ?? cachedApi ?? endpoint.api;
1167
1259
  const isResponsesApi = effectiveApi === "openai-responses" || effectiveApi === "openai-codex-responses";
1168
1260
  const path = isResponsesApi ? "/responses" : "/chat/completions";
@@ -1309,7 +1401,7 @@ export class ModelProxy {
1309
1401
  debug("model_req", `responses API stream produced no content for "${model.id}", retrying with chat completions`);
1310
1402
  const chatResult = await this.retryWithChatCompletions(endpoint, modelField, payload, headers);
1311
1403
  if (chatResult) {
1312
- this.modelApiCache.set(model.id, { api: "openai-completions", ts: Date.now() });
1404
+ this.modelApiCache.set(model.id, "openai-completions");
1313
1405
  debug("model_req", `cached "${model.id}" as openai-completions (stream fallback)`);
1314
1406
  if (chatResult.content) {
1315
1407
  this.sendStreamDelta(from, id, chatResult.content);
@@ -1352,7 +1444,7 @@ export class ModelProxy {
1352
1444
  debug("model_req", `responses API returned non-JSON for "${model.id}", retrying with chat completions`);
1353
1445
  chatFallbackResult = await this.retryWithChatCompletions(endpoint, modelField, payload, headers);
1354
1446
  if (chatFallbackResult) {
1355
- this.modelApiCache.set(model.id, { api: "openai-completions", ts: Date.now() });
1447
+ this.modelApiCache.set(model.id, "openai-completions");
1356
1448
  debug("model_req", `cached "${model.id}" as openai-completions (non-JSON fallback)`);
1357
1449
  }
1358
1450
  }
@@ -1390,7 +1482,7 @@ export class ModelProxy {
1390
1482
  debug("model_req", `responses API returned empty output for "${model.id}" (output_tokens=${parsedUsage!.outputTokens}), retrying with chat completions`);
1391
1483
  const chatResult = await this.retryWithChatCompletions(endpoint, modelField, payload, headers);
1392
1484
  if (chatResult) {
1393
- this.modelApiCache.set(model.id, { api: "openai-completions", ts: Date.now() });
1485
+ this.modelApiCache.set(model.id, "openai-completions");
1394
1486
  debug("model_req", `cached "${model.id}" as openai-completions`);
1395
1487
  ({ content, message, usage } = chatResult);
1396
1488
  } else {
@@ -1,6 +1,7 @@
1
- import { EventEmitter } from "node:events";
1
+ import { EventEmitter } from "eventemitter3";
2
2
  import fs from "node:fs";
3
3
  import path from "node:path";
4
+ import { nanoid } from "nanoid";
4
5
  import type { PeerApprovalConfig } from "./config.ts";
5
6
  import { debug } from "./debug.ts";
6
7
  import type {
@@ -257,7 +258,7 @@ export class PeerApprovalManager extends EventEmitter<PeerApprovalEvents> {
257
258
  return this.waitForBaseApproval(baseNodeId, nodeId, capabilities, publicKey);
258
259
  }
259
260
 
260
- const approvalId = crypto.randomUUID();
261
+ const approvalId = nanoid();
261
262
  this.log(`requestApproval: nodeId=${nodeId} mode=${this.config.mode} approvalId=${approvalId}`);
262
263
 
263
264
  if (this.config.mode === "notify") {