@blockrun/clawrouter 0.12.55 → 0.12.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -40443,11 +40443,22 @@ var MODEL_ALIASES = {
40443
40443
  "grok-fast": "xai/grok-4-fast-reasoning",
40444
40444
  "grok-code": "deepseek/deepseek-chat",
40445
40445
  // was grok-code-fast-1, delisted due to poor retention
40446
+ // Delisted model redirects — full model IDs that were previously valid but removed
40447
+ "grok-code-fast-1": "deepseek/deepseek-chat",
40448
+ // bare alias
40449
+ "xai/grok-code-fast-1": "deepseek/deepseek-chat",
40450
+ // delisted 2026-03-12
40451
+ "xai/grok-3-fast": "xai/grok-4-fast-reasoning",
40452
+ // delisted (too expensive)
40446
40453
  // NVIDIA
40447
40454
  nvidia: "nvidia/gpt-oss-120b",
40448
40455
  "gpt-120b": "nvidia/gpt-oss-120b",
40449
40456
  // MiniMax
40450
40457
  minimax: "minimax/minimax-m2.5",
40458
+ // Z.AI GLM-5
40459
+ glm: "zai/glm-5",
40460
+ "glm-5": "zai/glm-5",
40461
+ "glm-5-turbo": "zai/glm-5-turbo",
40451
40462
  // Routing profile aliases (common variations)
40452
40463
  "auto-router": "auto",
40453
40464
  router: "auto"
@@ -40982,6 +40993,27 @@ var BLOCKRUN_MODELS = [
40982
40993
  contextWindow: 262144,
40983
40994
  maxOutput: 16384,
40984
40995
  toolCalling: true
40996
+ },
40997
+ // Z.AI GLM-5 Models
40998
+ {
40999
+ id: "zai/glm-5",
41000
+ name: "GLM-5",
41001
+ version: "5",
41002
+ inputPrice: 1,
41003
+ outputPrice: 3.2,
41004
+ contextWindow: 2e5,
41005
+ maxOutput: 128e3,
41006
+ toolCalling: true
41007
+ },
41008
+ {
41009
+ id: "zai/glm-5-turbo",
41010
+ name: "GLM-5 Turbo",
41011
+ version: "5-turbo",
41012
+ inputPrice: 1.2,
41013
+ outputPrice: 4,
41014
+ contextWindow: 2e5,
41015
+ maxOutput: 128e3,
41016
+ toolCalling: true
40985
41017
  }
40986
41018
  ];
40987
41019
  function toOpenClawModel(m) {
@@ -46122,7 +46154,8 @@ var SessionStore = class {
46122
46154
  requestCount: 1,
46123
46155
  recentHashes: [],
46124
46156
  strikes: 0,
46125
- escalated: false
46157
+ escalated: false,
46158
+ sessionCostMicros: 0n
46126
46159
  });
46127
46160
  }
46128
46161
  }
@@ -46211,6 +46244,39 @@ var SessionStore = class {
46211
46244
  entry.escalated = true;
46212
46245
  return { model: nextConfig.primary, tier: nextTier };
46213
46246
  }
46247
+ /**
46248
+ * Add cost to a session's running total for maxCostPerRun tracking.
46249
+ * Cost is in USDC 6-decimal units (micros).
46250
+ * Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
46251
+ * that never go through the routing path).
46252
+ */
46253
+ addSessionCost(sessionId, additionalMicros) {
46254
+ let entry = this.sessions.get(sessionId);
46255
+ if (!entry) {
46256
+ const now = Date.now();
46257
+ entry = {
46258
+ model: "",
46259
+ tier: "DIRECT",
46260
+ createdAt: now,
46261
+ lastUsedAt: now,
46262
+ requestCount: 0,
46263
+ recentHashes: [],
46264
+ strikes: 0,
46265
+ escalated: false,
46266
+ sessionCostMicros: 0n
46267
+ };
46268
+ this.sessions.set(sessionId, entry);
46269
+ }
46270
+ entry.sessionCostMicros += additionalMicros;
46271
+ }
46272
+ /**
46273
+ * Get the total accumulated cost for a session in USD.
46274
+ */
46275
+ getSessionCostUsd(sessionId) {
46276
+ const entry = this.sessions.get(sessionId);
46277
+ if (!entry) return 0;
46278
+ return Number(entry.sessionCostMicros) / 1e6;
46279
+ }
46214
46280
  /**
46215
46281
  * Stop the cleanup interval.
46216
46282
  */
@@ -46486,9 +46552,11 @@ var MAX_MESSAGES = 200;
46486
46552
  var CONTEXT_LIMIT_KB = 5120;
46487
46553
  var HEARTBEAT_INTERVAL_MS = 2e3;
46488
46554
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
46555
+ var PER_MODEL_TIMEOUT_MS = 6e4;
46489
46556
  var MAX_FALLBACK_ATTEMPTS = 5;
46490
46557
  var HEALTH_CHECK_TIMEOUT_MS = 2e3;
46491
46558
  var RATE_LIMIT_COOLDOWN_MS = 6e4;
46559
+ var OVERLOAD_COOLDOWN_MS = 15e3;
46492
46560
  var PORT_RETRY_ATTEMPTS = 5;
46493
46561
  var PORT_RETRY_DELAY_MS = 1e3;
46494
46562
  var MODEL_BODY_READ_TIMEOUT_MS = 3e5;
@@ -46638,7 +46706,41 @@ function transformPaymentError(errorBody) {
46638
46706
  }
46639
46707
  return errorBody;
46640
46708
  }
/**
 * Map an upstream HTTP status and error body to an error category.
 *
 * @param {number} status - HTTP status code of the failed response.
 * @param {string} body - Response body text, matched against the patterns below.
 * @returns {string|null} One of "auth_failure", "payment_error", "quota_exceeded",
 *   "rate_limited", "overloaded", "server_error", "config_error", or null when
 *   the response does not fall into any category.
 */
function categorizeError(status, body) {
  // Statuses whose category depends on the code alone (403 also inspects the body).
  switch (status) {
    case 401:
      return "auth_failure";
    case 402:
      return "payment_error";
    case 403: {
      const quotaHit = /plan.*limit|quota.*exceeded|subscription|allowance/i.test(body);
      return quotaHit ? "quota_exceeded" : "auth_failure";
    }
    case 429:
      return "rate_limited";
    case 529:
      return "overloaded";
    default:
      break;
  }
  // A 503 only counts as overload when the body says so; otherwise it is a plain 5xx.
  const saysOverloaded = status === 503 && /overload|capacity|too.*many.*request/i.test(body);
  if (saysOverloaded) return "overloaded";
  if (status >= 500) return "server_error";
  // 400 and 413 are categorized only when the body matches a known provider pattern.
  const patternGated = status === 400 || status === 413;
  if (patternGated && PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body))) {
    return "config_error";
  }
  return null;
}
46641
46727
// Model id -> Date.now() timestamp recorded when the model was marked rate-limited.
  var rateLimitedModels = /* @__PURE__ */ new Map();
46728
// Model id -> Date.now() timestamp recorded when the model was marked overloaded.
+ var overloadedModels = /* @__PURE__ */ new Map();
46729
// Model id -> object of per-category error counters (filled in by recordProviderError).
+ var perProviderErrors = /* @__PURE__ */ new Map();
/**
 * Count one error of the given category against a model.
 *
 * The first error seen for a model creates its counter object with every
 * category initialised to zero; the requested category is then incremented.
 *
 * @param {string} modelId - Model the error is attributed to.
 * @param {string} category - One of the category keys listed below.
 */
function recordProviderError(modelId, category) {
  if (!perProviderErrors.has(modelId)) {
    // Key order is kept stable because the counters are serialized as-is.
    const categoryKeys = [
      "auth_failure",
      "quota_exceeded",
      "rate_limited",
      "overloaded",
      "server_error",
      "payment_error",
      "config_error"
    ];
    const zeroedCounters = Object.fromEntries(categoryKeys.map((key) => [key, 0]));
    perProviderErrors.set(modelId, zeroedCounters);
  }
  const counters = perProviderErrors.get(modelId);
  counters[category]++;
}
46642
46744
  function isRateLimited(modelId) {
46643
46745
  const hitTime = rateLimitedModels.get(modelId);
46644
46746
  if (!hitTime) return false;
@@ -46653,17 +46755,30 @@ function markRateLimited(modelId) {
46653
46755
  rateLimitedModels.set(modelId, Date.now());
46654
46756
  console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for 60s`);
46655
46757
  }
/**
 * Flag a model as overloaded as of now so it is deprioritized during the cooldown.
 *
 * The duration in the log line is derived from OVERLOAD_COOLDOWN_MS rather than
 * hard-coded, so the message cannot drift from the cooldown that is enforced.
 *
 * @param {string} modelId - Model that reported an overload.
 */
function markOverloaded(modelId) {
  overloadedModels.set(modelId, Date.now());
  console.log(
    `[ClawRouter] Model ${modelId} overloaded, will deprioritize for ${OVERLOAD_COOLDOWN_MS / 1e3}s`
  );
}
/**
 * Tell whether a model is still inside its overload cooldown.
 *
 * An entry whose cooldown has elapsed is removed from the map as a side effect.
 *
 * @param {string} modelId - Model to check.
 * @returns {boolean} true while the cooldown is running, false otherwise.
 */
function isOverloaded(modelId) {
  const markedAt = overloadedModels.get(modelId);
  if (!markedAt) return false;
  const cooldownElapsed = Date.now() - markedAt >= OVERLOAD_COOLDOWN_MS;
  if (cooldownElapsed) {
    overloadedModels.delete(modelId);
  }
  return !cooldownElapsed;
}
/**
 * Reorder candidate models so healthy ones are tried first.
 *
 * Models that are currently rate-limited or overloaded are moved to the end;
 * relative order inside each group is preserved.
 *
 * @param {Iterable<string>} models - Candidate model ids in preference order.
 * @returns {string[]} New array: healthy models followed by degraded ones.
 */
function prioritizeNonRateLimited(models) {
  const healthy = [];
  const deferred = [];
  for (const candidate of models) {
    // The overload check only runs when the model is not rate-limited.
    const bucket = isRateLimited(candidate) || isOverloaded(candidate) ? deferred : healthy;
    bucket.push(candidate);
  }
  return healthy.concat(deferred);
}
46668
46783
  function canWrite(res) {
46669
46784
  return !res.writableEnded && !res.destroyed && res.socket !== null && !res.socket.destroyed && res.socket.writable;
@@ -46798,37 +46913,6 @@ function detectDegradedSuccessResponse(body) {
46798
46913
  }
46799
46914
  return void 0;
46800
46915
  }
46801
- var FALLBACK_STATUS_CODES = [
46802
- 400,
46803
- // Bad request - sometimes used for billing errors
46804
- 401,
46805
- // Unauthorized - provider API key issues
46806
- 402,
46807
- // Payment required - but from upstream, not x402
46808
- 403,
46809
- // Forbidden - provider restrictions
46810
- 413,
46811
- // Payload too large - request exceeds model's context limit
46812
- 429,
46813
- // Rate limited
46814
- 500,
46815
- // Internal server error
46816
- 502,
46817
- // Bad gateway
46818
- 503,
46819
- // Service unavailable
46820
- 504
46821
- // Gateway timeout
46822
- ];
46823
- function isProviderError(status, body) {
46824
- if (!FALLBACK_STATUS_CODES.includes(status)) {
46825
- return false;
46826
- }
46827
- if (status >= 500) {
46828
- return true;
46829
- }
46830
- return PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body));
46831
- }
46832
46916
  var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
46833
46917
  var ROLE_MAPPINGS = {
46834
46918
  developer: "system",
@@ -47334,7 +47418,16 @@ async function startProxy(options) {
47334
47418
  "Content-Type": "application/json",
47335
47419
  "Cache-Control": "no-cache"
47336
47420
  });
47337
- res.end(JSON.stringify(stats, null, 2));
47421
+ res.end(
47422
+ JSON.stringify(
47423
+ {
47424
+ ...stats,
47425
+ providerErrors: Object.fromEntries(perProviderErrors)
47426
+ },
47427
+ null,
47428
+ 2
47429
+ )
47430
+ );
47338
47431
  } catch (err) {
47339
47432
  res.writeHead(500, { "Content-Type": "application/json" });
47340
47433
  res.end(
@@ -47777,12 +47870,13 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
47777
47870
  if (response.status !== 200) {
47778
47871
  const errorBodyChunks = await readBodyWithTimeout(response.body, ERROR_BODY_READ_TIMEOUT_MS);
47779
47872
  const errorBody = Buffer.concat(errorBodyChunks).toString();
47780
- const isProviderErr = isProviderError(response.status, errorBody);
47873
+ const category = categorizeError(response.status, errorBody);
47781
47874
  return {
47782
47875
  success: false,
47783
47876
  errorBody,
47784
47877
  errorStatus: response.status,
47785
- isProviderError: isProviderErr
47878
+ isProviderError: category !== null,
47879
+ errorCategory: category ?? void 0
47786
47880
  };
47787
47881
  }
47788
47882
  const contentType = response.headers.get("content-type") || "";
@@ -47835,6 +47929,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
47835
47929
  let maxTokens = 4096;
47836
47930
  let routingProfile = null;
47837
47931
  let balanceFallbackNotice;
47932
+ let budgetDowngradeNotice;
47933
+ let budgetDowngradeHeaderMode;
47838
47934
  let accumulatedContent = "";
47839
47935
  let responseInputTokens;
47840
47936
  const isChatCompletion = req.url?.includes("/chat/completions");
@@ -47849,6 +47945,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
47849
47945
  let bodyModified = false;
47850
47946
  const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
47851
47947
  const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
47948
+ hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
47852
47949
  const rawLastContent = lastUserMsg?.content;
47853
47950
  const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
47854
47951
  if (sessionId && parsedMessages.length > 0) {
@@ -48505,6 +48602,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48505
48602
  options.onRouted?.(routingDecision);
48506
48603
  }
48507
48604
  }
48605
+ if (!effectiveSessionId && parsedMessages.length > 0) {
48606
+ effectiveSessionId = deriveSessionId(parsedMessages);
48607
+ }
48508
48608
  if (bodyModified) {
48509
48609
  body = Buffer.from(JSON.stringify(parsed));
48510
48610
  }
@@ -48595,7 +48695,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48595
48695
  }
48596
48696
  deduplicator.markInflight(dedupKey);
48597
48697
  let estimatedCostMicros;
48598
- const isFreeModel = modelId === FREE_MODEL;
48698
+ let isFreeModel = modelId === FREE_MODEL;
48599
48699
  if (modelId && !options.skipBalanceCheck && !isFreeModel) {
48600
48700
  const estimated = estimateAmount(modelId, body.length, maxTokens);
48601
48701
  if (estimated) {
@@ -48608,6 +48708,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48608
48708
  `[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
48609
48709
  );
48610
48710
  modelId = FREE_MODEL;
48711
+ isFreeModel = true;
48611
48712
  const parsed = JSON.parse(body.toString());
48612
48713
  parsed.model = FREE_MODEL;
48613
48714
  body = Buffer.from(JSON.stringify(parsed));
@@ -48634,6 +48735,89 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48634
48735
  }
48635
48736
  }
48636
48737
  }
48738
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "strict") {
48739
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
48740
+ const thisReqEstStr = estimatedCostMicros !== void 0 ? estimatedCostMicros.toString() : modelId ? estimateAmount(modelId, body.length, maxTokens) : void 0;
48741
+ const thisReqEstUsd = thisReqEstStr ? Number(thisReqEstStr) / 1e6 : 0;
48742
+ const projectedCostUsd = runCostUsd + thisReqEstUsd;
48743
+ if (projectedCostUsd > options.maxCostPerRunUsd) {
48744
+ console.log(
48745
+ `[ClawRouter] Cost cap exceeded for session ${effectiveSessionId.slice(0, 8)}...: projected $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) > $${options.maxCostPerRunUsd} limit`
48746
+ );
48747
+ res.writeHead(429, {
48748
+ "Content-Type": "application/json",
48749
+ "X-ClawRouter-Cost-Cap-Exceeded": "1"
48750
+ });
48751
+ res.end(
48752
+ JSON.stringify({
48753
+ error: {
48754
+ message: `ClawRouter cost cap exceeded: projected spend $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) would exceed limit $${options.maxCostPerRunUsd}`,
48755
+ type: "cost_cap_exceeded",
48756
+ code: "cost_cap_exceeded"
48757
+ }
48758
+ })
48759
+ );
48760
+ deduplicator.removeInflight(dedupKey);
48761
+ return;
48762
+ }
48763
+ }
48764
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
48765
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
48766
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
48767
+ const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
48768
+ if (isComplexOrAgentic) {
48769
+ const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
48770
+ if (m.id === FREE_MODEL) return false;
48771
+ const est = estimateAmount(m.id, body.length, maxTokens);
48772
+ return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
48773
+ });
48774
+ if (!canAffordAnyNonFreeModel) {
48775
+ console.log(
48776
+ `[ClawRouter] Budget insufficient for agentic/complex session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (silent downgrade would corrupt tool/complex responses)`
48777
+ );
48778
+ res.writeHead(429, {
48779
+ "Content-Type": "application/json",
48780
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
48781
+ "X-ClawRouter-Budget-Mode": "blocked"
48782
+ });
48783
+ res.end(
48784
+ JSON.stringify({
48785
+ error: {
48786
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue.`,
48787
+ type: "cost_cap_exceeded",
48788
+ code: "budget_exhausted"
48789
+ }
48790
+ })
48791
+ );
48792
+ deduplicator.removeInflight(dedupKey);
48793
+ return;
48794
+ }
48795
+ } else if (!routingDecision && modelId && modelId !== FREE_MODEL) {
48796
+ const est = estimateAmount(modelId, body.length, maxTokens);
48797
+ const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
48798
+ if (!canAfford) {
48799
+ console.log(
48800
+ `[ClawRouter] Budget insufficient for explicit model ${modelId} in session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (user explicitly chose ${modelId})`
48801
+ );
48802
+ res.writeHead(429, {
48803
+ "Content-Type": "application/json",
48804
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
48805
+ "X-ClawRouter-Budget-Mode": "blocked"
48806
+ });
48807
+ res.end(
48808
+ JSON.stringify({
48809
+ error: {
48810
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue using ${modelId}.`,
48811
+ type: "cost_cap_exceeded",
48812
+ code: "budget_exhausted"
48813
+ }
48814
+ })
48815
+ );
48816
+ deduplicator.removeInflight(dedupKey);
48817
+ return;
48818
+ }
48819
+ }
48820
+ }
48637
48821
  let heartbeatInterval;
48638
48822
  let headersSentEarly = false;
48639
48823
  if (isStreaming) {
@@ -48678,8 +48862,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48678
48862
  }
48679
48863
  });
48680
48864
  const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
48681
- const controller = new AbortController();
48682
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
48865
+ const globalController = new AbortController();
48866
+ const timeoutId = setTimeout(() => globalController.abort(), timeoutMs);
48683
48867
  try {
48684
48868
  let modelsToTry;
48685
48869
  if (routingDecision) {
@@ -48736,13 +48920,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48736
48920
  if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
48737
48921
  modelsToTry.push(FREE_MODEL);
48738
48922
  }
48923
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
48924
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
48925
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
48926
+ const beforeFilter = [...modelsToTry];
48927
+ modelsToTry = modelsToTry.filter((m) => {
48928
+ if (m === FREE_MODEL) return true;
48929
+ const est = estimateAmount(m, body.length, maxTokens);
48930
+ if (!est) return true;
48931
+ return Number(est) / 1e6 <= remainingUsd;
48932
+ });
48933
+ const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
48934
+ const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
48935
+ const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m === FREE_MODEL);
48936
+ if (isComplexOrAgenticFilter && filteredToFreeOnly) {
48937
+ const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
48938
+ console.log(
48939
+ `[ClawRouter] Budget filter left only free model for complex/agentic session \u2014 blocking (${budgetSummary})`
48940
+ );
48941
+ const errPayload = JSON.stringify({
48942
+ error: {
48943
+ message: `ClawRouter budget exhausted: remaining budget (${budgetSummary}) cannot support a complex/tool request. Increase maxCostPerRun to continue.`,
48944
+ type: "cost_cap_exceeded",
48945
+ code: "budget_exhausted"
48946
+ }
48947
+ });
48948
+ if (heartbeatInterval) clearInterval(heartbeatInterval);
48949
+ if (headersSentEarly) {
48950
+ safeWrite(res, `data: ${errPayload}
48951
+
48952
+ data: [DONE]
48953
+
48954
+ `);
48955
+ res.end();
48956
+ } else {
48957
+ res.writeHead(429, {
48958
+ "Content-Type": "application/json",
48959
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
48960
+ "X-ClawRouter-Budget-Mode": "blocked"
48961
+ });
48962
+ res.end(errPayload);
48963
+ }
48964
+ deduplicator.removeInflight(dedupKey);
48965
+ return;
48966
+ }
48967
+ if (excluded.length > 0) {
48968
+ const budgetSummary = remainingUsd > 0 ? `$${remainingUsd.toFixed(4)} remaining` : `budget exhausted ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd})`;
48969
+ console.log(
48970
+ `[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
48971
+ );
48972
+ const fromModel = excluded[0];
48973
+ const usingFree = modelsToTry.length === 1 && modelsToTry[0] === FREE_MODEL;
48974
+ if (usingFree) {
48975
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
48976
+
48977
+ `;
48978
+ } else {
48979
+ const toModel = modelsToTry[0] ?? FREE_MODEL;
48980
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget low** ($${remainingUsd > 0 ? remainingUsd.toFixed(4) : "0.0000"} remaining) \u2014 using ${toModel} instead of ${fromModel}.
48981
+
48982
+ `;
48983
+ }
48984
+ budgetDowngradeHeaderMode = "downgraded";
48985
+ }
48986
+ }
48739
48987
  let upstream;
48740
48988
  let lastError;
48741
48989
  let actualModelUsed = modelId;
48742
48990
  for (let i = 0; i < modelsToTry.length; i++) {
48743
48991
  const tryModel = modelsToTry[i];
48744
48992
  const isLastAttempt = i === modelsToTry.length - 1;
48993
+ if (globalController.signal.aborted) {
48994
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
48995
+ }
48745
48996
  console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
48997
+ const modelController = new AbortController();
48998
+ const modelTimeoutId = setTimeout(() => modelController.abort(), PER_MODEL_TIMEOUT_MS);
48999
+ const combinedSignal = AbortSignal.any([globalController.signal, modelController.signal]);
48746
49000
  const result = await tryModelRequest(
48747
49001
  upstreamUrl,
48748
49002
  req.method ?? "POST",
@@ -48752,12 +49006,29 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48752
49006
  maxTokens,
48753
49007
  payFetch,
48754
49008
  balanceMonitor,
48755
- controller.signal
49009
+ combinedSignal
48756
49010
  );
49011
+ clearTimeout(modelTimeoutId);
49012
+ if (globalController.signal.aborted) {
49013
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
49014
+ }
49015
+ if (!result.success && modelController.signal.aborted && !isLastAttempt) {
49016
+ console.log(
49017
+ `[ClawRouter] Model ${tryModel} timed out after ${PER_MODEL_TIMEOUT_MS}ms, trying fallback`
49018
+ );
49019
+ recordProviderError(tryModel, "server_error");
49020
+ continue;
49021
+ }
48757
49022
  if (result.success && result.response) {
48758
49023
  upstream = result.response;
48759
49024
  actualModelUsed = tryModel;
48760
49025
  console.log(`[ClawRouter] Success with model: ${tryModel}`);
49026
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49027
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49028
+ if (costEst) {
49029
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49030
+ }
49031
+ }
48761
49032
  break;
48762
49033
  }
48763
49034
  lastError = {
@@ -48773,7 +49044,52 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48773
49044
  );
48774
49045
  break;
48775
49046
  }
48776
- if (result.errorStatus === 429) {
49047
+ const errorCat = result.errorCategory;
49048
+ if (errorCat) {
49049
+ recordProviderError(tryModel, errorCat);
49050
+ }
49051
+ if (errorCat === "rate_limited") {
49052
+ if (!isLastAttempt && !globalController.signal.aborted) {
49053
+ console.log(
49054
+ `[ClawRouter] Rate-limited on ${tryModel}, retrying in 200ms before failover`
49055
+ );
49056
+ await new Promise((resolve) => setTimeout(resolve, 200));
49057
+ if (!globalController.signal.aborted) {
49058
+ const retryController = new AbortController();
49059
+ const retryTimeoutId = setTimeout(
49060
+ () => retryController.abort(),
49061
+ PER_MODEL_TIMEOUT_MS
49062
+ );
49063
+ const retrySignal = AbortSignal.any([
49064
+ globalController.signal,
49065
+ retryController.signal
49066
+ ]);
49067
+ const retryResult = await tryModelRequest(
49068
+ upstreamUrl,
49069
+ req.method ?? "POST",
49070
+ headers,
49071
+ body,
49072
+ tryModel,
49073
+ maxTokens,
49074
+ payFetch,
49075
+ balanceMonitor,
49076
+ retrySignal
49077
+ );
49078
+ clearTimeout(retryTimeoutId);
49079
+ if (retryResult.success && retryResult.response) {
49080
+ upstream = retryResult.response;
49081
+ actualModelUsed = tryModel;
49082
+ console.log(`[ClawRouter] Rate-limit retry succeeded for: ${tryModel}`);
49083
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49084
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49085
+ if (costEst) {
49086
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49087
+ }
49088
+ }
49089
+ break;
49090
+ }
49091
+ }
49092
+ }
48777
49093
  markRateLimited(tryModel);
48778
49094
  try {
48779
49095
  const parsed = JSON.parse(result.errorBody || "{}");
@@ -48789,6 +49105,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48789
49105
  }
48790
49106
  } catch {
48791
49107
  }
49108
+ } else if (errorCat === "overloaded") {
49109
+ markOverloaded(tryModel);
49110
+ } else if (errorCat === "auth_failure" || errorCat === "quota_exceeded") {
49111
+ console.log(
49112
+ `[ClawRouter] \u{1F511} ${errorCat === "auth_failure" ? "Auth failure" : "Quota exceeded"} for ${tryModel} \u2014 check provider config`
49113
+ );
48792
49114
  }
48793
49115
  const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
48794
49116
  result.errorBody || ""
@@ -48948,6 +49270,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48948
49270
  responseChunks.push(Buffer.from(noticeData));
48949
49271
  balanceFallbackNotice = void 0;
48950
49272
  }
49273
+ if (budgetDowngradeNotice) {
49274
+ const noticeChunk = {
49275
+ ...baseChunk,
49276
+ choices: [
49277
+ {
49278
+ index: index2,
49279
+ delta: { content: budgetDowngradeNotice },
49280
+ logprobs: null,
49281
+ finish_reason: null
49282
+ }
49283
+ ]
49284
+ };
49285
+ const noticeData = `data: ${JSON.stringify(noticeChunk)}
49286
+
49287
+ `;
49288
+ safeWrite(res, noticeData);
49289
+ responseChunks.push(Buffer.from(noticeData));
49290
+ budgetDowngradeNotice = void 0;
49291
+ }
48951
49292
  if (content) {
48952
49293
  const contentChunk = {
48953
49294
  ...baseChunk,
@@ -49051,6 +49392,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49051
49392
  }
49052
49393
  balanceFallbackNotice = void 0;
49053
49394
  }
49395
+ if (budgetDowngradeNotice && responseBody.length > 0) {
49396
+ try {
49397
+ const parsed = JSON.parse(responseBody.toString());
49398
+ if (parsed.choices?.[0]?.message?.content !== void 0) {
49399
+ parsed.choices[0].message.content = budgetDowngradeNotice + parsed.choices[0].message.content;
49400
+ responseBody = Buffer.from(JSON.stringify(parsed));
49401
+ }
49402
+ } catch {
49403
+ }
49404
+ budgetDowngradeNotice = void 0;
49405
+ }
49406
+ if (budgetDowngradeHeaderMode) {
49407
+ responseHeaders["x-clawrouter-budget-downgrade"] = "1";
49408
+ responseHeaders["x-clawrouter-budget-mode"] = budgetDowngradeHeaderMode;
49409
+ budgetDowngradeHeaderMode = void 0;
49410
+ }
49054
49411
  responseHeaders["content-length"] = String(responseBody.length);
49055
49412
  res.writeHead(upstream.status, responseHeaders);
49056
49413
  safeWrite(res, responseBody);