@blockrun/clawrouter 0.12.56 → 0.12.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -40443,6 +40443,13 @@ var MODEL_ALIASES = {
40443
40443
  "grok-fast": "xai/grok-4-fast-reasoning",
40444
40444
  "grok-code": "deepseek/deepseek-chat",
40445
40445
  // was grok-code-fast-1, delisted due to poor retention
40446
+ // Delisted model redirects — full model IDs that were previously valid but removed
40447
+ "grok-code-fast-1": "deepseek/deepseek-chat",
40448
+ // bare alias
40449
+ "xai/grok-code-fast-1": "deepseek/deepseek-chat",
40450
+ // delisted 2026-03-12
40451
+ "xai/grok-3-fast": "xai/grok-4-fast-reasoning",
40452
+ // delisted (too expensive)
40446
40453
  // NVIDIA
40447
40454
  nvidia: "nvidia/gpt-oss-120b",
40448
40455
  "gpt-120b": "nvidia/gpt-oss-120b",
@@ -46147,7 +46154,8 @@ var SessionStore = class {
46147
46154
  requestCount: 1,
46148
46155
  recentHashes: [],
46149
46156
  strikes: 0,
46150
- escalated: false
46157
+ escalated: false,
46158
+ sessionCostMicros: 0n
46151
46159
  });
46152
46160
  }
46153
46161
  }
@@ -46236,6 +46244,39 @@ var SessionStore = class {
46236
46244
  entry.escalated = true;
46237
46245
  return { model: nextConfig.primary, tier: nextTier };
46238
46246
  }
46247
+ /**
46248
+ * Add cost to a session's running total for maxCostPerRun tracking.
46249
+ * Cost is in USDC 6-decimal units (micros).
46250
+ * Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
46251
+ * that never go through the routing path).
46252
+ */
46253
+ addSessionCost(sessionId, additionalMicros) {
46254
+ let entry = this.sessions.get(sessionId);
46255
+ if (!entry) {
46256
+ const now = Date.now();
46257
+ entry = {
46258
+ model: "",
46259
+ tier: "DIRECT",
46260
+ createdAt: now,
46261
+ lastUsedAt: now,
46262
+ requestCount: 0,
46263
+ recentHashes: [],
46264
+ strikes: 0,
46265
+ escalated: false,
46266
+ sessionCostMicros: 0n
46267
+ };
46268
+ this.sessions.set(sessionId, entry);
46269
+ }
46270
+ entry.sessionCostMicros += additionalMicros;
46271
+ }
46272
+ /**
46273
+ * Get the total accumulated cost for a session in USD.
46274
+ */
46275
+ getSessionCostUsd(sessionId) {
46276
+ const entry = this.sessions.get(sessionId);
46277
+ if (!entry) return 0;
46278
+ return Number(entry.sessionCostMicros) / 1e6;
46279
+ }
46239
46280
  /**
46240
46281
  * Stop the cleanup interval.
46241
46282
  */
@@ -46511,9 +46552,11 @@ var MAX_MESSAGES = 200;
46511
46552
  var CONTEXT_LIMIT_KB = 5120;
46512
46553
  var HEARTBEAT_INTERVAL_MS = 2e3;
46513
46554
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
46555
+ var PER_MODEL_TIMEOUT_MS = 6e4;
46514
46556
  var MAX_FALLBACK_ATTEMPTS = 5;
46515
46557
  var HEALTH_CHECK_TIMEOUT_MS = 2e3;
46516
46558
  var RATE_LIMIT_COOLDOWN_MS = 6e4;
46559
+ var OVERLOAD_COOLDOWN_MS = 15e3;
46517
46560
  var PORT_RETRY_ATTEMPTS = 5;
46518
46561
  var PORT_RETRY_DELAY_MS = 1e3;
46519
46562
  var MODEL_BODY_READ_TIMEOUT_MS = 3e5;
@@ -46663,7 +46706,41 @@ function transformPaymentError(errorBody) {
46663
46706
  }
46664
46707
  return errorBody;
46665
46708
  }
46709
/**
 * Classify a non-200 upstream response into a provider-error category.
 *
 * @param {number} status - HTTP status code returned by the upstream.
 * @param {string} [body] - Error body text; `null`/`undefined` is treated as empty.
 * @returns {"auth_failure"|"payment_error"|"quota_exceeded"|"rate_limited"|"overloaded"|"server_error"|"config_error"|null}
 *   The category, or `null` when the response is not recognised as a provider error.
 */
function categorizeError(status, body) {
  // RegExp#test coerces its argument to a string, so a missing body would be
  // matched as the literal text "undefined"/"null". Normalise it to "" instead.
  const text = body == null ? "" : String(body);
  if (status === 401) return "auth_failure";
  if (status === 402) return "payment_error";
  if (status === 403) {
    // A 403 that mentions plan/quota limits is a quota problem, not bad credentials.
    return /plan.*limit|quota.*exceeded|subscription|allowance/i.test(text) ? "quota_exceeded" : "auth_failure";
  }
  if (status === 429) return "rate_limited";
  if (status === 529) return "overloaded";
  // 503 only counts as "overloaded" when the body says so; otherwise it falls
  // through to the generic 5xx bucket below.
  if (status === 503 && /overload|capacity|too.*many.*request/i.test(text)) return "overloaded";
  if (status >= 500) return "server_error";
  if (status === 400 || status === 413) {
    // 400/413 are provider errors only when the body matches a known pattern.
    return PROVIDER_ERROR_PATTERNS.some((p) => p.test(text)) ? "config_error" : null;
  }
  return null;
}
46666
46727
  var rateLimitedModels = /* @__PURE__ */ new Map();
46728
// Model ID -> timestamp (ms, from Date.now()) of the most recent overload response.
var overloadedModels = /* @__PURE__ */ new Map();
// Model ID -> per-category error counters; serialised as `providerErrors` in the stats JSON.
var perProviderErrors = /* @__PURE__ */ new Map();
/**
 * Increment the error counter of `category` for `modelId`.
 *
 * @param {string} modelId - Model the failed request was sent to.
 * @param {string} category - Error category, e.g. a value returned by `categorizeError`.
 */
function recordProviderError(modelId, category) {
  let counts = perProviderErrors.get(modelId);
  if (!counts) {
    counts = {
      auth_failure: 0,
      quota_exceeded: 0,
      rate_limited: 0,
      overloaded: 0,
      server_error: 0,
      payment_error: 0,
      config_error: 0
    };
    perProviderErrors.set(modelId, counts);
  }
  // Start unknown categories at 0: `undefined++` would store NaN, which
  // JSON.stringify then emits as `null`.
  counts[category] = (counts[category] ?? 0) + 1;
}
46667
46744
  function isRateLimited(modelId) {
46668
46745
  const hitTime = rateLimitedModels.get(modelId);
46669
46746
  if (!hitTime) return false;
@@ -46678,17 +46755,30 @@ function markRateLimited(modelId) {
46678
46755
  rateLimitedModels.set(modelId, Date.now());
46679
46756
  console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for 60s`);
46680
46757
  }
46758
/**
 * Record that `modelId` just returned an overload response so that
 * `isOverloaded` reports it as degraded until the cooldown elapses.
 *
 * @param {string} modelId - Model that reported the overload.
 */
function markOverloaded(modelId) {
  overloadedModels.set(modelId, Date.now());
  // Derive the duration from the constant so the log cannot drift from the real cooldown.
  console.log(
    `[ClawRouter] Model ${modelId} overloaded, will deprioritize for ${OVERLOAD_COOLDOWN_MS / 1e3}s`
  );
}
46762
/**
 * Report whether `modelId` is still inside its overload cooldown window.
 * A marker whose cooldown has elapsed is removed as a side effect.
 *
 * @param {string} modelId - Model to check.
 * @returns {boolean} `true` while the model was marked overloaded less than
 *   `OVERLOAD_COOLDOWN_MS` ago, otherwise `false`.
 */
function isOverloaded(modelId) {
  const markedAt = overloadedModels.get(modelId);
  if (!markedAt) return false;
  const expired = Date.now() - markedAt >= OVERLOAD_COOLDOWN_MS;
  if (expired) overloadedModels.delete(modelId);
  return !expired;
}
46681
46771
  function prioritizeNonRateLimited(models) {
46682
46772
  const available = [];
46683
- const rateLimited = [];
46773
+ const degraded = [];
46684
46774
  for (const model of models) {
46685
- if (isRateLimited(model)) {
46686
- rateLimited.push(model);
46775
+ if (isRateLimited(model) || isOverloaded(model)) {
46776
+ degraded.push(model);
46687
46777
  } else {
46688
46778
  available.push(model);
46689
46779
  }
46690
46780
  }
46691
- return [...available, ...rateLimited];
46781
+ return [...available, ...degraded];
46692
46782
  }
46693
46783
  function canWrite(res) {
46694
46784
  return !res.writableEnded && !res.destroyed && res.socket !== null && !res.socket.destroyed && res.socket.writable;
@@ -46823,37 +46913,6 @@ function detectDegradedSuccessResponse(body) {
46823
46913
  }
46824
46914
  return void 0;
46825
46915
  }
46826
- var FALLBACK_STATUS_CODES = [
46827
- 400,
46828
- // Bad request - sometimes used for billing errors
46829
- 401,
46830
- // Unauthorized - provider API key issues
46831
- 402,
46832
- // Payment required - but from upstream, not x402
46833
- 403,
46834
- // Forbidden - provider restrictions
46835
- 413,
46836
- // Payload too large - request exceeds model's context limit
46837
- 429,
46838
- // Rate limited
46839
- 500,
46840
- // Internal server error
46841
- 502,
46842
- // Bad gateway
46843
- 503,
46844
- // Service unavailable
46845
- 504
46846
- // Gateway timeout
46847
- ];
46848
- function isProviderError(status, body) {
46849
- if (!FALLBACK_STATUS_CODES.includes(status)) {
46850
- return false;
46851
- }
46852
- if (status >= 500) {
46853
- return true;
46854
- }
46855
- return PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body));
46856
- }
46857
46916
  var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
46858
46917
  var ROLE_MAPPINGS = {
46859
46918
  developer: "system",
@@ -47359,7 +47418,16 @@ async function startProxy(options) {
47359
47418
  "Content-Type": "application/json",
47360
47419
  "Cache-Control": "no-cache"
47361
47420
  });
47362
- res.end(JSON.stringify(stats, null, 2));
47421
+ res.end(
47422
+ JSON.stringify(
47423
+ {
47424
+ ...stats,
47425
+ providerErrors: Object.fromEntries(perProviderErrors)
47426
+ },
47427
+ null,
47428
+ 2
47429
+ )
47430
+ );
47363
47431
  } catch (err) {
47364
47432
  res.writeHead(500, { "Content-Type": "application/json" });
47365
47433
  res.end(
@@ -47802,12 +47870,13 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
47802
47870
  if (response.status !== 200) {
47803
47871
  const errorBodyChunks = await readBodyWithTimeout(response.body, ERROR_BODY_READ_TIMEOUT_MS);
47804
47872
  const errorBody = Buffer.concat(errorBodyChunks).toString();
47805
- const isProviderErr = isProviderError(response.status, errorBody);
47873
+ const category = categorizeError(response.status, errorBody);
47806
47874
  return {
47807
47875
  success: false,
47808
47876
  errorBody,
47809
47877
  errorStatus: response.status,
47810
- isProviderError: isProviderErr
47878
+ isProviderError: category !== null,
47879
+ errorCategory: category ?? void 0
47811
47880
  };
47812
47881
  }
47813
47882
  const contentType = response.headers.get("content-type") || "";
@@ -47860,6 +47929,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
47860
47929
  let maxTokens = 4096;
47861
47930
  let routingProfile = null;
47862
47931
  let balanceFallbackNotice;
47932
+ let budgetDowngradeNotice;
47933
+ let budgetDowngradeHeaderMode;
47863
47934
  let accumulatedContent = "";
47864
47935
  let responseInputTokens;
47865
47936
  const isChatCompletion = req.url?.includes("/chat/completions");
@@ -47874,6 +47945,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
47874
47945
  let bodyModified = false;
47875
47946
  const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
47876
47947
  const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
47948
+ hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
47877
47949
  const rawLastContent = lastUserMsg?.content;
47878
47950
  const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
47879
47951
  if (sessionId && parsedMessages.length > 0) {
@@ -48530,6 +48602,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48530
48602
  options.onRouted?.(routingDecision);
48531
48603
  }
48532
48604
  }
48605
+ if (!effectiveSessionId && parsedMessages.length > 0) {
48606
+ effectiveSessionId = deriveSessionId(parsedMessages);
48607
+ }
48533
48608
  if (bodyModified) {
48534
48609
  body = Buffer.from(JSON.stringify(parsed));
48535
48610
  }
@@ -48620,7 +48695,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48620
48695
  }
48621
48696
  deduplicator.markInflight(dedupKey);
48622
48697
  let estimatedCostMicros;
48623
- const isFreeModel = modelId === FREE_MODEL;
48698
+ let isFreeModel = modelId === FREE_MODEL;
48624
48699
  if (modelId && !options.skipBalanceCheck && !isFreeModel) {
48625
48700
  const estimated = estimateAmount(modelId, body.length, maxTokens);
48626
48701
  if (estimated) {
@@ -48633,6 +48708,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48633
48708
  `[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
48634
48709
  );
48635
48710
  modelId = FREE_MODEL;
48711
+ isFreeModel = true;
48636
48712
  const parsed = JSON.parse(body.toString());
48637
48713
  parsed.model = FREE_MODEL;
48638
48714
  body = Buffer.from(JSON.stringify(parsed));
@@ -48659,6 +48735,89 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48659
48735
  }
48660
48736
  }
48661
48737
  }
48738
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "strict") {
48739
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
48740
+ const thisReqEstStr = estimatedCostMicros !== void 0 ? estimatedCostMicros.toString() : modelId ? estimateAmount(modelId, body.length, maxTokens) : void 0;
48741
+ const thisReqEstUsd = thisReqEstStr ? Number(thisReqEstStr) / 1e6 : 0;
48742
+ const projectedCostUsd = runCostUsd + thisReqEstUsd;
48743
+ if (projectedCostUsd > options.maxCostPerRunUsd) {
48744
+ console.log(
48745
+ `[ClawRouter] Cost cap exceeded for session ${effectiveSessionId.slice(0, 8)}...: projected $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) > $${options.maxCostPerRunUsd} limit`
48746
+ );
48747
+ res.writeHead(429, {
48748
+ "Content-Type": "application/json",
48749
+ "X-ClawRouter-Cost-Cap-Exceeded": "1"
48750
+ });
48751
+ res.end(
48752
+ JSON.stringify({
48753
+ error: {
48754
+ message: `ClawRouter cost cap exceeded: projected spend $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) would exceed limit $${options.maxCostPerRunUsd}`,
48755
+ type: "cost_cap_exceeded",
48756
+ code: "cost_cap_exceeded"
48757
+ }
48758
+ })
48759
+ );
48760
+ deduplicator.removeInflight(dedupKey);
48761
+ return;
48762
+ }
48763
+ }
48764
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
48765
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
48766
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
48767
+ const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
48768
+ if (isComplexOrAgentic) {
48769
+ const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
48770
+ if (m.id === FREE_MODEL) return false;
48771
+ const est = estimateAmount(m.id, body.length, maxTokens);
48772
+ return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
48773
+ });
48774
+ if (!canAffordAnyNonFreeModel) {
48775
+ console.log(
48776
+ `[ClawRouter] Budget insufficient for agentic/complex session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (silent downgrade would corrupt tool/complex responses)`
48777
+ );
48778
+ res.writeHead(429, {
48779
+ "Content-Type": "application/json",
48780
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
48781
+ "X-ClawRouter-Budget-Mode": "blocked"
48782
+ });
48783
+ res.end(
48784
+ JSON.stringify({
48785
+ error: {
48786
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue.`,
48787
+ type: "cost_cap_exceeded",
48788
+ code: "budget_exhausted"
48789
+ }
48790
+ })
48791
+ );
48792
+ deduplicator.removeInflight(dedupKey);
48793
+ return;
48794
+ }
48795
+ } else if (!routingDecision && modelId && modelId !== FREE_MODEL) {
48796
+ const est = estimateAmount(modelId, body.length, maxTokens);
48797
+ const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
48798
+ if (!canAfford) {
48799
+ console.log(
48800
+ `[ClawRouter] Budget insufficient for explicit model ${modelId} in session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (user explicitly chose ${modelId})`
48801
+ );
48802
+ res.writeHead(429, {
48803
+ "Content-Type": "application/json",
48804
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
48805
+ "X-ClawRouter-Budget-Mode": "blocked"
48806
+ });
48807
+ res.end(
48808
+ JSON.stringify({
48809
+ error: {
48810
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue using ${modelId}.`,
48811
+ type: "cost_cap_exceeded",
48812
+ code: "budget_exhausted"
48813
+ }
48814
+ })
48815
+ );
48816
+ deduplicator.removeInflight(dedupKey);
48817
+ return;
48818
+ }
48819
+ }
48820
+ }
48662
48821
  let heartbeatInterval;
48663
48822
  let headersSentEarly = false;
48664
48823
  if (isStreaming) {
@@ -48703,8 +48862,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48703
48862
  }
48704
48863
  });
48705
48864
  const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
48706
- const controller = new AbortController();
48707
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
48865
+ const globalController = new AbortController();
48866
+ const timeoutId = setTimeout(() => globalController.abort(), timeoutMs);
48708
48867
  try {
48709
48868
  let modelsToTry;
48710
48869
  if (routingDecision) {
@@ -48761,13 +48920,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48761
48920
  if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
48762
48921
  modelsToTry.push(FREE_MODEL);
48763
48922
  }
48923
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
48924
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
48925
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
48926
+ const beforeFilter = [...modelsToTry];
48927
+ modelsToTry = modelsToTry.filter((m) => {
48928
+ if (m === FREE_MODEL) return true;
48929
+ const est = estimateAmount(m, body.length, maxTokens);
48930
+ if (!est) return true;
48931
+ return Number(est) / 1e6 <= remainingUsd;
48932
+ });
48933
+ const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
48934
+ const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
48935
+ const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m === FREE_MODEL);
48936
+ if (isComplexOrAgenticFilter && filteredToFreeOnly) {
48937
+ const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
48938
+ console.log(
48939
+ `[ClawRouter] Budget filter left only free model for complex/agentic session \u2014 blocking (${budgetSummary})`
48940
+ );
48941
+ const errPayload = JSON.stringify({
48942
+ error: {
48943
+ message: `ClawRouter budget exhausted: remaining budget (${budgetSummary}) cannot support a complex/tool request. Increase maxCostPerRun to continue.`,
48944
+ type: "cost_cap_exceeded",
48945
+ code: "budget_exhausted"
48946
+ }
48947
+ });
48948
+ if (heartbeatInterval) clearInterval(heartbeatInterval);
48949
+ if (headersSentEarly) {
48950
+ safeWrite(res, `data: ${errPayload}
48951
+
48952
+ data: [DONE]
48953
+
48954
+ `);
48955
+ res.end();
48956
+ } else {
48957
+ res.writeHead(429, {
48958
+ "Content-Type": "application/json",
48959
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
48960
+ "X-ClawRouter-Budget-Mode": "blocked"
48961
+ });
48962
+ res.end(errPayload);
48963
+ }
48964
+ deduplicator.removeInflight(dedupKey);
48965
+ return;
48966
+ }
48967
+ if (excluded.length > 0) {
48968
+ const budgetSummary = remainingUsd > 0 ? `$${remainingUsd.toFixed(4)} remaining` : `budget exhausted ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd})`;
48969
+ console.log(
48970
+ `[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
48971
+ );
48972
+ const fromModel = excluded[0];
48973
+ const usingFree = modelsToTry.length === 1 && modelsToTry[0] === FREE_MODEL;
48974
+ if (usingFree) {
48975
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
48976
+
48977
+ `;
48978
+ } else {
48979
+ const toModel = modelsToTry[0] ?? FREE_MODEL;
48980
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget low** ($${remainingUsd > 0 ? remainingUsd.toFixed(4) : "0.0000"} remaining) \u2014 using ${toModel} instead of ${fromModel}.
48981
+
48982
+ `;
48983
+ }
48984
+ budgetDowngradeHeaderMode = "downgraded";
48985
+ }
48986
+ }
48764
48987
  let upstream;
48765
48988
  let lastError;
48766
48989
  let actualModelUsed = modelId;
48767
48990
  for (let i = 0; i < modelsToTry.length; i++) {
48768
48991
  const tryModel = modelsToTry[i];
48769
48992
  const isLastAttempt = i === modelsToTry.length - 1;
48993
+ if (globalController.signal.aborted) {
48994
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
48995
+ }
48770
48996
  console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
48997
+ const modelController = new AbortController();
48998
+ const modelTimeoutId = setTimeout(() => modelController.abort(), PER_MODEL_TIMEOUT_MS);
48999
+ const combinedSignal = AbortSignal.any([globalController.signal, modelController.signal]);
48771
49000
  const result = await tryModelRequest(
48772
49001
  upstreamUrl,
48773
49002
  req.method ?? "POST",
@@ -48777,12 +49006,29 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48777
49006
  maxTokens,
48778
49007
  payFetch,
48779
49008
  balanceMonitor,
48780
- controller.signal
49009
+ combinedSignal
48781
49010
  );
49011
+ clearTimeout(modelTimeoutId);
49012
+ if (globalController.signal.aborted) {
49013
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
49014
+ }
49015
+ if (!result.success && modelController.signal.aborted && !isLastAttempt) {
49016
+ console.log(
49017
+ `[ClawRouter] Model ${tryModel} timed out after ${PER_MODEL_TIMEOUT_MS}ms, trying fallback`
49018
+ );
49019
+ recordProviderError(tryModel, "server_error");
49020
+ continue;
49021
+ }
48782
49022
  if (result.success && result.response) {
48783
49023
  upstream = result.response;
48784
49024
  actualModelUsed = tryModel;
48785
49025
  console.log(`[ClawRouter] Success with model: ${tryModel}`);
49026
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49027
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49028
+ if (costEst) {
49029
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49030
+ }
49031
+ }
48786
49032
  break;
48787
49033
  }
48788
49034
  lastError = {
@@ -48798,7 +49044,52 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48798
49044
  );
48799
49045
  break;
48800
49046
  }
48801
- if (result.errorStatus === 429) {
49047
+ const errorCat = result.errorCategory;
49048
+ if (errorCat) {
49049
+ recordProviderError(tryModel, errorCat);
49050
+ }
49051
+ if (errorCat === "rate_limited") {
49052
+ if (!isLastAttempt && !globalController.signal.aborted) {
49053
+ console.log(
49054
+ `[ClawRouter] Rate-limited on ${tryModel}, retrying in 200ms before failover`
49055
+ );
49056
+ await new Promise((resolve) => setTimeout(resolve, 200));
49057
+ if (!globalController.signal.aborted) {
49058
+ const retryController = new AbortController();
49059
+ const retryTimeoutId = setTimeout(
49060
+ () => retryController.abort(),
49061
+ PER_MODEL_TIMEOUT_MS
49062
+ );
49063
+ const retrySignal = AbortSignal.any([
49064
+ globalController.signal,
49065
+ retryController.signal
49066
+ ]);
49067
+ const retryResult = await tryModelRequest(
49068
+ upstreamUrl,
49069
+ req.method ?? "POST",
49070
+ headers,
49071
+ body,
49072
+ tryModel,
49073
+ maxTokens,
49074
+ payFetch,
49075
+ balanceMonitor,
49076
+ retrySignal
49077
+ );
49078
+ clearTimeout(retryTimeoutId);
49079
+ if (retryResult.success && retryResult.response) {
49080
+ upstream = retryResult.response;
49081
+ actualModelUsed = tryModel;
49082
+ console.log(`[ClawRouter] Rate-limit retry succeeded for: ${tryModel}`);
49083
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49084
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49085
+ if (costEst) {
49086
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49087
+ }
49088
+ }
49089
+ break;
49090
+ }
49091
+ }
49092
+ }
48802
49093
  markRateLimited(tryModel);
48803
49094
  try {
48804
49095
  const parsed = JSON.parse(result.errorBody || "{}");
@@ -48814,6 +49105,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48814
49105
  }
48815
49106
  } catch {
48816
49107
  }
49108
+ } else if (errorCat === "overloaded") {
49109
+ markOverloaded(tryModel);
49110
+ } else if (errorCat === "auth_failure" || errorCat === "quota_exceeded") {
49111
+ console.log(
49112
+ `[ClawRouter] \u{1F511} ${errorCat === "auth_failure" ? "Auth failure" : "Quota exceeded"} for ${tryModel} \u2014 check provider config`
49113
+ );
48817
49114
  }
48818
49115
  const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
48819
49116
  result.errorBody || ""
@@ -48973,6 +49270,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48973
49270
  responseChunks.push(Buffer.from(noticeData));
48974
49271
  balanceFallbackNotice = void 0;
48975
49272
  }
49273
+ if (budgetDowngradeNotice) {
49274
+ const noticeChunk = {
49275
+ ...baseChunk,
49276
+ choices: [
49277
+ {
49278
+ index: index2,
49279
+ delta: { content: budgetDowngradeNotice },
49280
+ logprobs: null,
49281
+ finish_reason: null
49282
+ }
49283
+ ]
49284
+ };
49285
+ const noticeData = `data: ${JSON.stringify(noticeChunk)}
49286
+
49287
+ `;
49288
+ safeWrite(res, noticeData);
49289
+ responseChunks.push(Buffer.from(noticeData));
49290
+ budgetDowngradeNotice = void 0;
49291
+ }
48976
49292
  if (content) {
48977
49293
  const contentChunk = {
48978
49294
  ...baseChunk,
@@ -49076,6 +49392,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49076
49392
  }
49077
49393
  balanceFallbackNotice = void 0;
49078
49394
  }
49395
+ if (budgetDowngradeNotice && responseBody.length > 0) {
49396
+ try {
49397
+ const parsed = JSON.parse(responseBody.toString());
49398
+ if (parsed.choices?.[0]?.message?.content !== void 0) {
49399
+ parsed.choices[0].message.content = budgetDowngradeNotice + parsed.choices[0].message.content;
49400
+ responseBody = Buffer.from(JSON.stringify(parsed));
49401
+ }
49402
+ } catch {
49403
+ }
49404
+ budgetDowngradeNotice = void 0;
49405
+ }
49406
+ if (budgetDowngradeHeaderMode) {
49407
+ responseHeaders["x-clawrouter-budget-downgrade"] = "1";
49408
+ responseHeaders["x-clawrouter-budget-mode"] = budgetDowngradeHeaderMode;
49409
+ budgetDowngradeHeaderMode = void 0;
49410
+ }
49079
49411
  responseHeaders["content-length"] = String(responseBody.length);
49080
49412
  res.writeHead(upstream.status, responseHeaders);
49081
49413
  safeWrite(res, responseBody);