@blockrun/clawrouter 0.12.56 → 0.12.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -25522,10 +25522,10 @@ var init_client = __esm({
25522
25522
  // src/proxy.ts
25523
25523
  import { createServer } from "http";
25524
25524
  import { finished } from "stream";
25525
- import { homedir as homedir4 } from "os";
25526
- import { join as join7 } from "path";
25525
+ import { homedir as homedir5 } from "os";
25526
+ import { join as join8 } from "path";
25527
25527
  import { mkdir as mkdir3, writeFile as writeFile2, readFile, stat as fsStat } from "fs/promises";
25528
- import { readFileSync, existsSync } from "fs";
25528
+ import { readFileSync as readFileSync2, existsSync } from "fs";
25529
25529
 
25530
25530
  // node_modules/viem/_esm/utils/getAction.js
25531
25531
  function getAction(client, actionFn, name) {
@@ -39019,6 +39019,11 @@ function filterByVision(models, hasVision, supportsVision2) {
39019
39019
  const filtered = models.filter(supportsVision2);
39020
39020
  return filtered.length > 0 ? filtered : models;
39021
39021
  }
39022
+ function filterByExcludeList(models, excludeList) {
39023
+ if (excludeList.size === 0) return models;
39024
+ const filtered = models.filter((m) => !excludeList.has(m));
39025
+ return filtered.length > 0 ? filtered : models;
39026
+ }
39022
39027
  function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
39023
39028
  const fullChain = getFallbackChain(tier, tierConfigs);
39024
39029
  const filtered = fullChain.filter((modelId) => {
@@ -40443,11 +40448,20 @@ var MODEL_ALIASES = {
40443
40448
  "grok-fast": "xai/grok-4-fast-reasoning",
40444
40449
  "grok-code": "deepseek/deepseek-chat",
40445
40450
  // was grok-code-fast-1, delisted due to poor retention
40451
+ // Delisted model redirects — full model IDs that were previously valid but removed
40452
+ "grok-code-fast-1": "deepseek/deepseek-chat",
40453
+ // bare alias
40454
+ "xai/grok-code-fast-1": "deepseek/deepseek-chat",
40455
+ // delisted 2026-03-12
40456
+ "xai/grok-3-fast": "xai/grok-4-fast-reasoning",
40457
+ // delisted (too expensive)
40446
40458
  // NVIDIA
40447
40459
  nvidia: "nvidia/gpt-oss-120b",
40448
40460
  "gpt-120b": "nvidia/gpt-oss-120b",
40449
40461
  // MiniMax
40450
- minimax: "minimax/minimax-m2.5",
40462
+ minimax: "minimax/minimax-m2.7",
40463
+ "minimax-m2.7": "minimax/minimax-m2.7",
40464
+ "minimax-m2.5": "minimax/minimax-m2.5",
40451
40465
  // Z.AI GLM-5
40452
40466
  glm: "zai/glm-5",
40453
40467
  "glm-5": "zai/glm-5",
@@ -40953,6 +40967,18 @@ var BLOCKRUN_MODELS = [
40953
40967
  toolCalling: true
40954
40968
  },
40955
40969
  // MiniMax
40970
+ {
40971
+ id: "minimax/minimax-m2.7",
40972
+ name: "MiniMax M2.7",
40973
+ version: "m2.7",
40974
+ inputPrice: 0.3,
40975
+ outputPrice: 1.2,
40976
+ contextWindow: 204800,
40977
+ maxOutput: 16384,
40978
+ reasoning: true,
40979
+ agentic: true,
40980
+ toolCalling: true
40981
+ },
40956
40982
  {
40957
40983
  id: "minimax/minimax-m2.5",
40958
40984
  name: "MiniMax M2.5",
@@ -46147,7 +46173,8 @@ var SessionStore = class {
46147
46173
  requestCount: 1,
46148
46174
  recentHashes: [],
46149
46175
  strikes: 0,
46150
- escalated: false
46176
+ escalated: false,
46177
+ sessionCostMicros: 0n
46151
46178
  });
46152
46179
  }
46153
46180
  }
@@ -46236,6 +46263,39 @@ var SessionStore = class {
46236
46263
  entry.escalated = true;
46237
46264
  return { model: nextConfig.primary, tier: nextTier };
46238
46265
  }
46266
+ /**
46267
+ * Add cost to a session's running total for maxCostPerRun tracking.
46268
+ * Cost is in USDC 6-decimal units (micros).
46269
+ * Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
46270
+ * that never go through the routing path).
46271
+ */
46272
+ addSessionCost(sessionId, additionalMicros) {
46273
+ let entry = this.sessions.get(sessionId);
46274
+ if (!entry) {
46275
+ const now = Date.now();
46276
+ entry = {
46277
+ model: "",
46278
+ tier: "DIRECT",
46279
+ createdAt: now,
46280
+ lastUsedAt: now,
46281
+ requestCount: 0,
46282
+ recentHashes: [],
46283
+ strikes: 0,
46284
+ escalated: false,
46285
+ sessionCostMicros: 0n
46286
+ };
46287
+ this.sessions.set(sessionId, entry);
46288
+ }
46289
+ entry.sessionCostMicros += additionalMicros;
46290
+ }
46291
+ /**
46292
+ * Get the total accumulated cost for a session in USD.
46293
+ */
46294
+ getSessionCostUsd(sessionId) {
46295
+ const entry = this.sessions.get(sessionId);
46296
+ if (!entry) return 0;
46297
+ return Number(entry.sessionCostMicros) / 1e6;
46298
+ }
46239
46299
  /**
46240
46300
  * Stop the cleanup interval.
46241
46301
  */
@@ -46303,6 +46363,29 @@ async function checkForUpdates() {
46303
46363
  }
46304
46364
  }
46305
46365
 
46366
+ // src/exclude-models.ts
46367
+ import { readFileSync, writeFileSync, mkdirSync } from "fs";
46368
+ import { join as join7, dirname as dirname2 } from "path";
46369
+ import { homedir as homedir4 } from "os";
46370
+ var DEFAULT_FILE_PATH = join7(
46371
+ homedir4(),
46372
+ ".openclaw",
46373
+ "blockrun",
46374
+ "exclude-models.json"
46375
+ );
46376
+ function loadExcludeList(filePath = DEFAULT_FILE_PATH) {
46377
+ try {
46378
+ const raw = readFileSync(filePath, "utf-8");
46379
+ const arr = JSON.parse(raw);
46380
+ if (Array.isArray(arr)) {
46381
+ return new Set(arr.filter((x) => typeof x === "string"));
46382
+ }
46383
+ return /* @__PURE__ */ new Set();
46384
+ } catch {
46385
+ return /* @__PURE__ */ new Set();
46386
+ }
46387
+ }
46388
+
46306
46389
  // src/config.ts
46307
46390
  var DEFAULT_PORT = 8402;
46308
46391
  var PROXY_PORT = (() => {
@@ -46487,7 +46570,7 @@ ${lines.join("\n")}`;
46487
46570
  // src/proxy.ts
46488
46571
  var BLOCKRUN_API = "https://blockrun.ai/api";
46489
46572
  var BLOCKRUN_SOLANA_API = "https://sol.blockrun.ai/api";
46490
- var IMAGE_DIR = join7(homedir4(), ".openclaw", "blockrun", "images");
46573
+ var IMAGE_DIR = join8(homedir5(), ".openclaw", "blockrun", "images");
46491
46574
  var AUTO_MODEL = "blockrun/auto";
46492
46575
  var ROUTING_PROFILES = /* @__PURE__ */ new Set([
46493
46576
  "blockrun/free",
@@ -46511,9 +46594,11 @@ var MAX_MESSAGES = 200;
46511
46594
  var CONTEXT_LIMIT_KB = 5120;
46512
46595
  var HEARTBEAT_INTERVAL_MS = 2e3;
46513
46596
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
46597
+ var PER_MODEL_TIMEOUT_MS = 6e4;
46514
46598
  var MAX_FALLBACK_ATTEMPTS = 5;
46515
46599
  var HEALTH_CHECK_TIMEOUT_MS = 2e3;
46516
46600
  var RATE_LIMIT_COOLDOWN_MS = 6e4;
46601
+ var OVERLOAD_COOLDOWN_MS = 15e3;
46517
46602
  var PORT_RETRY_ATTEMPTS = 5;
46518
46603
  var PORT_RETRY_DELAY_MS = 1e3;
46519
46604
  var MODEL_BODY_READ_TIMEOUT_MS = 3e5;
@@ -46663,7 +46748,41 @@ function transformPaymentError(errorBody) {
46663
46748
  }
46664
46749
  return errorBody;
46665
46750
  }
46751
+ function categorizeError(status, body) {
46752
+ if (status === 401) return "auth_failure";
46753
+ if (status === 402) return "payment_error";
46754
+ if (status === 403) {
46755
+ if (/plan.*limit|quota.*exceeded|subscription|allowance/i.test(body))
46756
+ return "quota_exceeded";
46757
+ return "auth_failure";
46758
+ }
46759
+ if (status === 429) return "rate_limited";
46760
+ if (status === 529) return "overloaded";
46761
+ if (status === 503 && /overload|capacity|too.*many.*request/i.test(body)) return "overloaded";
46762
+ if (status >= 500) return "server_error";
46763
+ if (status === 400 || status === 413) {
46764
+ if (PROVIDER_ERROR_PATTERNS.some((p) => p.test(body))) return "config_error";
46765
+ return null;
46766
+ }
46767
+ return null;
46768
+ }
46666
46769
  var rateLimitedModels = /* @__PURE__ */ new Map();
46770
+ var overloadedModels = /* @__PURE__ */ new Map();
46771
+ var perProviderErrors = /* @__PURE__ */ new Map();
46772
+ function recordProviderError(modelId, category) {
46773
+ if (!perProviderErrors.has(modelId)) {
46774
+ perProviderErrors.set(modelId, {
46775
+ auth_failure: 0,
46776
+ quota_exceeded: 0,
46777
+ rate_limited: 0,
46778
+ overloaded: 0,
46779
+ server_error: 0,
46780
+ payment_error: 0,
46781
+ config_error: 0
46782
+ });
46783
+ }
46784
+ perProviderErrors.get(modelId)[category]++;
46785
+ }
46667
46786
  function isRateLimited(modelId) {
46668
46787
  const hitTime = rateLimitedModels.get(modelId);
46669
46788
  if (!hitTime) return false;
@@ -46678,17 +46797,30 @@ function markRateLimited(modelId) {
46678
46797
  rateLimitedModels.set(modelId, Date.now());
46679
46798
  console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for 60s`);
46680
46799
  }
46800
+ function markOverloaded(modelId) {
46801
+ overloadedModels.set(modelId, Date.now());
46802
+ console.log(`[ClawRouter] Model ${modelId} overloaded, will deprioritize for 15s`);
46803
+ }
46804
+ function isOverloaded(modelId) {
46805
+ const hitTime = overloadedModels.get(modelId);
46806
+ if (!hitTime) return false;
46807
+ if (Date.now() - hitTime >= OVERLOAD_COOLDOWN_MS) {
46808
+ overloadedModels.delete(modelId);
46809
+ return false;
46810
+ }
46811
+ return true;
46812
+ }
46681
46813
  function prioritizeNonRateLimited(models) {
46682
46814
  const available = [];
46683
- const rateLimited = [];
46815
+ const degraded = [];
46684
46816
  for (const model of models) {
46685
- if (isRateLimited(model)) {
46686
- rateLimited.push(model);
46817
+ if (isRateLimited(model) || isOverloaded(model)) {
46818
+ degraded.push(model);
46687
46819
  } else {
46688
46820
  available.push(model);
46689
46821
  }
46690
46822
  }
46691
- return [...available, ...rateLimited];
46823
+ return [...available, ...degraded];
46692
46824
  }
46693
46825
  function canWrite(res) {
46694
46826
  return !res.writableEnded && !res.destroyed && res.socket !== null && !res.socket.destroyed && res.socket.writable;
@@ -46823,37 +46955,6 @@ function detectDegradedSuccessResponse(body) {
46823
46955
  }
46824
46956
  return void 0;
46825
46957
  }
46826
- var FALLBACK_STATUS_CODES = [
46827
- 400,
46828
- // Bad request - sometimes used for billing errors
46829
- 401,
46830
- // Unauthorized - provider API key issues
46831
- 402,
46832
- // Payment required - but from upstream, not x402
46833
- 403,
46834
- // Forbidden - provider restrictions
46835
- 413,
46836
- // Payload too large - request exceeds model's context limit
46837
- 429,
46838
- // Rate limited
46839
- 500,
46840
- // Internal server error
46841
- 502,
46842
- // Bad gateway
46843
- 503,
46844
- // Service unavailable
46845
- 504
46846
- // Gateway timeout
46847
- ];
46848
- function isProviderError(status, body) {
46849
- if (!FALLBACK_STATUS_CODES.includes(status)) {
46850
- return false;
46851
- }
46852
- if (status >= 500) {
46853
- return true;
46854
- }
46855
- return PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body));
46856
- }
46857
46958
  var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
46858
46959
  var ROLE_MAPPINGS = {
46859
46960
  developer: "system",
@@ -47137,7 +47238,7 @@ async function proxyPartnerRequest(req, res, apiBase, payFetch) {
47137
47238
  });
47138
47239
  }
47139
47240
  function readImageFileAsDataUri(filePath) {
47140
- const resolved = filePath.startsWith("~/") ? join7(homedir4(), filePath.slice(2)) : filePath;
47241
+ const resolved = filePath.startsWith("~/") ? join8(homedir5(), filePath.slice(2)) : filePath;
47141
47242
  if (!existsSync(resolved)) {
47142
47243
  throw new Error(`Image file not found: ${resolved}`);
47143
47244
  }
@@ -47149,7 +47250,7 @@ function readImageFileAsDataUri(filePath) {
47149
47250
  webp: "image/webp"
47150
47251
  };
47151
47252
  const mime = mimeMap[ext] ?? "image/png";
47152
- const data = readFileSync(resolved);
47253
+ const data = readFileSync2(resolved);
47153
47254
  return `data:${mime};base64,${data.toString("base64")}`;
47154
47255
  }
47155
47256
  async function uploadDataUriToHost(dataUri) {
@@ -47267,7 +47368,9 @@ async function startProxy(options) {
47267
47368
  skipPreAuth: paymentChain === "solana"
47268
47369
  });
47269
47370
  let balanceMonitor;
47270
- if (paymentChain === "solana" && solanaAddress) {
47371
+ if (options._balanceMonitorOverride) {
47372
+ balanceMonitor = options._balanceMonitorOverride;
47373
+ } else if (paymentChain === "solana" && solanaAddress) {
47271
47374
  const { SolanaBalanceMonitor: SolanaBalanceMonitor2 } = await Promise.resolve().then(() => (init_solana_balance(), solana_balance_exports));
47272
47375
  balanceMonitor = new SolanaBalanceMonitor2(solanaAddress);
47273
47376
  } else {
@@ -47359,7 +47462,16 @@ async function startProxy(options) {
47359
47462
  "Content-Type": "application/json",
47360
47463
  "Cache-Control": "no-cache"
47361
47464
  });
47362
- res.end(JSON.stringify(stats, null, 2));
47465
+ res.end(
47466
+ JSON.stringify(
47467
+ {
47468
+ ...stats,
47469
+ providerErrors: Object.fromEntries(perProviderErrors)
47470
+ },
47471
+ null,
47472
+ 2
47473
+ )
47474
+ );
47363
47475
  } catch (err) {
47364
47476
  res.writeHead(500, { "Content-Type": "application/json" });
47365
47477
  res.end(
@@ -47383,7 +47495,7 @@ async function startProxy(options) {
47383
47495
  res.end("Bad request");
47384
47496
  return;
47385
47497
  }
47386
- const filePath = join7(IMAGE_DIR, filename);
47498
+ const filePath = join8(IMAGE_DIR, filename);
47387
47499
  try {
47388
47500
  const s3 = await fsStat(filePath);
47389
47501
  if (!s3.isFile()) throw new Error("not a file");
@@ -47442,7 +47554,7 @@ async function startProxy(options) {
47442
47554
  const [, mimeType, b64] = dataUriMatch;
47443
47555
  const ext = mimeType === "image/jpeg" ? "jpg" : mimeType.split("/")[1] ?? "png";
47444
47556
  const filename = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}.${ext}`;
47445
- await writeFile2(join7(IMAGE_DIR, filename), Buffer.from(b64, "base64"));
47557
+ await writeFile2(join8(IMAGE_DIR, filename), Buffer.from(b64, "base64"));
47446
47558
  img.url = `http://localhost:${port2}/images/${filename}`;
47447
47559
  console.log(`[ClawRouter] Image saved \u2192 ${img.url}`);
47448
47560
  } else if (img.url?.startsWith("https://") || img.url?.startsWith("http://")) {
@@ -47453,7 +47565,7 @@ async function startProxy(options) {
47453
47565
  const ext = contentType.includes("jpeg") || contentType.includes("jpg") ? "jpg" : contentType.includes("webp") ? "webp" : "png";
47454
47566
  const filename = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}.${ext}`;
47455
47567
  const buf = Buffer.from(await imgResp.arrayBuffer());
47456
- await writeFile2(join7(IMAGE_DIR, filename), buf);
47568
+ await writeFile2(join8(IMAGE_DIR, filename), buf);
47457
47569
  img.url = `http://localhost:${port2}/images/${filename}`;
47458
47570
  console.log(`[ClawRouter] Image downloaded & saved \u2192 ${img.url}`);
47459
47571
  }
@@ -47542,7 +47654,7 @@ async function startProxy(options) {
47542
47654
  const [, mimeType, b64] = dataUriMatch;
47543
47655
  const ext = mimeType === "image/jpeg" ? "jpg" : mimeType.split("/")[1] ?? "png";
47544
47656
  const filename = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}.${ext}`;
47545
- await writeFile2(join7(IMAGE_DIR, filename), Buffer.from(b64, "base64"));
47657
+ await writeFile2(join8(IMAGE_DIR, filename), Buffer.from(b64, "base64"));
47546
47658
  img.url = `http://localhost:${port2}/images/${filename}`;
47547
47659
  console.log(`[ClawRouter] Image saved \u2192 ${img.url}`);
47548
47660
  } else if (img.url?.startsWith("https://") || img.url?.startsWith("http://")) {
@@ -47553,7 +47665,7 @@ async function startProxy(options) {
47553
47665
  const ext = contentType.includes("jpeg") || contentType.includes("jpg") ? "jpg" : contentType.includes("webp") ? "webp" : "png";
47554
47666
  const filename = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}.${ext}`;
47555
47667
  const buf = Buffer.from(await imgResp.arrayBuffer());
47556
- await writeFile2(join7(IMAGE_DIR, filename), buf);
47668
+ await writeFile2(join8(IMAGE_DIR, filename), buf);
47557
47669
  img.url = `http://localhost:${port2}/images/${filename}`;
47558
47670
  console.log(`[ClawRouter] Image downloaded & saved \u2192 ${img.url}`);
47559
47671
  }
@@ -47802,12 +47914,13 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
47802
47914
  if (response.status !== 200) {
47803
47915
  const errorBodyChunks = await readBodyWithTimeout(response.body, ERROR_BODY_READ_TIMEOUT_MS);
47804
47916
  const errorBody = Buffer.concat(errorBodyChunks).toString();
47805
- const isProviderErr = isProviderError(response.status, errorBody);
47917
+ const category = categorizeError(response.status, errorBody);
47806
47918
  return {
47807
47919
  success: false,
47808
47920
  errorBody,
47809
47921
  errorStatus: response.status,
47810
- isProviderError: isProviderErr
47922
+ isProviderError: category !== null,
47923
+ errorCategory: category ?? void 0
47811
47924
  };
47812
47925
  }
47813
47926
  const contentType = response.headers.get("content-type") || "";
@@ -47860,8 +47973,11 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
47860
47973
  let maxTokens = 4096;
47861
47974
  let routingProfile = null;
47862
47975
  let balanceFallbackNotice;
47976
+ let budgetDowngradeNotice;
47977
+ let budgetDowngradeHeaderMode;
47863
47978
  let accumulatedContent = "";
47864
47979
  let responseInputTokens;
47980
+ let responseOutputTokens;
47865
47981
  const isChatCompletion = req.url?.includes("/chat/completions");
47866
47982
  const sessionId = getSessionId(req.headers);
47867
47983
  let effectiveSessionId = sessionId;
@@ -47874,6 +47990,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
47874
47990
  let bodyModified = false;
47875
47991
  const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
47876
47992
  const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
47993
+ hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
47877
47994
  const rawLastContent = lastUserMsg?.content;
47878
47995
  const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
47879
47996
  if (sessionId && parsedMessages.length > 0) {
@@ -48530,6 +48647,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48530
48647
  options.onRouted?.(routingDecision);
48531
48648
  }
48532
48649
  }
48650
+ if (!effectiveSessionId && parsedMessages.length > 0) {
48651
+ effectiveSessionId = deriveSessionId(parsedMessages);
48652
+ }
48533
48653
  if (bodyModified) {
48534
48654
  body = Buffer.from(JSON.stringify(parsed));
48535
48655
  }
@@ -48620,7 +48740,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48620
48740
  }
48621
48741
  deduplicator.markInflight(dedupKey);
48622
48742
  let estimatedCostMicros;
48623
- const isFreeModel = modelId === FREE_MODEL;
48743
+ let isFreeModel = modelId === FREE_MODEL;
48624
48744
  if (modelId && !options.skipBalanceCheck && !isFreeModel) {
48625
48745
  const estimated = estimateAmount(modelId, body.length, maxTokens);
48626
48746
  if (estimated) {
@@ -48633,6 +48753,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48633
48753
  `[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
48634
48754
  );
48635
48755
  modelId = FREE_MODEL;
48756
+ isFreeModel = true;
48636
48757
  const parsed = JSON.parse(body.toString());
48637
48758
  parsed.model = FREE_MODEL;
48638
48759
  body = Buffer.from(JSON.stringify(parsed));
@@ -48659,6 +48780,89 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48659
48780
  }
48660
48781
  }
48661
48782
  }
48783
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "strict") {
48784
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
48785
+ const thisReqEstStr = estimatedCostMicros !== void 0 ? estimatedCostMicros.toString() : modelId ? estimateAmount(modelId, body.length, maxTokens) : void 0;
48786
+ const thisReqEstUsd = thisReqEstStr ? Number(thisReqEstStr) / 1e6 : 0;
48787
+ const projectedCostUsd = runCostUsd + thisReqEstUsd;
48788
+ if (projectedCostUsd > options.maxCostPerRunUsd) {
48789
+ console.log(
48790
+ `[ClawRouter] Cost cap exceeded for session ${effectiveSessionId.slice(0, 8)}...: projected $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) > $${options.maxCostPerRunUsd} limit`
48791
+ );
48792
+ res.writeHead(429, {
48793
+ "Content-Type": "application/json",
48794
+ "X-ClawRouter-Cost-Cap-Exceeded": "1"
48795
+ });
48796
+ res.end(
48797
+ JSON.stringify({
48798
+ error: {
48799
+ message: `ClawRouter cost cap exceeded: projected spend $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) would exceed limit $${options.maxCostPerRunUsd}`,
48800
+ type: "cost_cap_exceeded",
48801
+ code: "cost_cap_exceeded"
48802
+ }
48803
+ })
48804
+ );
48805
+ deduplicator.removeInflight(dedupKey);
48806
+ return;
48807
+ }
48808
+ }
48809
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
48810
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
48811
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
48812
+ const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
48813
+ if (isComplexOrAgentic) {
48814
+ const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
48815
+ if (m.id === FREE_MODEL) return false;
48816
+ const est = estimateAmount(m.id, body.length, maxTokens);
48817
+ return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
48818
+ });
48819
+ if (!canAffordAnyNonFreeModel) {
48820
+ console.log(
48821
+ `[ClawRouter] Budget insufficient for agentic/complex session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (silent downgrade would corrupt tool/complex responses)`
48822
+ );
48823
+ res.writeHead(429, {
48824
+ "Content-Type": "application/json",
48825
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
48826
+ "X-ClawRouter-Budget-Mode": "blocked"
48827
+ });
48828
+ res.end(
48829
+ JSON.stringify({
48830
+ error: {
48831
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue.`,
48832
+ type: "cost_cap_exceeded",
48833
+ code: "budget_exhausted"
48834
+ }
48835
+ })
48836
+ );
48837
+ deduplicator.removeInflight(dedupKey);
48838
+ return;
48839
+ }
48840
+ } else if (!routingDecision && modelId && modelId !== FREE_MODEL) {
48841
+ const est = estimateAmount(modelId, body.length, maxTokens);
48842
+ const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
48843
+ if (!canAfford) {
48844
+ console.log(
48845
+ `[ClawRouter] Budget insufficient for explicit model ${modelId} in session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (user explicitly chose ${modelId})`
48846
+ );
48847
+ res.writeHead(429, {
48848
+ "Content-Type": "application/json",
48849
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
48850
+ "X-ClawRouter-Budget-Mode": "blocked"
48851
+ });
48852
+ res.end(
48853
+ JSON.stringify({
48854
+ error: {
48855
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue using ${modelId}.`,
48856
+ type: "cost_cap_exceeded",
48857
+ code: "budget_exhausted"
48858
+ }
48859
+ })
48860
+ );
48861
+ deduplicator.removeInflight(dedupKey);
48862
+ return;
48863
+ }
48864
+ }
48865
+ }
48662
48866
  let heartbeatInterval;
48663
48867
  let headersSentEarly = false;
48664
48868
  if (isStreaming) {
@@ -48703,10 +48907,11 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48703
48907
  }
48704
48908
  });
48705
48909
  const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
48706
- const controller = new AbortController();
48707
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
48910
+ const globalController = new AbortController();
48911
+ const timeoutId = setTimeout(() => globalController.abort(), timeoutMs);
48708
48912
  try {
48709
48913
  let modelsToTry;
48914
+ const excludeList = options.excludeModels ?? loadExcludeList();
48710
48915
  if (routingDecision) {
48711
48916
  const estimatedInputTokens = Math.ceil(body.length / 4);
48712
48917
  const estimatedTotalTokens = estimatedInputTokens + maxTokens;
@@ -48724,8 +48929,15 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48724
48929
  `[ClawRouter] Context filter (~${estimatedTotalTokens} tokens): excluded ${contextExcluded.join(", ")}`
48725
48930
  );
48726
48931
  }
48727
- let toolFiltered = filterByToolCalling(contextFiltered, hasTools, supportsToolCalling);
48728
- const toolExcluded = contextFiltered.filter((m) => !toolFiltered.includes(m));
48932
+ const excludeFiltered = filterByExcludeList(contextFiltered, excludeList);
48933
+ const excludeExcluded = contextFiltered.filter((m) => !excludeFiltered.includes(m));
48934
+ if (excludeExcluded.length > 0) {
48935
+ console.log(
48936
+ `[ClawRouter] Exclude filter: excluded ${excludeExcluded.join(", ")} (user preference)`
48937
+ );
48938
+ }
48939
+ let toolFiltered = filterByToolCalling(excludeFiltered, hasTools, supportsToolCalling);
48940
+ const toolExcluded = excludeFiltered.filter((m) => !toolFiltered.includes(m));
48729
48941
  if (toolExcluded.length > 0) {
48730
48942
  console.log(
48731
48943
  `[ClawRouter] Tool-calling filter: excluded ${toolExcluded.join(", ")} (no structured function call support)`
@@ -48758,16 +48970,86 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48758
48970
  } else {
48759
48971
  modelsToTry = modelId ? [modelId] : [];
48760
48972
  }
48761
- if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
48973
+ if (!hasTools && !modelsToTry.includes(FREE_MODEL) && !excludeList.has(FREE_MODEL)) {
48762
48974
  modelsToTry.push(FREE_MODEL);
48763
48975
  }
48976
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
48977
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
48978
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
48979
+ const beforeFilter = [...modelsToTry];
48980
+ modelsToTry = modelsToTry.filter((m) => {
48981
+ if (m === FREE_MODEL) return true;
48982
+ const est = estimateAmount(m, body.length, maxTokens);
48983
+ if (!est) return true;
48984
+ return Number(est) / 1e6 <= remainingUsd;
48985
+ });
48986
+ const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
48987
+ const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
48988
+ const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m === FREE_MODEL);
48989
+ if (isComplexOrAgenticFilter && filteredToFreeOnly) {
48990
+ const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
48991
+ console.log(
48992
+ `[ClawRouter] Budget filter left only free model for complex/agentic session \u2014 blocking (${budgetSummary})`
48993
+ );
48994
+ const errPayload = JSON.stringify({
48995
+ error: {
48996
+ message: `ClawRouter budget exhausted: remaining budget (${budgetSummary}) cannot support a complex/tool request. Increase maxCostPerRun to continue.`,
48997
+ type: "cost_cap_exceeded",
48998
+ code: "budget_exhausted"
48999
+ }
49000
+ });
49001
+ if (heartbeatInterval) clearInterval(heartbeatInterval);
49002
+ if (headersSentEarly) {
49003
+ safeWrite(res, `data: ${errPayload}
49004
+
49005
+ data: [DONE]
49006
+
49007
+ `);
49008
+ res.end();
49009
+ } else {
49010
+ res.writeHead(429, {
49011
+ "Content-Type": "application/json",
49012
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
49013
+ "X-ClawRouter-Budget-Mode": "blocked"
49014
+ });
49015
+ res.end(errPayload);
49016
+ }
49017
+ deduplicator.removeInflight(dedupKey);
49018
+ return;
49019
+ }
49020
+ if (excluded.length > 0) {
49021
+ const budgetSummary = remainingUsd > 0 ? `$${remainingUsd.toFixed(4)} remaining` : `budget exhausted ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd})`;
49022
+ console.log(
49023
+ `[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
49024
+ );
49025
+ const fromModel = excluded[0];
49026
+ const usingFree = modelsToTry.length === 1 && modelsToTry[0] === FREE_MODEL;
49027
+ if (usingFree) {
49028
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
49029
+
49030
+ `;
49031
+ } else {
49032
+ const toModel = modelsToTry[0] ?? FREE_MODEL;
49033
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget low** ($${remainingUsd > 0 ? remainingUsd.toFixed(4) : "0.0000"} remaining) \u2014 using ${toModel} instead of ${fromModel}.
49034
+
49035
+ `;
49036
+ }
49037
+ budgetDowngradeHeaderMode = "downgraded";
49038
+ }
49039
+ }
48764
49040
  let upstream;
48765
49041
  let lastError;
48766
49042
  let actualModelUsed = modelId;
48767
49043
  for (let i = 0; i < modelsToTry.length; i++) {
48768
49044
  const tryModel = modelsToTry[i];
48769
49045
  const isLastAttempt = i === modelsToTry.length - 1;
49046
+ if (globalController.signal.aborted) {
49047
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
49048
+ }
48770
49049
  console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
49050
+ const modelController = new AbortController();
49051
+ const modelTimeoutId = setTimeout(() => modelController.abort(), PER_MODEL_TIMEOUT_MS);
49052
+ const combinedSignal = AbortSignal.any([globalController.signal, modelController.signal]);
48771
49053
  const result = await tryModelRequest(
48772
49054
  upstreamUrl,
48773
49055
  req.method ?? "POST",
@@ -48777,12 +49059,29 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48777
49059
  maxTokens,
48778
49060
  payFetch,
48779
49061
  balanceMonitor,
48780
- controller.signal
49062
+ combinedSignal
48781
49063
  );
49064
+ clearTimeout(modelTimeoutId);
49065
+ if (globalController.signal.aborted) {
49066
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
49067
+ }
49068
+ if (!result.success && modelController.signal.aborted && !isLastAttempt) {
49069
+ console.log(
49070
+ `[ClawRouter] Model ${tryModel} timed out after ${PER_MODEL_TIMEOUT_MS}ms, trying fallback`
49071
+ );
49072
+ recordProviderError(tryModel, "server_error");
49073
+ continue;
49074
+ }
48782
49075
  if (result.success && result.response) {
48783
49076
  upstream = result.response;
48784
49077
  actualModelUsed = tryModel;
48785
49078
  console.log(`[ClawRouter] Success with model: ${tryModel}`);
49079
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49080
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49081
+ if (costEst) {
49082
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49083
+ }
49084
+ }
48786
49085
  break;
48787
49086
  }
48788
49087
  lastError = {
@@ -48798,7 +49097,52 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48798
49097
  );
48799
49098
  break;
48800
49099
  }
48801
- if (result.errorStatus === 429) {
49100
+ const errorCat = result.errorCategory;
49101
+ if (errorCat) {
49102
+ recordProviderError(tryModel, errorCat);
49103
+ }
49104
+ if (errorCat === "rate_limited") {
49105
+ if (!isLastAttempt && !globalController.signal.aborted) {
49106
+ console.log(
49107
+ `[ClawRouter] Rate-limited on ${tryModel}, retrying in 200ms before failover`
49108
+ );
49109
+ await new Promise((resolve) => setTimeout(resolve, 200));
49110
+ if (!globalController.signal.aborted) {
49111
+ const retryController = new AbortController();
49112
+ const retryTimeoutId = setTimeout(
49113
+ () => retryController.abort(),
49114
+ PER_MODEL_TIMEOUT_MS
49115
+ );
49116
+ const retrySignal = AbortSignal.any([
49117
+ globalController.signal,
49118
+ retryController.signal
49119
+ ]);
49120
+ const retryResult = await tryModelRequest(
49121
+ upstreamUrl,
49122
+ req.method ?? "POST",
49123
+ headers,
49124
+ body,
49125
+ tryModel,
49126
+ maxTokens,
49127
+ payFetch,
49128
+ balanceMonitor,
49129
+ retrySignal
49130
+ );
49131
+ clearTimeout(retryTimeoutId);
49132
+ if (retryResult.success && retryResult.response) {
49133
+ upstream = retryResult.response;
49134
+ actualModelUsed = tryModel;
49135
+ console.log(`[ClawRouter] Rate-limit retry succeeded for: ${tryModel}`);
49136
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49137
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49138
+ if (costEst) {
49139
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49140
+ }
49141
+ }
49142
+ break;
49143
+ }
49144
+ }
49145
+ }
48802
49146
  markRateLimited(tryModel);
48803
49147
  try {
48804
49148
  const parsed = JSON.parse(result.errorBody || "{}");
@@ -48814,6 +49158,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48814
49158
  }
48815
49159
  } catch {
48816
49160
  }
49161
+ } else if (errorCat === "overloaded") {
49162
+ markOverloaded(tryModel);
49163
+ } else if (errorCat === "auth_failure" || errorCat === "quota_exceeded") {
49164
+ console.log(
49165
+ `[ClawRouter] \u{1F511} ${errorCat === "auth_failure" ? "Auth failure" : "Quota exceeded"} for ${tryModel} \u2014 check provider config`
49166
+ );
48817
49167
  }
48818
49168
  const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
48819
49169
  result.errorBody || ""
@@ -48928,6 +49278,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48928
49278
  if (rsp.usage && typeof rsp.usage === "object") {
48929
49279
  const u = rsp.usage;
48930
49280
  if (typeof u.prompt_tokens === "number") responseInputTokens = u.prompt_tokens;
49281
+ if (typeof u.completion_tokens === "number") responseOutputTokens = u.completion_tokens;
48931
49282
  }
48932
49283
  const baseChunk = {
48933
49284
  id: rsp.id ?? `chatcmpl-${Date.now()}`,
@@ -48973,6 +49324,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48973
49324
  responseChunks.push(Buffer.from(noticeData));
48974
49325
  balanceFallbackNotice = void 0;
48975
49326
  }
49327
+ if (budgetDowngradeNotice) {
49328
+ const noticeChunk = {
49329
+ ...baseChunk,
49330
+ choices: [
49331
+ {
49332
+ index: index2,
49333
+ delta: { content: budgetDowngradeNotice },
49334
+ logprobs: null,
49335
+ finish_reason: null
49336
+ }
49337
+ ]
49338
+ };
49339
+ const noticeData = `data: ${JSON.stringify(noticeChunk)}
49340
+
49341
+ `;
49342
+ safeWrite(res, noticeData);
49343
+ responseChunks.push(Buffer.from(noticeData));
49344
+ budgetDowngradeNotice = void 0;
49345
+ }
48976
49346
  if (content) {
48977
49347
  const contentChunk = {
48978
49348
  ...baseChunk,
@@ -49076,6 +49446,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49076
49446
  }
49077
49447
  balanceFallbackNotice = void 0;
49078
49448
  }
49449
+ if (budgetDowngradeNotice && responseBody.length > 0) {
49450
+ try {
49451
+ const parsed = JSON.parse(responseBody.toString());
49452
+ if (parsed.choices?.[0]?.message?.content !== void 0) {
49453
+ parsed.choices[0].message.content = budgetDowngradeNotice + parsed.choices[0].message.content;
49454
+ responseBody = Buffer.from(JSON.stringify(parsed));
49455
+ }
49456
+ } catch {
49457
+ }
49458
+ budgetDowngradeNotice = void 0;
49459
+ }
49460
+ if (budgetDowngradeHeaderMode) {
49461
+ responseHeaders["x-clawrouter-budget-downgrade"] = "1";
49462
+ responseHeaders["x-clawrouter-budget-mode"] = budgetDowngradeHeaderMode;
49463
+ budgetDowngradeHeaderMode = void 0;
49464
+ }
49079
49465
  responseHeaders["content-length"] = String(responseBody.length);
49080
49466
  res.writeHead(upstream.status, responseHeaders);
49081
49467
  safeWrite(res, responseBody);
@@ -49106,6 +49492,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49106
49492
  if (rspJson.usage && typeof rspJson.usage === "object") {
49107
49493
  if (typeof rspJson.usage.prompt_tokens === "number")
49108
49494
  responseInputTokens = rspJson.usage.prompt_tokens;
49495
+ if (typeof rspJson.usage.completion_tokens === "number")
49496
+ responseOutputTokens = rspJson.usage.completion_tokens;
49109
49497
  }
49110
49498
  } catch {
49111
49499
  }
@@ -49138,25 +49526,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49138
49526
  }
49139
49527
  const logModel = routingDecision?.model ?? modelId;
49140
49528
  if (logModel) {
49141
- const estimatedInputTokens = Math.ceil(body.length / 4);
49529
+ const actualInputTokens = responseInputTokens ?? Math.ceil(body.length / 4);
49530
+ const actualOutputTokens = responseOutputTokens ?? maxTokens;
49142
49531
  const accurateCosts = calculateModelCost(
49143
49532
  logModel,
49144
49533
  routerOpts.modelPricing,
49145
- estimatedInputTokens,
49146
- maxTokens,
49534
+ actualInputTokens,
49535
+ actualOutputTokens,
49147
49536
  routingProfile ?? void 0
49148
49537
  );
49149
- const costWithBuffer = accurateCosts.costEstimate * 1.2;
49150
- const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
49151
49538
  const entry = {
49152
49539
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
49153
49540
  model: logModel,
49154
49541
  tier: routingDecision?.tier ?? "DIRECT",
49155
- cost: costWithBuffer,
49156
- baselineCost: baselineWithBuffer,
49542
+ cost: accurateCosts.costEstimate,
49543
+ baselineCost: accurateCosts.baselineCost,
49157
49544
  savings: accurateCosts.savings,
49158
49545
  latencyMs: Date.now() - startTime,
49159
- ...responseInputTokens !== void 0 && { inputTokens: responseInputTokens }
49546
+ ...responseInputTokens !== void 0 && { inputTokens: responseInputTokens },
49547
+ ...responseOutputTokens !== void 0 && { outputTokens: responseOutputTokens }
49160
49548
  };
49161
49549
  logUsage(entry).catch(() => {
49162
49550
  });