@blockrun/clawrouter 0.12.55 → 0.12.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -32878,11 +32878,22 @@ var MODEL_ALIASES = {
32878
32878
  "grok-fast": "xai/grok-4-fast-reasoning",
32879
32879
  "grok-code": "deepseek/deepseek-chat",
32880
32880
  // was grok-code-fast-1, delisted due to poor retention
32881
+ // Delisted model redirects — full model IDs that were previously valid but removed
32882
+ "grok-code-fast-1": "deepseek/deepseek-chat",
32883
+ // bare alias
32884
+ "xai/grok-code-fast-1": "deepseek/deepseek-chat",
32885
+ // delisted 2026-03-12
32886
+ "xai/grok-3-fast": "xai/grok-4-fast-reasoning",
32887
+ // delisted (too expensive)
32881
32888
  // NVIDIA
32882
32889
  nvidia: "nvidia/gpt-oss-120b",
32883
32890
  "gpt-120b": "nvidia/gpt-oss-120b",
32884
32891
  // MiniMax
32885
32892
  minimax: "minimax/minimax-m2.5",
32893
+ // Z.AI GLM-5
32894
+ glm: "zai/glm-5",
32895
+ "glm-5": "zai/glm-5",
32896
+ "glm-5-turbo": "zai/glm-5-turbo",
32886
32897
  // Routing profile aliases (common variations)
32887
32898
  "auto-router": "auto",
32888
32899
  router: "auto"
@@ -33417,6 +33428,27 @@ var BLOCKRUN_MODELS = [
33417
33428
  contextWindow: 262144,
33418
33429
  maxOutput: 16384,
33419
33430
  toolCalling: true
33431
+ },
33432
+ // Z.AI GLM-5 Models
33433
+ {
33434
+ id: "zai/glm-5",
33435
+ name: "GLM-5",
33436
+ version: "5",
33437
+ inputPrice: 1,
33438
+ outputPrice: 3.2,
33439
+ contextWindow: 2e5,
33440
+ maxOutput: 128e3,
33441
+ toolCalling: true
33442
+ },
33443
+ {
33444
+ id: "zai/glm-5-turbo",
33445
+ name: "GLM-5 Turbo",
33446
+ version: "5-turbo",
33447
+ inputPrice: 1.2,
33448
+ outputPrice: 4,
33449
+ contextWindow: 2e5,
33450
+ maxOutput: 128e3,
33451
+ toolCalling: true
33420
33452
  }
33421
33453
  ];
33422
33454
  function toOpenClawModel(m) {
@@ -46617,7 +46649,8 @@ var SessionStore = class {
46617
46649
  requestCount: 1,
46618
46650
  recentHashes: [],
46619
46651
  strikes: 0,
46620
- escalated: false
46652
+ escalated: false,
46653
+ sessionCostMicros: 0n
46621
46654
  });
46622
46655
  }
46623
46656
  }
@@ -46706,6 +46739,39 @@ var SessionStore = class {
46706
46739
  entry.escalated = true;
46707
46740
  return { model: nextConfig.primary, tier: nextTier };
46708
46741
  }
46742
+ /**
46743
+ * Add cost to a session's running total for maxCostPerRun tracking.
46744
+ * Cost is in USDC 6-decimal units (micros).
46745
+ * Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
46746
+ * that never go through the routing path).
46747
+ */
46748
+ addSessionCost(sessionId, additionalMicros) {
46749
+ let entry = this.sessions.get(sessionId);
46750
+ if (!entry) {
46751
+ const now = Date.now();
46752
+ entry = {
46753
+ model: "",
46754
+ tier: "DIRECT",
46755
+ createdAt: now,
46756
+ lastUsedAt: now,
46757
+ requestCount: 0,
46758
+ recentHashes: [],
46759
+ strikes: 0,
46760
+ escalated: false,
46761
+ sessionCostMicros: 0n
46762
+ };
46763
+ this.sessions.set(sessionId, entry);
46764
+ }
46765
+ entry.sessionCostMicros += additionalMicros;
46766
+ }
46767
+ /**
46768
+ * Get the total accumulated cost for a session in USD.
46769
+ */
46770
+ getSessionCostUsd(sessionId) {
46771
+ const entry = this.sessions.get(sessionId);
46772
+ if (!entry) return 0;
46773
+ return Number(entry.sessionCostMicros) / 1e6;
46774
+ }
46709
46775
  /**
46710
46776
  * Stop the cleanup interval.
46711
46777
  */
@@ -46981,9 +47047,11 @@ var MAX_MESSAGES = 200;
46981
47047
  var CONTEXT_LIMIT_KB = 5120;
46982
47048
  var HEARTBEAT_INTERVAL_MS = 2e3;
46983
47049
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
47050
+ var PER_MODEL_TIMEOUT_MS = 6e4;
46984
47051
  var MAX_FALLBACK_ATTEMPTS = 5;
46985
47052
  var HEALTH_CHECK_TIMEOUT_MS = 2e3;
46986
47053
  var RATE_LIMIT_COOLDOWN_MS = 6e4;
47054
+ var OVERLOAD_COOLDOWN_MS = 15e3;
46987
47055
  var PORT_RETRY_ATTEMPTS = 5;
46988
47056
  var PORT_RETRY_DELAY_MS = 1e3;
46989
47057
  var MODEL_BODY_READ_TIMEOUT_MS = 3e5;
@@ -47133,7 +47201,41 @@ function transformPaymentError(errorBody) {
47133
47201
  }
47134
47202
  return errorBody;
47135
47203
  }
47204
/**
 * Map an upstream HTTP error to a provider-error category.
 *
 * @param {number} status - HTTP status code of the upstream response.
 * @param {string} body - Error body text, matched against the patterns below.
 * @returns {string|null} One of "auth_failure", "payment_error",
 *   "quota_exceeded", "rate_limited", "overloaded", "server_error",
 *   "config_error", or null when the response is not classified.
 */
function categorizeError(status, body) {
  switch (status) {
    case 401:
      return "auth_failure";
    case 402:
      return "payment_error";
    case 403: {
      // A 403 that mentions plan/quota wording is a quota problem, not a bad key.
      const looksLikeQuota = /plan.*limit|quota.*exceeded|subscription|allowance/i.test(body);
      return looksLikeQuota ? "quota_exceeded" : "auth_failure";
    }
    case 429:
      return "rate_limited";
    case 529:
      return "overloaded";
    case 400:
    case 413: {
      // Only bodies matching a known provider pattern count as a config error.
      const matchesKnownPattern = PROVIDER_ERROR_PATTERNS.some((p) => p.test(body));
      return matchesKnownPattern ? "config_error" : null;
    }
    default:
      break;
  }
  // A 503 is "overloaded" only when the body says so; otherwise it is a plain 5xx.
  if (status === 503 && /overload|capacity|too.*many.*request/i.test(body)) {
    return "overloaded";
  }
  return status >= 500 ? "server_error" : null;
}
47136
47222
  var rateLimitedModels = /* @__PURE__ */ new Map();
47223
// Model ID -> Date.now() timestamp of the most recent overload mark.
var overloadedModels = /* @__PURE__ */ new Map();
// Model ID -> per-category error counters (serialized into the stats JSON response).
var perProviderErrors = /* @__PURE__ */ new Map();
/**
 * Increment the error counter for a model/category pair.
 *
 * @param {string} modelId - Model the failed request was sent to.
 * @param {string} category - Error category name. The known categories are
 *   pre-seeded with 0; any other name starts its own counter at 0.
 * @returns {void}
 */
function recordProviderError(modelId, category) {
  let counters = perProviderErrors.get(modelId);
  if (!counters) {
    counters = {
      auth_failure: 0,
      quota_exceeded: 0,
      rate_limited: 0,
      overloaded: 0,
      server_error: 0,
      payment_error: 0,
      config_error: 0
    };
    perProviderErrors.set(modelId, counters);
  }
  // `undefined++` yields NaN, which JSON.stringify emits as null; start
  // unseeded categories from 0 so the counter stays a real number.
  counters[category] = (counters[category] ?? 0) + 1;
}
47137
47239
  function isRateLimited(modelId) {
47138
47240
  const hitTime = rateLimitedModels.get(modelId);
47139
47241
  if (!hitTime) return false;
@@ -47148,17 +47250,30 @@ function markRateLimited(modelId) {
47148
47250
  rateLimitedModels.set(modelId, Date.now());
47149
47251
  console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for 60s`);
47150
47252
  }
47253
/**
 * Flag a model as overloaded, stamping it with the current time.
 *
 * @param {string} modelId - Model that reported an overload.
 * @returns {void}
 */
function markOverloaded(modelId) {
  const markedAt = Date.now();
  overloadedModels.set(modelId, markedAt);
  console.log(`[ClawRouter] Model ${modelId} overloaded, will deprioritize for 15s`);
}
47257
/**
 * Report whether a model is still inside its overload cooldown window.
 * An expired mark is removed from `overloadedModels` as a side effect.
 *
 * @param {string} modelId - Model to check.
 * @returns {boolean} true while less than OVERLOAD_COOLDOWN_MS has elapsed
 *   since the model was marked; false otherwise.
 */
function isOverloaded(modelId) {
  const markedAt = overloadedModels.get(modelId);
  if (!markedAt) return false;

  const stillCoolingDown = Date.now() - markedAt < OVERLOAD_COOLDOWN_MS;
  if (!stillCoolingDown) {
    // Cooldown elapsed: forget the mark so the model is treated as healthy again.
    overloadedModels.delete(modelId);
  }
  return stillCoolingDown;
}
47151
47266
  function prioritizeNonRateLimited(models) {
47152
47267
  const available = [];
47153
- const rateLimited = [];
47268
+ const degraded = [];
47154
47269
  for (const model of models) {
47155
- if (isRateLimited(model)) {
47156
- rateLimited.push(model);
47270
+ if (isRateLimited(model) || isOverloaded(model)) {
47271
+ degraded.push(model);
47157
47272
  } else {
47158
47273
  available.push(model);
47159
47274
  }
47160
47275
  }
47161
- return [...available, ...rateLimited];
47276
+ return [...available, ...degraded];
47162
47277
  }
47163
47278
  function canWrite(res) {
47164
47279
  return !res.writableEnded && !res.destroyed && res.socket !== null && !res.socket.destroyed && res.socket.writable;
@@ -47293,37 +47408,6 @@ function detectDegradedSuccessResponse(body) {
47293
47408
  }
47294
47409
  return void 0;
47295
47410
  }
47296
- var FALLBACK_STATUS_CODES = [
47297
- 400,
47298
- // Bad request - sometimes used for billing errors
47299
- 401,
47300
- // Unauthorized - provider API key issues
47301
- 402,
47302
- // Payment required - but from upstream, not x402
47303
- 403,
47304
- // Forbidden - provider restrictions
47305
- 413,
47306
- // Payload too large - request exceeds model's context limit
47307
- 429,
47308
- // Rate limited
47309
- 500,
47310
- // Internal server error
47311
- 502,
47312
- // Bad gateway
47313
- 503,
47314
- // Service unavailable
47315
- 504
47316
- // Gateway timeout
47317
- ];
47318
- function isProviderError(status, body) {
47319
- if (!FALLBACK_STATUS_CODES.includes(status)) {
47320
- return false;
47321
- }
47322
- if (status >= 500) {
47323
- return true;
47324
- }
47325
- return PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body));
47326
- }
47327
47411
  var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
47328
47412
  var ROLE_MAPPINGS = {
47329
47413
  developer: "system",
@@ -47829,7 +47913,16 @@ async function startProxy(options) {
47829
47913
  "Content-Type": "application/json",
47830
47914
  "Cache-Control": "no-cache"
47831
47915
  });
47832
- res.end(JSON.stringify(stats, null, 2));
47916
+ res.end(
47917
+ JSON.stringify(
47918
+ {
47919
+ ...stats,
47920
+ providerErrors: Object.fromEntries(perProviderErrors)
47921
+ },
47922
+ null,
47923
+ 2
47924
+ )
47925
+ );
47833
47926
  } catch (err) {
47834
47927
  res.writeHead(500, { "Content-Type": "application/json" });
47835
47928
  res.end(
@@ -48272,12 +48365,13 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
48272
48365
  if (response.status !== 200) {
48273
48366
  const errorBodyChunks = await readBodyWithTimeout(response.body, ERROR_BODY_READ_TIMEOUT_MS);
48274
48367
  const errorBody = Buffer.concat(errorBodyChunks).toString();
48275
- const isProviderErr = isProviderError(response.status, errorBody);
48368
+ const category = categorizeError(response.status, errorBody);
48276
48369
  return {
48277
48370
  success: false,
48278
48371
  errorBody,
48279
48372
  errorStatus: response.status,
48280
- isProviderError: isProviderErr
48373
+ isProviderError: category !== null,
48374
+ errorCategory: category ?? void 0
48281
48375
  };
48282
48376
  }
48283
48377
  const contentType = response.headers.get("content-type") || "";
@@ -48330,6 +48424,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48330
48424
  let maxTokens = 4096;
48331
48425
  let routingProfile = null;
48332
48426
  let balanceFallbackNotice;
48427
+ let budgetDowngradeNotice;
48428
+ let budgetDowngradeHeaderMode;
48333
48429
  let accumulatedContent = "";
48334
48430
  let responseInputTokens;
48335
48431
  const isChatCompletion = req.url?.includes("/chat/completions");
@@ -48344,6 +48440,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48344
48440
  let bodyModified = false;
48345
48441
  const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
48346
48442
  const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
48443
+ hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
48347
48444
  const rawLastContent = lastUserMsg?.content;
48348
48445
  const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
48349
48446
  if (sessionId && parsedMessages.length > 0) {
@@ -49000,6 +49097,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49000
49097
  options.onRouted?.(routingDecision);
49001
49098
  }
49002
49099
  }
49100
+ if (!effectiveSessionId && parsedMessages.length > 0) {
49101
+ effectiveSessionId = deriveSessionId(parsedMessages);
49102
+ }
49003
49103
  if (bodyModified) {
49004
49104
  body = Buffer.from(JSON.stringify(parsed));
49005
49105
  }
@@ -49090,7 +49190,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49090
49190
  }
49091
49191
  deduplicator.markInflight(dedupKey);
49092
49192
  let estimatedCostMicros;
49093
- const isFreeModel = modelId === FREE_MODEL;
49193
+ let isFreeModel = modelId === FREE_MODEL;
49094
49194
  if (modelId && !options.skipBalanceCheck && !isFreeModel) {
49095
49195
  const estimated = estimateAmount(modelId, body.length, maxTokens);
49096
49196
  if (estimated) {
@@ -49103,6 +49203,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49103
49203
  `[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
49104
49204
  );
49105
49205
  modelId = FREE_MODEL;
49206
+ isFreeModel = true;
49106
49207
  const parsed = JSON.parse(body.toString());
49107
49208
  parsed.model = FREE_MODEL;
49108
49209
  body = Buffer.from(JSON.stringify(parsed));
@@ -49129,6 +49230,89 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49129
49230
  }
49130
49231
  }
49131
49232
  }
49233
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "strict") {
49234
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
49235
+ const thisReqEstStr = estimatedCostMicros !== void 0 ? estimatedCostMicros.toString() : modelId ? estimateAmount(modelId, body.length, maxTokens) : void 0;
49236
+ const thisReqEstUsd = thisReqEstStr ? Number(thisReqEstStr) / 1e6 : 0;
49237
+ const projectedCostUsd = runCostUsd + thisReqEstUsd;
49238
+ if (projectedCostUsd > options.maxCostPerRunUsd) {
49239
+ console.log(
49240
+ `[ClawRouter] Cost cap exceeded for session ${effectiveSessionId.slice(0, 8)}...: projected $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) > $${options.maxCostPerRunUsd} limit`
49241
+ );
49242
+ res.writeHead(429, {
49243
+ "Content-Type": "application/json",
49244
+ "X-ClawRouter-Cost-Cap-Exceeded": "1"
49245
+ });
49246
+ res.end(
49247
+ JSON.stringify({
49248
+ error: {
49249
+ message: `ClawRouter cost cap exceeded: projected spend $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) would exceed limit $${options.maxCostPerRunUsd}`,
49250
+ type: "cost_cap_exceeded",
49251
+ code: "cost_cap_exceeded"
49252
+ }
49253
+ })
49254
+ );
49255
+ deduplicator.removeInflight(dedupKey);
49256
+ return;
49257
+ }
49258
+ }
49259
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
49260
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
49261
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
49262
+ const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
49263
+ if (isComplexOrAgentic) {
49264
+ const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
49265
+ if (m.id === FREE_MODEL) return false;
49266
+ const est = estimateAmount(m.id, body.length, maxTokens);
49267
+ return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
49268
+ });
49269
+ if (!canAffordAnyNonFreeModel) {
49270
+ console.log(
49271
+ `[ClawRouter] Budget insufficient for agentic/complex session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (silent downgrade would corrupt tool/complex responses)`
49272
+ );
49273
+ res.writeHead(429, {
49274
+ "Content-Type": "application/json",
49275
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
49276
+ "X-ClawRouter-Budget-Mode": "blocked"
49277
+ });
49278
+ res.end(
49279
+ JSON.stringify({
49280
+ error: {
49281
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue.`,
49282
+ type: "cost_cap_exceeded",
49283
+ code: "budget_exhausted"
49284
+ }
49285
+ })
49286
+ );
49287
+ deduplicator.removeInflight(dedupKey);
49288
+ return;
49289
+ }
49290
+ } else if (!routingDecision && modelId && modelId !== FREE_MODEL) {
49291
+ const est = estimateAmount(modelId, body.length, maxTokens);
49292
+ const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
49293
+ if (!canAfford) {
49294
+ console.log(
49295
+ `[ClawRouter] Budget insufficient for explicit model ${modelId} in session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (user explicitly chose ${modelId})`
49296
+ );
49297
+ res.writeHead(429, {
49298
+ "Content-Type": "application/json",
49299
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
49300
+ "X-ClawRouter-Budget-Mode": "blocked"
49301
+ });
49302
+ res.end(
49303
+ JSON.stringify({
49304
+ error: {
49305
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue using ${modelId}.`,
49306
+ type: "cost_cap_exceeded",
49307
+ code: "budget_exhausted"
49308
+ }
49309
+ })
49310
+ );
49311
+ deduplicator.removeInflight(dedupKey);
49312
+ return;
49313
+ }
49314
+ }
49315
+ }
49132
49316
  let heartbeatInterval;
49133
49317
  let headersSentEarly = false;
49134
49318
  if (isStreaming) {
@@ -49173,8 +49357,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49173
49357
  }
49174
49358
  });
49175
49359
  const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
49176
- const controller = new AbortController();
49177
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
49360
+ const globalController = new AbortController();
49361
+ const timeoutId = setTimeout(() => globalController.abort(), timeoutMs);
49178
49362
  try {
49179
49363
  let modelsToTry;
49180
49364
  if (routingDecision) {
@@ -49231,13 +49415,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49231
49415
  if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
49232
49416
  modelsToTry.push(FREE_MODEL);
49233
49417
  }
49418
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
49419
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
49420
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
49421
+ const beforeFilter = [...modelsToTry];
49422
+ modelsToTry = modelsToTry.filter((m) => {
49423
+ if (m === FREE_MODEL) return true;
49424
+ const est = estimateAmount(m, body.length, maxTokens);
49425
+ if (!est) return true;
49426
+ return Number(est) / 1e6 <= remainingUsd;
49427
+ });
49428
+ const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
49429
+ const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
49430
+ const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m === FREE_MODEL);
49431
+ if (isComplexOrAgenticFilter && filteredToFreeOnly) {
49432
+ const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
49433
+ console.log(
49434
+ `[ClawRouter] Budget filter left only free model for complex/agentic session \u2014 blocking (${budgetSummary})`
49435
+ );
49436
+ const errPayload = JSON.stringify({
49437
+ error: {
49438
+ message: `ClawRouter budget exhausted: remaining budget (${budgetSummary}) cannot support a complex/tool request. Increase maxCostPerRun to continue.`,
49439
+ type: "cost_cap_exceeded",
49440
+ code: "budget_exhausted"
49441
+ }
49442
+ });
49443
+ if (heartbeatInterval) clearInterval(heartbeatInterval);
49444
+ if (headersSentEarly) {
49445
+ safeWrite(res, `data: ${errPayload}
49446
+
49447
+ data: [DONE]
49448
+
49449
+ `);
49450
+ res.end();
49451
+ } else {
49452
+ res.writeHead(429, {
49453
+ "Content-Type": "application/json",
49454
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
49455
+ "X-ClawRouter-Budget-Mode": "blocked"
49456
+ });
49457
+ res.end(errPayload);
49458
+ }
49459
+ deduplicator.removeInflight(dedupKey);
49460
+ return;
49461
+ }
49462
+ if (excluded.length > 0) {
49463
+ const budgetSummary = remainingUsd > 0 ? `$${remainingUsd.toFixed(4)} remaining` : `budget exhausted ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd})`;
49464
+ console.log(
49465
+ `[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
49466
+ );
49467
+ const fromModel = excluded[0];
49468
+ const usingFree = modelsToTry.length === 1 && modelsToTry[0] === FREE_MODEL;
49469
+ if (usingFree) {
49470
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
49471
+
49472
+ `;
49473
+ } else {
49474
+ const toModel = modelsToTry[0] ?? FREE_MODEL;
49475
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget low** ($${remainingUsd > 0 ? remainingUsd.toFixed(4) : "0.0000"} remaining) \u2014 using ${toModel} instead of ${fromModel}.
49476
+
49477
+ `;
49478
+ }
49479
+ budgetDowngradeHeaderMode = "downgraded";
49480
+ }
49481
+ }
49234
49482
  let upstream;
49235
49483
  let lastError;
49236
49484
  let actualModelUsed = modelId;
49237
49485
  for (let i = 0; i < modelsToTry.length; i++) {
49238
49486
  const tryModel = modelsToTry[i];
49239
49487
  const isLastAttempt = i === modelsToTry.length - 1;
49488
+ if (globalController.signal.aborted) {
49489
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
49490
+ }
49240
49491
  console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
49492
+ const modelController = new AbortController();
49493
+ const modelTimeoutId = setTimeout(() => modelController.abort(), PER_MODEL_TIMEOUT_MS);
49494
+ const combinedSignal = AbortSignal.any([globalController.signal, modelController.signal]);
49241
49495
  const result = await tryModelRequest(
49242
49496
  upstreamUrl,
49243
49497
  req.method ?? "POST",
@@ -49247,12 +49501,29 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49247
49501
  maxTokens,
49248
49502
  payFetch,
49249
49503
  balanceMonitor,
49250
- controller.signal
49504
+ combinedSignal
49251
49505
  );
49506
+ clearTimeout(modelTimeoutId);
49507
+ if (globalController.signal.aborted) {
49508
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
49509
+ }
49510
+ if (!result.success && modelController.signal.aborted && !isLastAttempt) {
49511
+ console.log(
49512
+ `[ClawRouter] Model ${tryModel} timed out after ${PER_MODEL_TIMEOUT_MS}ms, trying fallback`
49513
+ );
49514
+ recordProviderError(tryModel, "server_error");
49515
+ continue;
49516
+ }
49252
49517
  if (result.success && result.response) {
49253
49518
  upstream = result.response;
49254
49519
  actualModelUsed = tryModel;
49255
49520
  console.log(`[ClawRouter] Success with model: ${tryModel}`);
49521
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49522
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49523
+ if (costEst) {
49524
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49525
+ }
49526
+ }
49256
49527
  break;
49257
49528
  }
49258
49529
  lastError = {
@@ -49268,7 +49539,52 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49268
49539
  );
49269
49540
  break;
49270
49541
  }
49271
- if (result.errorStatus === 429) {
49542
+ const errorCat = result.errorCategory;
49543
+ if (errorCat) {
49544
+ recordProviderError(tryModel, errorCat);
49545
+ }
49546
+ if (errorCat === "rate_limited") {
49547
+ if (!isLastAttempt && !globalController.signal.aborted) {
49548
+ console.log(
49549
+ `[ClawRouter] Rate-limited on ${tryModel}, retrying in 200ms before failover`
49550
+ );
49551
+ await new Promise((resolve) => setTimeout(resolve, 200));
49552
+ if (!globalController.signal.aborted) {
49553
+ const retryController = new AbortController();
49554
+ const retryTimeoutId = setTimeout(
49555
+ () => retryController.abort(),
49556
+ PER_MODEL_TIMEOUT_MS
49557
+ );
49558
+ const retrySignal = AbortSignal.any([
49559
+ globalController.signal,
49560
+ retryController.signal
49561
+ ]);
49562
+ const retryResult = await tryModelRequest(
49563
+ upstreamUrl,
49564
+ req.method ?? "POST",
49565
+ headers,
49566
+ body,
49567
+ tryModel,
49568
+ maxTokens,
49569
+ payFetch,
49570
+ balanceMonitor,
49571
+ retrySignal
49572
+ );
49573
+ clearTimeout(retryTimeoutId);
49574
+ if (retryResult.success && retryResult.response) {
49575
+ upstream = retryResult.response;
49576
+ actualModelUsed = tryModel;
49577
+ console.log(`[ClawRouter] Rate-limit retry succeeded for: ${tryModel}`);
49578
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49579
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49580
+ if (costEst) {
49581
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49582
+ }
49583
+ }
49584
+ break;
49585
+ }
49586
+ }
49587
+ }
49272
49588
  markRateLimited(tryModel);
49273
49589
  try {
49274
49590
  const parsed = JSON.parse(result.errorBody || "{}");
@@ -49284,6 +49600,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49284
49600
  }
49285
49601
  } catch {
49286
49602
  }
49603
+ } else if (errorCat === "overloaded") {
49604
+ markOverloaded(tryModel);
49605
+ } else if (errorCat === "auth_failure" || errorCat === "quota_exceeded") {
49606
+ console.log(
49607
+ `[ClawRouter] \u{1F511} ${errorCat === "auth_failure" ? "Auth failure" : "Quota exceeded"} for ${tryModel} \u2014 check provider config`
49608
+ );
49287
49609
  }
49288
49610
  const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
49289
49611
  result.errorBody || ""
@@ -49443,6 +49765,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49443
49765
  responseChunks.push(Buffer.from(noticeData));
49444
49766
  balanceFallbackNotice = void 0;
49445
49767
  }
49768
+ if (budgetDowngradeNotice) {
49769
+ const noticeChunk = {
49770
+ ...baseChunk,
49771
+ choices: [
49772
+ {
49773
+ index: index2,
49774
+ delta: { content: budgetDowngradeNotice },
49775
+ logprobs: null,
49776
+ finish_reason: null
49777
+ }
49778
+ ]
49779
+ };
49780
+ const noticeData = `data: ${JSON.stringify(noticeChunk)}
49781
+
49782
+ `;
49783
+ safeWrite(res, noticeData);
49784
+ responseChunks.push(Buffer.from(noticeData));
49785
+ budgetDowngradeNotice = void 0;
49786
+ }
49446
49787
  if (content) {
49447
49788
  const contentChunk = {
49448
49789
  ...baseChunk,
@@ -49546,6 +49887,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49546
49887
  }
49547
49888
  balanceFallbackNotice = void 0;
49548
49889
  }
49890
+ if (budgetDowngradeNotice && responseBody.length > 0) {
49891
+ try {
49892
+ const parsed = JSON.parse(responseBody.toString());
49893
+ if (parsed.choices?.[0]?.message?.content !== void 0) {
49894
+ parsed.choices[0].message.content = budgetDowngradeNotice + parsed.choices[0].message.content;
49895
+ responseBody = Buffer.from(JSON.stringify(parsed));
49896
+ }
49897
+ } catch {
49898
+ }
49899
+ budgetDowngradeNotice = void 0;
49900
+ }
49901
+ if (budgetDowngradeHeaderMode) {
49902
+ responseHeaders["x-clawrouter-budget-downgrade"] = "1";
49903
+ responseHeaders["x-clawrouter-budget-mode"] = budgetDowngradeHeaderMode;
49904
+ budgetDowngradeHeaderMode = void 0;
49905
+ }
49549
49906
  responseHeaders["content-length"] = String(responseBody.length);
49550
49907
  res.writeHead(upstream.status, responseHeaders);
49551
49908
  safeWrite(res, responseBody);
@@ -50213,6 +50570,18 @@ function injectModelsConfig(logger) {
50213
50570
  needsWrite = true;
50214
50571
  }
50215
50572
  const allowlist = defaults.models;
50573
+ const DEPRECATED_BLOCKRUN_MODELS = ["blockrun/xai/grok-code-fast-1"];
50574
+ let removedDeprecatedCount = 0;
50575
+ for (const key of DEPRECATED_BLOCKRUN_MODELS) {
50576
+ if (allowlist[key]) {
50577
+ delete allowlist[key];
50578
+ removedDeprecatedCount++;
50579
+ }
50580
+ }
50581
+ if (removedDeprecatedCount > 0) {
50582
+ needsWrite = true;
50583
+ logger.info(`Removed ${removedDeprecatedCount} deprecated model entries from allowlist`);
50584
+ }
50216
50585
  let addedCount = 0;
50217
50586
  for (const id of TOP_MODELS) {
50218
50587
  const key = `blockrun/${id}`;
@@ -50314,9 +50683,18 @@ async function startProxyInBackground(api) {
50314
50683
  api.logger.info(`Using wallet from BLOCKRUN_WALLET_KEY: ${wallet.address}`);
50315
50684
  }
50316
50685
  const routingConfig = api.pluginConfig?.routing;
50686
+ const maxCostPerRunUsd = typeof api.pluginConfig?.maxCostPerRun === "number" ? api.pluginConfig.maxCostPerRun : void 0;
50687
+ const maxCostPerRunMode = api.pluginConfig?.maxCostPerRunMode === "strict" ? "strict" : "graceful";
50688
+ if (maxCostPerRunUsd !== void 0) {
50689
+ api.logger.info(
50690
+ `Cost cap: $${maxCostPerRunUsd.toFixed(2)} per session (mode: ${maxCostPerRunMode})`
50691
+ );
50692
+ }
50317
50693
  const proxy = await startProxy({
50318
50694
  wallet,
50319
50695
  routingConfig,
50696
+ maxCostPerRunUsd,
50697
+ maxCostPerRunMode,
50320
50698
  onReady: (port) => {
50321
50699
  api.logger.info(`BlockRun x402 proxy listening on port ${port}`);
50322
50700
  },