@blockrun/clawrouter 0.12.56 → 0.12.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -522,6 +522,7 @@ type SessionEntry = {
522
522
  recentHashes: string[];
523
523
  strikes: number;
524
524
  escalated: boolean;
525
+ sessionCostMicros: bigint;
525
526
  };
526
527
  type SessionConfig = {
527
528
  /** Enable session persistence (default: false) */
@@ -590,6 +591,17 @@ declare class SessionStore {
590
591
  model: string;
591
592
  tier: string;
592
593
  } | null;
594
+ /**
595
+ * Add cost to a session's running total for maxCostPerRun tracking.
596
+ * Cost is in USDC 6-decimal units (micros).
597
+ * Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
598
+ * that never go through the routing path).
599
+ */
600
+ addSessionCost(sessionId: string, additionalMicros: bigint): void;
601
+ /**
602
+ * Get the total accumulated cost for a session in USD.
603
+ */
604
+ getSessionCostUsd(sessionId: string): number;
593
605
  /**
594
606
  * Stop the cleanup interval.
595
607
  */
@@ -693,6 +705,18 @@ type ProxyOptions = {
693
705
  * Default: enabled with 10 minute TTL, 200 max entries.
694
706
  */
695
707
  cacheConfig?: ResponseCacheConfig;
708
+ /**
709
+ * Maximum total spend (in USD) per session run.
710
+ * Default: undefined (no limit). Example: 0.5 = $0.50 per session.
711
+ */
712
+ maxCostPerRunUsd?: number;
713
+ /**
714
+ * How to enforce the per-run cost cap.
715
+ * - 'graceful' (default): when budget runs low, downgrade to cheaper models; use free model
716
+ * as last resort. Only hard-stops when no model can serve the request.
717
+ * - 'strict': immediately return 429 once the session spend reaches the cap.
718
+ */
719
+ maxCostPerRunMode?: "graceful" | "strict";
696
720
  onReady?: (port: number) => void;
697
721
  onError?: (error: Error) => void;
698
722
  onPayment?: (info: {
package/dist/index.js CHANGED
@@ -32878,6 +32878,13 @@ var MODEL_ALIASES = {
32878
32878
  "grok-fast": "xai/grok-4-fast-reasoning",
32879
32879
  "grok-code": "deepseek/deepseek-chat",
32880
32880
  // was grok-code-fast-1, delisted due to poor retention
32881
+ // Delisted model redirects — full model IDs that were previously valid but removed
32882
+ "grok-code-fast-1": "deepseek/deepseek-chat",
32883
+ // bare alias
32884
+ "xai/grok-code-fast-1": "deepseek/deepseek-chat",
32885
+ // delisted 2026-03-12
32886
+ "xai/grok-3-fast": "xai/grok-4-fast-reasoning",
32887
+ // delisted (too expensive)
32881
32888
  // NVIDIA
32882
32889
  nvidia: "nvidia/gpt-oss-120b",
32883
32890
  "gpt-120b": "nvidia/gpt-oss-120b",
@@ -46642,7 +46649,8 @@ var SessionStore = class {
46642
46649
  requestCount: 1,
46643
46650
  recentHashes: [],
46644
46651
  strikes: 0,
46645
- escalated: false
46652
+ escalated: false,
46653
+ sessionCostMicros: 0n
46646
46654
  });
46647
46655
  }
46648
46656
  }
@@ -46731,6 +46739,39 @@ var SessionStore = class {
46731
46739
  entry.escalated = true;
46732
46740
  return { model: nextConfig.primary, tier: nextTier };
46733
46741
  }
46742
+ /**
46743
+ * Add cost to a session's running total for maxCostPerRun tracking.
46744
+ * Cost is in USDC 6-decimal units (micros).
46745
+ * Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
46746
+ * that never go through the routing path).
46747
+ */
46748
+ addSessionCost(sessionId, additionalMicros) {
46749
+ let entry = this.sessions.get(sessionId);
46750
+ if (!entry) {
46751
+ const now = Date.now();
46752
+ entry = {
46753
+ model: "",
46754
+ tier: "DIRECT",
46755
+ createdAt: now,
46756
+ lastUsedAt: now,
46757
+ requestCount: 0,
46758
+ recentHashes: [],
46759
+ strikes: 0,
46760
+ escalated: false,
46761
+ sessionCostMicros: 0n
46762
+ };
46763
+ this.sessions.set(sessionId, entry);
46764
+ }
46765
+ entry.sessionCostMicros += additionalMicros;
46766
+ }
46767
+ /**
46768
+ * Get the total accumulated cost for a session in USD.
46769
+ */
46770
+ getSessionCostUsd(sessionId) {
46771
+ const entry = this.sessions.get(sessionId);
46772
+ if (!entry) return 0;
46773
+ return Number(entry.sessionCostMicros) / 1e6;
46774
+ }
46734
46775
  /**
46735
46776
  * Stop the cleanup interval.
46736
46777
  */
@@ -47006,9 +47047,11 @@ var MAX_MESSAGES = 200;
47006
47047
  var CONTEXT_LIMIT_KB = 5120;
47007
47048
  var HEARTBEAT_INTERVAL_MS = 2e3;
47008
47049
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
47050
+ var PER_MODEL_TIMEOUT_MS = 6e4;
47009
47051
  var MAX_FALLBACK_ATTEMPTS = 5;
47010
47052
  var HEALTH_CHECK_TIMEOUT_MS = 2e3;
47011
47053
  var RATE_LIMIT_COOLDOWN_MS = 6e4;
47054
+ var OVERLOAD_COOLDOWN_MS = 15e3;
47012
47055
  var PORT_RETRY_ATTEMPTS = 5;
47013
47056
  var PORT_RETRY_DELAY_MS = 1e3;
47014
47057
  var MODEL_BODY_READ_TIMEOUT_MS = 3e5;
@@ -47158,7 +47201,41 @@ function transformPaymentError(errorBody) {
47158
47201
  }
47159
47202
  return errorBody;
47160
47203
  }
47204
+ function categorizeError(status, body) {
47205
+ if (status === 401) return "auth_failure";
47206
+ if (status === 402) return "payment_error";
47207
+ if (status === 403) {
47208
+ if (/plan.*limit|quota.*exceeded|subscription|allowance/i.test(body))
47209
+ return "quota_exceeded";
47210
+ return "auth_failure";
47211
+ }
47212
+ if (status === 429) return "rate_limited";
47213
+ if (status === 529) return "overloaded";
47214
+ if (status === 503 && /overload|capacity|too.*many.*request/i.test(body)) return "overloaded";
47215
+ if (status >= 500) return "server_error";
47216
+ if (status === 400 || status === 413) {
47217
+ if (PROVIDER_ERROR_PATTERNS.some((p) => p.test(body))) return "config_error";
47218
+ return null;
47219
+ }
47220
+ return null;
47221
+ }
47161
47222
  var rateLimitedModels = /* @__PURE__ */ new Map();
47223
+ var overloadedModels = /* @__PURE__ */ new Map();
47224
+ var perProviderErrors = /* @__PURE__ */ new Map();
47225
+ function recordProviderError(modelId, category) {
47226
+ if (!perProviderErrors.has(modelId)) {
47227
+ perProviderErrors.set(modelId, {
47228
+ auth_failure: 0,
47229
+ quota_exceeded: 0,
47230
+ rate_limited: 0,
47231
+ overloaded: 0,
47232
+ server_error: 0,
47233
+ payment_error: 0,
47234
+ config_error: 0
47235
+ });
47236
+ }
47237
+ perProviderErrors.get(modelId)[category]++;
47238
+ }
47162
47239
  function isRateLimited(modelId) {
47163
47240
  const hitTime = rateLimitedModels.get(modelId);
47164
47241
  if (!hitTime) return false;
@@ -47173,17 +47250,30 @@ function markRateLimited(modelId) {
47173
47250
  rateLimitedModels.set(modelId, Date.now());
47174
47251
  console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for 60s`);
47175
47252
  }
47253
+ function markOverloaded(modelId) {
47254
+ overloadedModels.set(modelId, Date.now());
47255
+ console.log(`[ClawRouter] Model ${modelId} overloaded, will deprioritize for 15s`);
47256
+ }
47257
+ function isOverloaded(modelId) {
47258
+ const hitTime = overloadedModels.get(modelId);
47259
+ if (!hitTime) return false;
47260
+ if (Date.now() - hitTime >= OVERLOAD_COOLDOWN_MS) {
47261
+ overloadedModels.delete(modelId);
47262
+ return false;
47263
+ }
47264
+ return true;
47265
+ }
47176
47266
  function prioritizeNonRateLimited(models) {
47177
47267
  const available = [];
47178
- const rateLimited = [];
47268
+ const degraded = [];
47179
47269
  for (const model of models) {
47180
- if (isRateLimited(model)) {
47181
- rateLimited.push(model);
47270
+ if (isRateLimited(model) || isOverloaded(model)) {
47271
+ degraded.push(model);
47182
47272
  } else {
47183
47273
  available.push(model);
47184
47274
  }
47185
47275
  }
47186
- return [...available, ...rateLimited];
47276
+ return [...available, ...degraded];
47187
47277
  }
47188
47278
  function canWrite(res) {
47189
47279
  return !res.writableEnded && !res.destroyed && res.socket !== null && !res.socket.destroyed && res.socket.writable;
@@ -47318,37 +47408,6 @@ function detectDegradedSuccessResponse(body) {
47318
47408
  }
47319
47409
  return void 0;
47320
47410
  }
47321
- var FALLBACK_STATUS_CODES = [
47322
- 400,
47323
- // Bad request - sometimes used for billing errors
47324
- 401,
47325
- // Unauthorized - provider API key issues
47326
- 402,
47327
- // Payment required - but from upstream, not x402
47328
- 403,
47329
- // Forbidden - provider restrictions
47330
- 413,
47331
- // Payload too large - request exceeds model's context limit
47332
- 429,
47333
- // Rate limited
47334
- 500,
47335
- // Internal server error
47336
- 502,
47337
- // Bad gateway
47338
- 503,
47339
- // Service unavailable
47340
- 504
47341
- // Gateway timeout
47342
- ];
47343
- function isProviderError(status, body) {
47344
- if (!FALLBACK_STATUS_CODES.includes(status)) {
47345
- return false;
47346
- }
47347
- if (status >= 500) {
47348
- return true;
47349
- }
47350
- return PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body));
47351
- }
47352
47411
  var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
47353
47412
  var ROLE_MAPPINGS = {
47354
47413
  developer: "system",
@@ -47854,7 +47913,16 @@ async function startProxy(options) {
47854
47913
  "Content-Type": "application/json",
47855
47914
  "Cache-Control": "no-cache"
47856
47915
  });
47857
- res.end(JSON.stringify(stats, null, 2));
47916
+ res.end(
47917
+ JSON.stringify(
47918
+ {
47919
+ ...stats,
47920
+ providerErrors: Object.fromEntries(perProviderErrors)
47921
+ },
47922
+ null,
47923
+ 2
47924
+ )
47925
+ );
47858
47926
  } catch (err) {
47859
47927
  res.writeHead(500, { "Content-Type": "application/json" });
47860
47928
  res.end(
@@ -48297,12 +48365,13 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
48297
48365
  if (response.status !== 200) {
48298
48366
  const errorBodyChunks = await readBodyWithTimeout(response.body, ERROR_BODY_READ_TIMEOUT_MS);
48299
48367
  const errorBody = Buffer.concat(errorBodyChunks).toString();
48300
- const isProviderErr = isProviderError(response.status, errorBody);
48368
+ const category = categorizeError(response.status, errorBody);
48301
48369
  return {
48302
48370
  success: false,
48303
48371
  errorBody,
48304
48372
  errorStatus: response.status,
48305
- isProviderError: isProviderErr
48373
+ isProviderError: category !== null,
48374
+ errorCategory: category ?? void 0
48306
48375
  };
48307
48376
  }
48308
48377
  const contentType = response.headers.get("content-type") || "";
@@ -48355,6 +48424,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48355
48424
  let maxTokens = 4096;
48356
48425
  let routingProfile = null;
48357
48426
  let balanceFallbackNotice;
48427
+ let budgetDowngradeNotice;
48428
+ let budgetDowngradeHeaderMode;
48358
48429
  let accumulatedContent = "";
48359
48430
  let responseInputTokens;
48360
48431
  const isChatCompletion = req.url?.includes("/chat/completions");
@@ -48369,6 +48440,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
48369
48440
  let bodyModified = false;
48370
48441
  const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
48371
48442
  const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
48443
+ hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
48372
48444
  const rawLastContent = lastUserMsg?.content;
48373
48445
  const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
48374
48446
  if (sessionId && parsedMessages.length > 0) {
@@ -49025,6 +49097,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49025
49097
  options.onRouted?.(routingDecision);
49026
49098
  }
49027
49099
  }
49100
+ if (!effectiveSessionId && parsedMessages.length > 0) {
49101
+ effectiveSessionId = deriveSessionId(parsedMessages);
49102
+ }
49028
49103
  if (bodyModified) {
49029
49104
  body = Buffer.from(JSON.stringify(parsed));
49030
49105
  }
@@ -49115,7 +49190,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49115
49190
  }
49116
49191
  deduplicator.markInflight(dedupKey);
49117
49192
  let estimatedCostMicros;
49118
- const isFreeModel = modelId === FREE_MODEL;
49193
+ let isFreeModel = modelId === FREE_MODEL;
49119
49194
  if (modelId && !options.skipBalanceCheck && !isFreeModel) {
49120
49195
  const estimated = estimateAmount(modelId, body.length, maxTokens);
49121
49196
  if (estimated) {
@@ -49128,6 +49203,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49128
49203
  `[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
49129
49204
  );
49130
49205
  modelId = FREE_MODEL;
49206
+ isFreeModel = true;
49131
49207
  const parsed = JSON.parse(body.toString());
49132
49208
  parsed.model = FREE_MODEL;
49133
49209
  body = Buffer.from(JSON.stringify(parsed));
@@ -49154,6 +49230,89 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49154
49230
  }
49155
49231
  }
49156
49232
  }
49233
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "strict") {
49234
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
49235
+ const thisReqEstStr = estimatedCostMicros !== void 0 ? estimatedCostMicros.toString() : modelId ? estimateAmount(modelId, body.length, maxTokens) : void 0;
49236
+ const thisReqEstUsd = thisReqEstStr ? Number(thisReqEstStr) / 1e6 : 0;
49237
+ const projectedCostUsd = runCostUsd + thisReqEstUsd;
49238
+ if (projectedCostUsd > options.maxCostPerRunUsd) {
49239
+ console.log(
49240
+ `[ClawRouter] Cost cap exceeded for session ${effectiveSessionId.slice(0, 8)}...: projected $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) > $${options.maxCostPerRunUsd} limit`
49241
+ );
49242
+ res.writeHead(429, {
49243
+ "Content-Type": "application/json",
49244
+ "X-ClawRouter-Cost-Cap-Exceeded": "1"
49245
+ });
49246
+ res.end(
49247
+ JSON.stringify({
49248
+ error: {
49249
+ message: `ClawRouter cost cap exceeded: projected spend $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) would exceed limit $${options.maxCostPerRunUsd}`,
49250
+ type: "cost_cap_exceeded",
49251
+ code: "cost_cap_exceeded"
49252
+ }
49253
+ })
49254
+ );
49255
+ deduplicator.removeInflight(dedupKey);
49256
+ return;
49257
+ }
49258
+ }
49259
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
49260
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
49261
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
49262
+ const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
49263
+ if (isComplexOrAgentic) {
49264
+ const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
49265
+ if (m.id === FREE_MODEL) return false;
49266
+ const est = estimateAmount(m.id, body.length, maxTokens);
49267
+ return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
49268
+ });
49269
+ if (!canAffordAnyNonFreeModel) {
49270
+ console.log(
49271
+ `[ClawRouter] Budget insufficient for agentic/complex session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (silent downgrade would corrupt tool/complex responses)`
49272
+ );
49273
+ res.writeHead(429, {
49274
+ "Content-Type": "application/json",
49275
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
49276
+ "X-ClawRouter-Budget-Mode": "blocked"
49277
+ });
49278
+ res.end(
49279
+ JSON.stringify({
49280
+ error: {
49281
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue.`,
49282
+ type: "cost_cap_exceeded",
49283
+ code: "budget_exhausted"
49284
+ }
49285
+ })
49286
+ );
49287
+ deduplicator.removeInflight(dedupKey);
49288
+ return;
49289
+ }
49290
+ } else if (!routingDecision && modelId && modelId !== FREE_MODEL) {
49291
+ const est = estimateAmount(modelId, body.length, maxTokens);
49292
+ const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
49293
+ if (!canAfford) {
49294
+ console.log(
49295
+ `[ClawRouter] Budget insufficient for explicit model ${modelId} in session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (user explicitly chose ${modelId})`
49296
+ );
49297
+ res.writeHead(429, {
49298
+ "Content-Type": "application/json",
49299
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
49300
+ "X-ClawRouter-Budget-Mode": "blocked"
49301
+ });
49302
+ res.end(
49303
+ JSON.stringify({
49304
+ error: {
49305
+ message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue using ${modelId}.`,
49306
+ type: "cost_cap_exceeded",
49307
+ code: "budget_exhausted"
49308
+ }
49309
+ })
49310
+ );
49311
+ deduplicator.removeInflight(dedupKey);
49312
+ return;
49313
+ }
49314
+ }
49315
+ }
49157
49316
  let heartbeatInterval;
49158
49317
  let headersSentEarly = false;
49159
49318
  if (isStreaming) {
@@ -49198,8 +49357,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49198
49357
  }
49199
49358
  });
49200
49359
  const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
49201
- const controller = new AbortController();
49202
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
49360
+ const globalController = new AbortController();
49361
+ const timeoutId = setTimeout(() => globalController.abort(), timeoutMs);
49203
49362
  try {
49204
49363
  let modelsToTry;
49205
49364
  if (routingDecision) {
@@ -49256,13 +49415,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49256
49415
  if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
49257
49416
  modelsToTry.push(FREE_MODEL);
49258
49417
  }
49418
+ if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
49419
+ const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
49420
+ const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
49421
+ const beforeFilter = [...modelsToTry];
49422
+ modelsToTry = modelsToTry.filter((m) => {
49423
+ if (m === FREE_MODEL) return true;
49424
+ const est = estimateAmount(m, body.length, maxTokens);
49425
+ if (!est) return true;
49426
+ return Number(est) / 1e6 <= remainingUsd;
49427
+ });
49428
+ const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
49429
+ const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
49430
+ const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m === FREE_MODEL);
49431
+ if (isComplexOrAgenticFilter && filteredToFreeOnly) {
49432
+ const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
49433
+ console.log(
49434
+ `[ClawRouter] Budget filter left only free model for complex/agentic session \u2014 blocking (${budgetSummary})`
49435
+ );
49436
+ const errPayload = JSON.stringify({
49437
+ error: {
49438
+ message: `ClawRouter budget exhausted: remaining budget (${budgetSummary}) cannot support a complex/tool request. Increase maxCostPerRun to continue.`,
49439
+ type: "cost_cap_exceeded",
49440
+ code: "budget_exhausted"
49441
+ }
49442
+ });
49443
+ if (heartbeatInterval) clearInterval(heartbeatInterval);
49444
+ if (headersSentEarly) {
49445
+ safeWrite(res, `data: ${errPayload}
49446
+
49447
+ data: [DONE]
49448
+
49449
+ `);
49450
+ res.end();
49451
+ } else {
49452
+ res.writeHead(429, {
49453
+ "Content-Type": "application/json",
49454
+ "X-ClawRouter-Cost-Cap-Exceeded": "1",
49455
+ "X-ClawRouter-Budget-Mode": "blocked"
49456
+ });
49457
+ res.end(errPayload);
49458
+ }
49459
+ deduplicator.removeInflight(dedupKey);
49460
+ return;
49461
+ }
49462
+ if (excluded.length > 0) {
49463
+ const budgetSummary = remainingUsd > 0 ? `$${remainingUsd.toFixed(4)} remaining` : `budget exhausted ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd})`;
49464
+ console.log(
49465
+ `[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
49466
+ );
49467
+ const fromModel = excluded[0];
49468
+ const usingFree = modelsToTry.length === 1 && modelsToTry[0] === FREE_MODEL;
49469
+ if (usingFree) {
49470
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
49471
+
49472
+ `;
49473
+ } else {
49474
+ const toModel = modelsToTry[0] ?? FREE_MODEL;
49475
+ budgetDowngradeNotice = `> **\u26A0\uFE0F Budget low** ($${remainingUsd > 0 ? remainingUsd.toFixed(4) : "0.0000"} remaining) \u2014 using ${toModel} instead of ${fromModel}.
49476
+
49477
+ `;
49478
+ }
49479
+ budgetDowngradeHeaderMode = "downgraded";
49480
+ }
49481
+ }
49259
49482
  let upstream;
49260
49483
  let lastError;
49261
49484
  let actualModelUsed = modelId;
49262
49485
  for (let i = 0; i < modelsToTry.length; i++) {
49263
49486
  const tryModel = modelsToTry[i];
49264
49487
  const isLastAttempt = i === modelsToTry.length - 1;
49488
+ if (globalController.signal.aborted) {
49489
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
49490
+ }
49265
49491
  console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
49492
+ const modelController = new AbortController();
49493
+ const modelTimeoutId = setTimeout(() => modelController.abort(), PER_MODEL_TIMEOUT_MS);
49494
+ const combinedSignal = AbortSignal.any([globalController.signal, modelController.signal]);
49266
49495
  const result = await tryModelRequest(
49267
49496
  upstreamUrl,
49268
49497
  req.method ?? "POST",
@@ -49272,12 +49501,29 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49272
49501
  maxTokens,
49273
49502
  payFetch,
49274
49503
  balanceMonitor,
49275
- controller.signal
49504
+ combinedSignal
49276
49505
  );
49506
+ clearTimeout(modelTimeoutId);
49507
+ if (globalController.signal.aborted) {
49508
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
49509
+ }
49510
+ if (!result.success && modelController.signal.aborted && !isLastAttempt) {
49511
+ console.log(
49512
+ `[ClawRouter] Model ${tryModel} timed out after ${PER_MODEL_TIMEOUT_MS}ms, trying fallback`
49513
+ );
49514
+ recordProviderError(tryModel, "server_error");
49515
+ continue;
49516
+ }
49277
49517
  if (result.success && result.response) {
49278
49518
  upstream = result.response;
49279
49519
  actualModelUsed = tryModel;
49280
49520
  console.log(`[ClawRouter] Success with model: ${tryModel}`);
49521
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49522
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49523
+ if (costEst) {
49524
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49525
+ }
49526
+ }
49281
49527
  break;
49282
49528
  }
49283
49529
  lastError = {
@@ -49293,7 +49539,52 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49293
49539
  );
49294
49540
  break;
49295
49541
  }
49296
- if (result.errorStatus === 429) {
49542
+ const errorCat = result.errorCategory;
49543
+ if (errorCat) {
49544
+ recordProviderError(tryModel, errorCat);
49545
+ }
49546
+ if (errorCat === "rate_limited") {
49547
+ if (!isLastAttempt && !globalController.signal.aborted) {
49548
+ console.log(
49549
+ `[ClawRouter] Rate-limited on ${tryModel}, retrying in 200ms before failover`
49550
+ );
49551
+ await new Promise((resolve) => setTimeout(resolve, 200));
49552
+ if (!globalController.signal.aborted) {
49553
+ const retryController = new AbortController();
49554
+ const retryTimeoutId = setTimeout(
49555
+ () => retryController.abort(),
49556
+ PER_MODEL_TIMEOUT_MS
49557
+ );
49558
+ const retrySignal = AbortSignal.any([
49559
+ globalController.signal,
49560
+ retryController.signal
49561
+ ]);
49562
+ const retryResult = await tryModelRequest(
49563
+ upstreamUrl,
49564
+ req.method ?? "POST",
49565
+ headers,
49566
+ body,
49567
+ tryModel,
49568
+ maxTokens,
49569
+ payFetch,
49570
+ balanceMonitor,
49571
+ retrySignal
49572
+ );
49573
+ clearTimeout(retryTimeoutId);
49574
+ if (retryResult.success && retryResult.response) {
49575
+ upstream = retryResult.response;
49576
+ actualModelUsed = tryModel;
49577
+ console.log(`[ClawRouter] Rate-limit retry succeeded for: ${tryModel}`);
49578
+ if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
49579
+ const costEst = estimateAmount(tryModel, body.length, maxTokens);
49580
+ if (costEst) {
49581
+ sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
49582
+ }
49583
+ }
49584
+ break;
49585
+ }
49586
+ }
49587
+ }
49297
49588
  markRateLimited(tryModel);
49298
49589
  try {
49299
49590
  const parsed = JSON.parse(result.errorBody || "{}");
@@ -49309,6 +49600,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49309
49600
  }
49310
49601
  } catch {
49311
49602
  }
49603
+ } else if (errorCat === "overloaded") {
49604
+ markOverloaded(tryModel);
49605
+ } else if (errorCat === "auth_failure" || errorCat === "quota_exceeded") {
49606
+ console.log(
49607
+ `[ClawRouter] \u{1F511} ${errorCat === "auth_failure" ? "Auth failure" : "Quota exceeded"} for ${tryModel} \u2014 check provider config`
49608
+ );
49312
49609
  }
49313
49610
  const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
49314
49611
  result.errorBody || ""
@@ -49468,6 +49765,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49468
49765
  responseChunks.push(Buffer.from(noticeData));
49469
49766
  balanceFallbackNotice = void 0;
49470
49767
  }
49768
+ if (budgetDowngradeNotice) {
49769
+ const noticeChunk = {
49770
+ ...baseChunk,
49771
+ choices: [
49772
+ {
49773
+ index: index2,
49774
+ delta: { content: budgetDowngradeNotice },
49775
+ logprobs: null,
49776
+ finish_reason: null
49777
+ }
49778
+ ]
49779
+ };
49780
+ const noticeData = `data: ${JSON.stringify(noticeChunk)}
49781
+
49782
+ `;
49783
+ safeWrite(res, noticeData);
49784
+ responseChunks.push(Buffer.from(noticeData));
49785
+ budgetDowngradeNotice = void 0;
49786
+ }
49471
49787
  if (content) {
49472
49788
  const contentChunk = {
49473
49789
  ...baseChunk,
@@ -49571,6 +49887,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
49571
49887
  }
49572
49888
  balanceFallbackNotice = void 0;
49573
49889
  }
49890
+ if (budgetDowngradeNotice && responseBody.length > 0) {
49891
+ try {
49892
+ const parsed = JSON.parse(responseBody.toString());
49893
+ if (parsed.choices?.[0]?.message?.content !== void 0) {
49894
+ parsed.choices[0].message.content = budgetDowngradeNotice + parsed.choices[0].message.content;
49895
+ responseBody = Buffer.from(JSON.stringify(parsed));
49896
+ }
49897
+ } catch {
49898
+ }
49899
+ budgetDowngradeNotice = void 0;
49900
+ }
49901
+ if (budgetDowngradeHeaderMode) {
49902
+ responseHeaders["x-clawrouter-budget-downgrade"] = "1";
49903
+ responseHeaders["x-clawrouter-budget-mode"] = budgetDowngradeHeaderMode;
49904
+ budgetDowngradeHeaderMode = void 0;
49905
+ }
49574
49906
  responseHeaders["content-length"] = String(responseBody.length);
49575
49907
  res.writeHead(upstream.status, responseHeaders);
49576
49908
  safeWrite(res, responseBody);
@@ -50238,6 +50570,18 @@ function injectModelsConfig(logger) {
50238
50570
  needsWrite = true;
50239
50571
  }
50240
50572
  const allowlist = defaults.models;
50573
+ const DEPRECATED_BLOCKRUN_MODELS = ["blockrun/xai/grok-code-fast-1"];
50574
+ let removedDeprecatedCount = 0;
50575
+ for (const key of DEPRECATED_BLOCKRUN_MODELS) {
50576
+ if (allowlist[key]) {
50577
+ delete allowlist[key];
50578
+ removedDeprecatedCount++;
50579
+ }
50580
+ }
50581
+ if (removedDeprecatedCount > 0) {
50582
+ needsWrite = true;
50583
+ logger.info(`Removed ${removedDeprecatedCount} deprecated model entries from allowlist`);
50584
+ }
50241
50585
  let addedCount = 0;
50242
50586
  for (const id of TOP_MODELS) {
50243
50587
  const key = `blockrun/${id}`;
@@ -50339,9 +50683,18 @@ async function startProxyInBackground(api) {
50339
50683
  api.logger.info(`Using wallet from BLOCKRUN_WALLET_KEY: ${wallet.address}`);
50340
50684
  }
50341
50685
  const routingConfig = api.pluginConfig?.routing;
50686
+ const maxCostPerRunUsd = typeof api.pluginConfig?.maxCostPerRun === "number" ? api.pluginConfig.maxCostPerRun : void 0;
50687
+ const maxCostPerRunMode = api.pluginConfig?.maxCostPerRunMode === "strict" ? "strict" : "graceful";
50688
+ if (maxCostPerRunUsd !== void 0) {
50689
+ api.logger.info(
50690
+ `Cost cap: $${maxCostPerRunUsd.toFixed(2)} per session (mode: ${maxCostPerRunMode})`
50691
+ );
50692
+ }
50342
50693
  const proxy = await startProxy({
50343
50694
  wallet,
50344
50695
  routingConfig,
50696
+ maxCostPerRunUsd,
50697
+ maxCostPerRunMode,
50345
50698
  onReady: (port) => {
50346
50699
  api.logger.info(`BlockRun x402 proxy listening on port ${port}`);
50347
50700
  },