npm - @blockrun/clawrouter - Versions diffs - 0.12.56 → 0.12.60 - Mend

@blockrun/clawrouter 0.12.56 → 0.12.60

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (9) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -522,6 +522,7 @@ type SessionEntry = {
     recentHashes: string[];
     strikes: number;
     escalated: boolean;
+    sessionCostMicros: bigint;
 };
 type SessionConfig = {
     /** Enable session persistence (default: false) */
@@ -590,6 +591,17 @@ declare class SessionStore {
         model: string;
         tier: string;
     } | null;
+    /**
+     * Add cost to a session's running total for maxCostPerRun tracking.
+     * Cost is in USDC 6-decimal units (micros).
+     * Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
+     * that never go through the routing path).
+     */
+    addSessionCost(sessionId: string, additionalMicros: bigint): void;
+    /**
+     * Get the total accumulated cost for a session in USD.
+     */
+    getSessionCostUsd(sessionId: string): number;
     /**
      * Stop the cleanup interval.
      */
@@ -693,6 +705,18 @@ type ProxyOptions = {
      * Default: enabled with 10 minute TTL, 200 max entries.
      */
     cacheConfig?: ResponseCacheConfig;
+    /**
+     * Maximum total spend (in USD) per session run.
+     * Default: undefined (no limit). Example: 0.5 = $0.50 per session.
+     */
+    maxCostPerRunUsd?: number;
+    /**
+     * How to enforce the per-run cost cap.
+     * - 'graceful' (default): when budget runs low, downgrade to cheaper models; use free model
+     *   as last resort. Only hard-stops when no model can serve the request.
+     * - 'strict': immediately return 429 once the session spend reaches the cap.
+     */
+    maxCostPerRunMode?: "graceful" | "strict";
     onReady?: (port: number) => void;
     onError?: (error: Error) => void;
     onPayment?: (info: {

package/dist/index.js CHANGED Viewed

@@ -32878,6 +32878,13 @@ var MODEL_ALIASES = {
   "grok-fast": "xai/grok-4-fast-reasoning",
   "grok-code": "deepseek/deepseek-chat",
   // was grok-code-fast-1, delisted due to poor retention
+  // Delisted model redirects — full model IDs that were previously valid but removed
+  "grok-code-fast-1": "deepseek/deepseek-chat",
+  // bare alias
+  "xai/grok-code-fast-1": "deepseek/deepseek-chat",
+  // delisted 2026-03-12
+  "xai/grok-3-fast": "xai/grok-4-fast-reasoning",
+  // delisted (too expensive)
   // NVIDIA
   nvidia: "nvidia/gpt-oss-120b",
   "gpt-120b": "nvidia/gpt-oss-120b",
@@ -46642,7 +46649,8 @@ var SessionStore = class {
         requestCount: 1,
         recentHashes: [],
         strikes: 0,
-        escalated: false
+        escalated: false,
+        sessionCostMicros: 0n
       });
     }
   }
@@ -46731,6 +46739,39 @@ var SessionStore = class {
     entry.escalated = true;
     return { model: nextConfig.primary, tier: nextTier };
   }
+  /**
+   * Add cost to a session's running total for maxCostPerRun tracking.
+   * Cost is in USDC 6-decimal units (micros).
+   * Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
+   * that never go through the routing path).
+   */
+  addSessionCost(sessionId, additionalMicros) {
+    let entry = this.sessions.get(sessionId);
+    if (!entry) {
+      const now = Date.now();
+      entry = {
+        model: "",
+        tier: "DIRECT",
+        createdAt: now,
+        lastUsedAt: now,
+        requestCount: 0,
+        recentHashes: [],
+        strikes: 0,
+        escalated: false,
+        sessionCostMicros: 0n
+      };
+      this.sessions.set(sessionId, entry);
+    }
+    entry.sessionCostMicros += additionalMicros;
+  }
+  /**
+   * Get the total accumulated cost for a session in USD.
+   */
+  getSessionCostUsd(sessionId) {
+    const entry = this.sessions.get(sessionId);
+    if (!entry) return 0;
+    return Number(entry.sessionCostMicros) / 1e6;
+  }
   /**
    * Stop the cleanup interval.
    */
@@ -47006,9 +47047,11 @@ var MAX_MESSAGES = 200;
 var CONTEXT_LIMIT_KB = 5120;
 var HEARTBEAT_INTERVAL_MS = 2e3;
 var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
+var PER_MODEL_TIMEOUT_MS = 6e4;
 var MAX_FALLBACK_ATTEMPTS = 5;
 var HEALTH_CHECK_TIMEOUT_MS = 2e3;
 var RATE_LIMIT_COOLDOWN_MS = 6e4;
+var OVERLOAD_COOLDOWN_MS = 15e3;
 var PORT_RETRY_ATTEMPTS = 5;
 var PORT_RETRY_DELAY_MS = 1e3;
 var MODEL_BODY_READ_TIMEOUT_MS = 3e5;
@@ -47158,7 +47201,41 @@ function transformPaymentError(errorBody) {
   }
   return errorBody;
 }
+function categorizeError(status, body) {
+  if (status === 401) return "auth_failure";
+  if (status === 402) return "payment_error";
+  if (status === 403) {
+    if (/plan.*limit|quota.*exceeded|subscription|allowance/i.test(body))
+      return "quota_exceeded";
+    return "auth_failure";
+  }
+  if (status === 429) return "rate_limited";
+  if (status === 529) return "overloaded";
+  if (status === 503 && /overload|capacity|too.*many.*request/i.test(body)) return "overloaded";
+  if (status >= 500) return "server_error";
+  if (status === 400 || status === 413) {
+    if (PROVIDER_ERROR_PATTERNS.some((p) => p.test(body))) return "config_error";
+    return null;
+  }
+  return null;
+}
 var rateLimitedModels = /* @__PURE__ */ new Map();
+var overloadedModels = /* @__PURE__ */ new Map();
+var perProviderErrors = /* @__PURE__ */ new Map();
+function recordProviderError(modelId, category) {
+  if (!perProviderErrors.has(modelId)) {
+    perProviderErrors.set(modelId, {
+      auth_failure: 0,
+      quota_exceeded: 0,
+      rate_limited: 0,
+      overloaded: 0,
+      server_error: 0,
+      payment_error: 0,
+      config_error: 0
+    });
+  }
+  perProviderErrors.get(modelId)[category]++;
+}
 function isRateLimited(modelId) {
   const hitTime = rateLimitedModels.get(modelId);
   if (!hitTime) return false;
@@ -47173,17 +47250,30 @@ function markRateLimited(modelId) {
   rateLimitedModels.set(modelId, Date.now());
   console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for 60s`);
 }
+function markOverloaded(modelId) {
+  overloadedModels.set(modelId, Date.now());
+  console.log(`[ClawRouter] Model ${modelId} overloaded, will deprioritize for 15s`);
+}
+function isOverloaded(modelId) {
+  const hitTime = overloadedModels.get(modelId);
+  if (!hitTime) return false;
+  if (Date.now() - hitTime >= OVERLOAD_COOLDOWN_MS) {
+    overloadedModels.delete(modelId);
+    return false;
+  }
+  return true;
+}
 function prioritizeNonRateLimited(models) {
   const available = [];
-  const rateLimited = [];
+  const degraded = [];
   for (const model of models) {
-    if (isRateLimited(model)) {
-      rateLimited.push(model);
+    if (isRateLimited(model) || isOverloaded(model)) {
+      degraded.push(model);
     } else {
       available.push(model);
     }
   }
-  return [...available, ...rateLimited];
+  return [...available, ...degraded];
 }
 function canWrite(res) {
   return !res.writableEnded && !res.destroyed && res.socket !== null && !res.socket.destroyed && res.socket.writable;
@@ -47318,37 +47408,6 @@ function detectDegradedSuccessResponse(body) {
   }
   return void 0;
 }
-var FALLBACK_STATUS_CODES = [
-  400,
-  // Bad request - sometimes used for billing errors
-  401,
-  // Unauthorized - provider API key issues
-  402,
-  // Payment required - but from upstream, not x402
-  403,
-  // Forbidden - provider restrictions
-  413,
-  // Payload too large - request exceeds model's context limit
-  429,
-  // Rate limited
-  500,
-  // Internal server error
-  502,
-  // Bad gateway
-  503,
-  // Service unavailable
-  504
-  // Gateway timeout
-];
-function isProviderError(status, body) {
-  if (!FALLBACK_STATUS_CODES.includes(status)) {
-    return false;
-  }
-  if (status >= 500) {
-    return true;
-  }
-  return PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body));
-}
 var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
 var ROLE_MAPPINGS = {
   developer: "system",
@@ -47854,7 +47913,16 @@ async function startProxy(options) {
           "Content-Type": "application/json",
           "Cache-Control": "no-cache"
         });
-        res.end(JSON.stringify(stats, null, 2));
+        res.end(
+          JSON.stringify(
+            {
+              ...stats,
+              providerErrors: Object.fromEntries(perProviderErrors)
+            },
+            null,
+            2
+          )
+        );
       } catch (err) {
         res.writeHead(500, { "Content-Type": "application/json" });
         res.end(
@@ -48297,12 +48365,13 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
     if (response.status !== 200) {
       const errorBodyChunks = await readBodyWithTimeout(response.body, ERROR_BODY_READ_TIMEOUT_MS);
       const errorBody = Buffer.concat(errorBodyChunks).toString();
-      const isProviderErr = isProviderError(response.status, errorBody);
+      const category = categorizeError(response.status, errorBody);
       return {
         success: false,
         errorBody,
         errorStatus: response.status,
-        isProviderError: isProviderErr
+        isProviderError: category !== null,
+        errorCategory: category ?? void 0
       };
     }
     const contentType = response.headers.get("content-type") || "";
@@ -48355,6 +48424,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
   let maxTokens = 4096;
   let routingProfile = null;
   let balanceFallbackNotice;
+  let budgetDowngradeNotice;
+  let budgetDowngradeHeaderMode;
   let accumulatedContent = "";
   let responseInputTokens;
   const isChatCompletion = req.url?.includes("/chat/completions");
@@ -48369,6 +48440,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
       let bodyModified = false;
       const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
       const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
+      hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
       const rawLastContent = lastUserMsg?.content;
       const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
       if (sessionId && parsedMessages.length > 0) {
@@ -49025,6 +49097,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
           options.onRouted?.(routingDecision);
         }
       }
+      if (!effectiveSessionId && parsedMessages.length > 0) {
+        effectiveSessionId = deriveSessionId(parsedMessages);
+      }
       if (bodyModified) {
         body = Buffer.from(JSON.stringify(parsed));
       }
@@ -49115,7 +49190,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
   }
   deduplicator.markInflight(dedupKey);
   let estimatedCostMicros;
-  const isFreeModel = modelId === FREE_MODEL;
+  let isFreeModel = modelId === FREE_MODEL;
   if (modelId && !options.skipBalanceCheck && !isFreeModel) {
     const estimated = estimateAmount(modelId, body.length, maxTokens);
     if (estimated) {
@@ -49128,6 +49203,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
           `[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
         );
         modelId = FREE_MODEL;
+        isFreeModel = true;
         const parsed = JSON.parse(body.toString());
         parsed.model = FREE_MODEL;
         body = Buffer.from(JSON.stringify(parsed));
@@ -49154,6 +49230,89 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
       }
     }
   }
+  if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "strict") {
+    const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
+    const thisReqEstStr = estimatedCostMicros !== void 0 ? estimatedCostMicros.toString() : modelId ? estimateAmount(modelId, body.length, maxTokens) : void 0;
+    const thisReqEstUsd = thisReqEstStr ? Number(thisReqEstStr) / 1e6 : 0;
+    const projectedCostUsd = runCostUsd + thisReqEstUsd;
+    if (projectedCostUsd > options.maxCostPerRunUsd) {
+      console.log(
+        `[ClawRouter] Cost cap exceeded for session ${effectiveSessionId.slice(0, 8)}...: projected $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) > $${options.maxCostPerRunUsd} limit`
+      );
+      res.writeHead(429, {
+        "Content-Type": "application/json",
+        "X-ClawRouter-Cost-Cap-Exceeded": "1"
+      });
+      res.end(
+        JSON.stringify({
+          error: {
+            message: `ClawRouter cost cap exceeded: projected spend $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) would exceed limit $${options.maxCostPerRunUsd}`,
+            type: "cost_cap_exceeded",
+            code: "cost_cap_exceeded"
+          }
+        })
+      );
+      deduplicator.removeInflight(dedupKey);
+      return;
+    }
+  }
+  if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
+    const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
+    const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
+    const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
+    if (isComplexOrAgentic) {
+      const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
+        if (m.id === FREE_MODEL) return false;
+        const est = estimateAmount(m.id, body.length, maxTokens);
+        return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
+      });
+      if (!canAffordAnyNonFreeModel) {
+        console.log(
+          `[ClawRouter] Budget insufficient for agentic/complex session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (silent downgrade would corrupt tool/complex responses)`
+        );
+        res.writeHead(429, {
+          "Content-Type": "application/json",
+          "X-ClawRouter-Cost-Cap-Exceeded": "1",
+          "X-ClawRouter-Budget-Mode": "blocked"
+        });
+        res.end(
+          JSON.stringify({
+            error: {
+              message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue.`,
+              type: "cost_cap_exceeded",
+              code: "budget_exhausted"
+            }
+          })
+        );
+        deduplicator.removeInflight(dedupKey);
+        return;
+      }
+    } else if (!routingDecision && modelId && modelId !== FREE_MODEL) {
+      const est = estimateAmount(modelId, body.length, maxTokens);
+      const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
+      if (!canAfford) {
+        console.log(
+          `[ClawRouter] Budget insufficient for explicit model ${modelId} in session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (user explicitly chose ${modelId})`
+        );
+        res.writeHead(429, {
+          "Content-Type": "application/json",
+          "X-ClawRouter-Cost-Cap-Exceeded": "1",
+          "X-ClawRouter-Budget-Mode": "blocked"
+        });
+        res.end(
+          JSON.stringify({
+            error: {
+              message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue using ${modelId}.`,
+              type: "cost_cap_exceeded",
+              code: "budget_exhausted"
+            }
+          })
+        );
+        deduplicator.removeInflight(dedupKey);
+        return;
+      }
+    }
+  }
   let heartbeatInterval;
   let headersSentEarly = false;
   if (isStreaming) {
@@ -49198,8 +49357,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
     }
   });
   const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
-  const controller = new AbortController();
-  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
+  const globalController = new AbortController();
+  const timeoutId = setTimeout(() => globalController.abort(), timeoutMs);
   try {
     let modelsToTry;
     if (routingDecision) {
@@ -49256,13 +49415,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
     if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
       modelsToTry.push(FREE_MODEL);
     }
+    if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
+      const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
+      const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
+      const beforeFilter = [...modelsToTry];
+      modelsToTry = modelsToTry.filter((m) => {
+        if (m === FREE_MODEL) return true;
+        const est = estimateAmount(m, body.length, maxTokens);
+        if (!est) return true;
+        return Number(est) / 1e6 <= remainingUsd;
+      });
+      const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
+      const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
+      const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m === FREE_MODEL);
+      if (isComplexOrAgenticFilter && filteredToFreeOnly) {
+        const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
+        console.log(
+          `[ClawRouter] Budget filter left only free model for complex/agentic session \u2014 blocking (${budgetSummary})`
+        );
+        const errPayload = JSON.stringify({
+          error: {
+            message: `ClawRouter budget exhausted: remaining budget (${budgetSummary}) cannot support a complex/tool request. Increase maxCostPerRun to continue.`,
+            type: "cost_cap_exceeded",
+            code: "budget_exhausted"
+          }
+        });
+        if (heartbeatInterval) clearInterval(heartbeatInterval);
+        if (headersSentEarly) {
+          safeWrite(res, `data: ${errPayload}
+data: [DONE]
+`);
+          res.end();
+        } else {
+          res.writeHead(429, {
+            "Content-Type": "application/json",
+            "X-ClawRouter-Cost-Cap-Exceeded": "1",
+            "X-ClawRouter-Budget-Mode": "blocked"
+          });
+          res.end(errPayload);
+        }
+        deduplicator.removeInflight(dedupKey);
+        return;
+      }
+      if (excluded.length > 0) {
+        const budgetSummary = remainingUsd > 0 ? `$${remainingUsd.toFixed(4)} remaining` : `budget exhausted ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd})`;
+        console.log(
+          `[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
+        );
+        const fromModel = excluded[0];
+        const usingFree = modelsToTry.length === 1 && modelsToTry[0] === FREE_MODEL;
+        if (usingFree) {
+          budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
+`;
+        } else {
+          const toModel = modelsToTry[0] ?? FREE_MODEL;
+          budgetDowngradeNotice = `> **\u26A0\uFE0F Budget low** ($${remainingUsd > 0 ? remainingUsd.toFixed(4) : "0.0000"} remaining) \u2014 using ${toModel} instead of ${fromModel}.
+`;
+        }
+        budgetDowngradeHeaderMode = "downgraded";
+      }
+    }
     let upstream;
     let lastError;
     let actualModelUsed = modelId;
     for (let i = 0; i < modelsToTry.length; i++) {
       const tryModel = modelsToTry[i];
       const isLastAttempt = i === modelsToTry.length - 1;
+      if (globalController.signal.aborted) {
+        throw new Error(`Request timed out after ${timeoutMs}ms`);
+      }
       console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
+      const modelController = new AbortController();
+      const modelTimeoutId = setTimeout(() => modelController.abort(), PER_MODEL_TIMEOUT_MS);
+      const combinedSignal = AbortSignal.any([globalController.signal, modelController.signal]);
       const result = await tryModelRequest(
         upstreamUrl,
         req.method ?? "POST",
@@ -49272,12 +49501,29 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
         maxTokens,
         payFetch,
         balanceMonitor,
-        controller.signal
+        combinedSignal
       );
+      clearTimeout(modelTimeoutId);
+      if (globalController.signal.aborted) {
+        throw new Error(`Request timed out after ${timeoutMs}ms`);
+      }
+      if (!result.success && modelController.signal.aborted && !isLastAttempt) {
+        console.log(
+          `[ClawRouter] Model ${tryModel} timed out after ${PER_MODEL_TIMEOUT_MS}ms, trying fallback`
+        );
+        recordProviderError(tryModel, "server_error");
+        continue;
+      }
       if (result.success && result.response) {
         upstream = result.response;
         actualModelUsed = tryModel;
         console.log(`[ClawRouter] Success with model: ${tryModel}`);
+        if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
+          const costEst = estimateAmount(tryModel, body.length, maxTokens);
+          if (costEst) {
+            sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
+          }
+        }
         break;
       }
       lastError = {
@@ -49293,7 +49539,52 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
           );
           break;
         }
-        if (result.errorStatus === 429) {
+        const errorCat = result.errorCategory;
+        if (errorCat) {
+          recordProviderError(tryModel, errorCat);
+        }
+        if (errorCat === "rate_limited") {
+          if (!isLastAttempt && !globalController.signal.aborted) {
+            console.log(
+              `[ClawRouter] Rate-limited on ${tryModel}, retrying in 200ms before failover`
+            );
+            await new Promise((resolve) => setTimeout(resolve, 200));
+            if (!globalController.signal.aborted) {
+              const retryController = new AbortController();
+              const retryTimeoutId = setTimeout(
+                () => retryController.abort(),
+                PER_MODEL_TIMEOUT_MS
+              );
+              const retrySignal = AbortSignal.any([
+                globalController.signal,
+                retryController.signal
+              ]);
+              const retryResult = await tryModelRequest(
+                upstreamUrl,
+                req.method ?? "POST",
+                headers,
+                body,
+                tryModel,
+                maxTokens,
+                payFetch,
+                balanceMonitor,
+                retrySignal
+              );
+              clearTimeout(retryTimeoutId);
+              if (retryResult.success && retryResult.response) {
+                upstream = retryResult.response;
+                actualModelUsed = tryModel;
+                console.log(`[ClawRouter] Rate-limit retry succeeded for: ${tryModel}`);
+                if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
+                  const costEst = estimateAmount(tryModel, body.length, maxTokens);
+                  if (costEst) {
+                    sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
+                  }
+                }
+                break;
+              }
+            }
+          }
           markRateLimited(tryModel);
           try {
             const parsed = JSON.parse(result.errorBody || "{}");
@@ -49309,6 +49600,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
             }
           } catch {
           }
+        } else if (errorCat === "overloaded") {
+          markOverloaded(tryModel);
+        } else if (errorCat === "auth_failure" || errorCat === "quota_exceeded") {
+          console.log(
+            `[ClawRouter] \u{1F511} ${errorCat === "auth_failure" ? "Auth failure" : "Quota exceeded"} for ${tryModel} \u2014 check provider config`
+          );
         }
         const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
           result.errorBody || ""
@@ -49468,6 +49765,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
                 responseChunks.push(Buffer.from(noticeData));
                 balanceFallbackNotice = void 0;
               }
+              if (budgetDowngradeNotice) {
+                const noticeChunk = {
+                  ...baseChunk,
+                  choices: [
+                    {
+                      index: index2,
+                      delta: { content: budgetDowngradeNotice },
+                      logprobs: null,
+                      finish_reason: null
+                    }
+                  ]
+                };
+                const noticeData = `data: ${JSON.stringify(noticeChunk)}
+`;
+                safeWrite(res, noticeData);
+                responseChunks.push(Buffer.from(noticeData));
+                budgetDowngradeNotice = void 0;
+              }
               if (content) {
                 const contentChunk = {
                   ...baseChunk,
@@ -49571,6 +49887,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
         }
         balanceFallbackNotice = void 0;
       }
+      if (budgetDowngradeNotice && responseBody.length > 0) {
+        try {
+          const parsed = JSON.parse(responseBody.toString());
+          if (parsed.choices?.[0]?.message?.content !== void 0) {
+            parsed.choices[0].message.content = budgetDowngradeNotice + parsed.choices[0].message.content;
+            responseBody = Buffer.from(JSON.stringify(parsed));
+          }
+        } catch {
+        }
+        budgetDowngradeNotice = void 0;
+      }
+      if (budgetDowngradeHeaderMode) {
+        responseHeaders["x-clawrouter-budget-downgrade"] = "1";
+        responseHeaders["x-clawrouter-budget-mode"] = budgetDowngradeHeaderMode;
+        budgetDowngradeHeaderMode = void 0;
+      }
       responseHeaders["content-length"] = String(responseBody.length);
       res.writeHead(upstream.status, responseHeaders);
       safeWrite(res, responseBody);
@@ -50238,6 +50570,18 @@ function injectModelsConfig(logger) {
     needsWrite = true;
   }
   const allowlist = defaults.models;
+  const DEPRECATED_BLOCKRUN_MODELS = ["blockrun/xai/grok-code-fast-1"];
+  let removedDeprecatedCount = 0;
+  for (const key of DEPRECATED_BLOCKRUN_MODELS) {
+    if (allowlist[key]) {
+      delete allowlist[key];
+      removedDeprecatedCount++;
+    }
+  }
+  if (removedDeprecatedCount > 0) {
+    needsWrite = true;
+    logger.info(`Removed ${removedDeprecatedCount} deprecated model entries from allowlist`);
+  }
   let addedCount = 0;
   for (const id of TOP_MODELS) {
     const key = `blockrun/${id}`;
@@ -50339,9 +50683,18 @@ async function startProxyInBackground(api) {
     api.logger.info(`Using wallet from BLOCKRUN_WALLET_KEY: ${wallet.address}`);
   }
   const routingConfig = api.pluginConfig?.routing;
+  const maxCostPerRunUsd = typeof api.pluginConfig?.maxCostPerRun === "number" ? api.pluginConfig.maxCostPerRun : void 0;
+  const maxCostPerRunMode = api.pluginConfig?.maxCostPerRunMode === "strict" ? "strict" : "graceful";
+  if (maxCostPerRunUsd !== void 0) {
+    api.logger.info(
+      `Cost cap: $${maxCostPerRunUsd.toFixed(2)} per session (mode: ${maxCostPerRunMode})`
+    );
+  }
   const proxy = await startProxy({
     wallet,
     routingConfig,
+    maxCostPerRunUsd,
+    maxCostPerRunMode,
     onReady: (port) => {
       api.logger.info(`BlockRun x402 proxy listening on port ${port}`);
     },