@blockrun/clawrouter 0.12.56 → 0.12.60
This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +74 -51
- package/dist/cli.js +376 -44
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +24 -0
- package/dist/index.js +397 -44
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/scripts/reinstall.sh +96 -17
- package/scripts/update.sh +93 -17
package/dist/index.d.ts
CHANGED
|
@@ -522,6 +522,7 @@ type SessionEntry = {
|
|
|
522
522
|
recentHashes: string[];
|
|
523
523
|
strikes: number;
|
|
524
524
|
escalated: boolean;
|
|
525
|
+
sessionCostMicros: bigint;
|
|
525
526
|
};
|
|
526
527
|
type SessionConfig = {
|
|
527
528
|
/** Enable session persistence (default: false) */
|
|
@@ -590,6 +591,17 @@ declare class SessionStore {
|
|
|
590
591
|
model: string;
|
|
591
592
|
tier: string;
|
|
592
593
|
} | null;
|
|
594
|
+
/**
|
|
595
|
+
* Add cost to a session's running total for maxCostPerRun tracking.
|
|
596
|
+
* Cost is in USDC 6-decimal units (micros).
|
|
597
|
+
* Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
|
|
598
|
+
* that never go through the routing path).
|
|
599
|
+
*/
|
|
600
|
+
addSessionCost(sessionId: string, additionalMicros: bigint): void;
|
|
601
|
+
/**
|
|
602
|
+
* Get the total accumulated cost for a session in USD.
|
|
603
|
+
*/
|
|
604
|
+
getSessionCostUsd(sessionId: string): number;
|
|
593
605
|
/**
|
|
594
606
|
* Stop the cleanup interval.
|
|
595
607
|
*/
|
|
@@ -693,6 +705,18 @@ type ProxyOptions = {
|
|
|
693
705
|
* Default: enabled with 10 minute TTL, 200 max entries.
|
|
694
706
|
*/
|
|
695
707
|
cacheConfig?: ResponseCacheConfig;
|
|
708
|
+
/**
|
|
709
|
+
* Maximum total spend (in USD) per session run.
|
|
710
|
+
* Default: undefined (no limit). Example: 0.5 = $0.50 per session.
|
|
711
|
+
*/
|
|
712
|
+
maxCostPerRunUsd?: number;
|
|
713
|
+
/**
|
|
714
|
+
* How to enforce the per-run cost cap.
|
|
715
|
+
* - 'graceful' (default): when budget runs low, downgrade to cheaper models; use free model
|
|
716
|
+
* as last resort. Only hard-stops when no model can serve the request.
|
|
717
|
+
* - 'strict': immediately return 429 once the session spend reaches the cap.
|
|
718
|
+
*/
|
|
719
|
+
maxCostPerRunMode?: "graceful" | "strict";
|
|
696
720
|
onReady?: (port: number) => void;
|
|
697
721
|
onError?: (error: Error) => void;
|
|
698
722
|
onPayment?: (info: {
|
package/dist/index.js
CHANGED
|
@@ -32878,6 +32878,13 @@ var MODEL_ALIASES = {
|
|
|
32878
32878
|
"grok-fast": "xai/grok-4-fast-reasoning",
|
|
32879
32879
|
"grok-code": "deepseek/deepseek-chat",
|
|
32880
32880
|
// was grok-code-fast-1, delisted due to poor retention
|
|
32881
|
+
// Delisted model redirects — full model IDs that were previously valid but removed
|
|
32882
|
+
"grok-code-fast-1": "deepseek/deepseek-chat",
|
|
32883
|
+
// bare alias
|
|
32884
|
+
"xai/grok-code-fast-1": "deepseek/deepseek-chat",
|
|
32885
|
+
// delisted 2026-03-12
|
|
32886
|
+
"xai/grok-3-fast": "xai/grok-4-fast-reasoning",
|
|
32887
|
+
// delisted (too expensive)
|
|
32881
32888
|
// NVIDIA
|
|
32882
32889
|
nvidia: "nvidia/gpt-oss-120b",
|
|
32883
32890
|
"gpt-120b": "nvidia/gpt-oss-120b",
|
|
@@ -46642,7 +46649,8 @@ var SessionStore = class {
|
|
|
46642
46649
|
requestCount: 1,
|
|
46643
46650
|
recentHashes: [],
|
|
46644
46651
|
strikes: 0,
|
|
46645
|
-
escalated: false
|
|
46652
|
+
escalated: false,
|
|
46653
|
+
sessionCostMicros: 0n
|
|
46646
46654
|
});
|
|
46647
46655
|
}
|
|
46648
46656
|
}
|
|
@@ -46731,6 +46739,39 @@ var SessionStore = class {
|
|
|
46731
46739
|
entry.escalated = true;
|
|
46732
46740
|
return { model: nextConfig.primary, tier: nextTier };
|
|
46733
46741
|
}
|
|
46742
|
+
/**
|
|
46743
|
+
* Add cost to a session's running total for maxCostPerRun tracking.
|
|
46744
|
+
* Cost is in USDC 6-decimal units (micros).
|
|
46745
|
+
* Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
|
|
46746
|
+
* that never go through the routing path).
|
|
46747
|
+
*/
|
|
46748
|
+
addSessionCost(sessionId, additionalMicros) {
|
|
46749
|
+
let entry = this.sessions.get(sessionId);
|
|
46750
|
+
if (!entry) {
|
|
46751
|
+
const now = Date.now();
|
|
46752
|
+
entry = {
|
|
46753
|
+
model: "",
|
|
46754
|
+
tier: "DIRECT",
|
|
46755
|
+
createdAt: now,
|
|
46756
|
+
lastUsedAt: now,
|
|
46757
|
+
requestCount: 0,
|
|
46758
|
+
recentHashes: [],
|
|
46759
|
+
strikes: 0,
|
|
46760
|
+
escalated: false,
|
|
46761
|
+
sessionCostMicros: 0n
|
|
46762
|
+
};
|
|
46763
|
+
this.sessions.set(sessionId, entry);
|
|
46764
|
+
}
|
|
46765
|
+
entry.sessionCostMicros += additionalMicros;
|
|
46766
|
+
}
|
|
46767
|
+
/**
|
|
46768
|
+
* Get the total accumulated cost for a session in USD.
|
|
46769
|
+
*/
|
|
46770
|
+
getSessionCostUsd(sessionId) {
|
|
46771
|
+
const entry = this.sessions.get(sessionId);
|
|
46772
|
+
if (!entry) return 0;
|
|
46773
|
+
return Number(entry.sessionCostMicros) / 1e6;
|
|
46774
|
+
}
|
|
46734
46775
|
/**
|
|
46735
46776
|
* Stop the cleanup interval.
|
|
46736
46777
|
*/
|
|
@@ -47006,9 +47047,11 @@ var MAX_MESSAGES = 200;
|
|
|
47006
47047
|
var CONTEXT_LIMIT_KB = 5120;
|
|
47007
47048
|
var HEARTBEAT_INTERVAL_MS = 2e3;
|
|
47008
47049
|
var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
|
|
47050
|
+
var PER_MODEL_TIMEOUT_MS = 6e4;
|
|
47009
47051
|
var MAX_FALLBACK_ATTEMPTS = 5;
|
|
47010
47052
|
var HEALTH_CHECK_TIMEOUT_MS = 2e3;
|
|
47011
47053
|
var RATE_LIMIT_COOLDOWN_MS = 6e4;
|
|
47054
|
+
var OVERLOAD_COOLDOWN_MS = 15e3;
|
|
47012
47055
|
var PORT_RETRY_ATTEMPTS = 5;
|
|
47013
47056
|
var PORT_RETRY_DELAY_MS = 1e3;
|
|
47014
47057
|
var MODEL_BODY_READ_TIMEOUT_MS = 3e5;
|
|
@@ -47158,7 +47201,41 @@ function transformPaymentError(errorBody) {
|
|
|
47158
47201
|
}
|
|
47159
47202
|
return errorBody;
|
|
47160
47203
|
}
|
|
47204
|
+
function categorizeError(status, body) {
|
|
47205
|
+
if (status === 401) return "auth_failure";
|
|
47206
|
+
if (status === 402) return "payment_error";
|
|
47207
|
+
if (status === 403) {
|
|
47208
|
+
if (/plan.*limit|quota.*exceeded|subscription|allowance/i.test(body))
|
|
47209
|
+
return "quota_exceeded";
|
|
47210
|
+
return "auth_failure";
|
|
47211
|
+
}
|
|
47212
|
+
if (status === 429) return "rate_limited";
|
|
47213
|
+
if (status === 529) return "overloaded";
|
|
47214
|
+
if (status === 503 && /overload|capacity|too.*many.*request/i.test(body)) return "overloaded";
|
|
47215
|
+
if (status >= 500) return "server_error";
|
|
47216
|
+
if (status === 400 || status === 413) {
|
|
47217
|
+
if (PROVIDER_ERROR_PATTERNS.some((p) => p.test(body))) return "config_error";
|
|
47218
|
+
return null;
|
|
47219
|
+
}
|
|
47220
|
+
return null;
|
|
47221
|
+
}
|
|
47161
47222
|
var rateLimitedModels = /* @__PURE__ */ new Map();
|
|
47223
|
+
var overloadedModels = /* @__PURE__ */ new Map();
|
|
47224
|
+
var perProviderErrors = /* @__PURE__ */ new Map();
|
|
47225
|
+
function recordProviderError(modelId, category) {
|
|
47226
|
+
if (!perProviderErrors.has(modelId)) {
|
|
47227
|
+
perProviderErrors.set(modelId, {
|
|
47228
|
+
auth_failure: 0,
|
|
47229
|
+
quota_exceeded: 0,
|
|
47230
|
+
rate_limited: 0,
|
|
47231
|
+
overloaded: 0,
|
|
47232
|
+
server_error: 0,
|
|
47233
|
+
payment_error: 0,
|
|
47234
|
+
config_error: 0
|
|
47235
|
+
});
|
|
47236
|
+
}
|
|
47237
|
+
perProviderErrors.get(modelId)[category]++;
|
|
47238
|
+
}
|
|
47162
47239
|
function isRateLimited(modelId) {
|
|
47163
47240
|
const hitTime = rateLimitedModels.get(modelId);
|
|
47164
47241
|
if (!hitTime) return false;
|
|
@@ -47173,17 +47250,30 @@ function markRateLimited(modelId) {
|
|
|
47173
47250
|
rateLimitedModels.set(modelId, Date.now());
|
|
47174
47251
|
console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for 60s`);
|
|
47175
47252
|
}
|
|
47253
|
+
function markOverloaded(modelId) {
|
|
47254
|
+
overloadedModels.set(modelId, Date.now());
|
|
47255
|
+
console.log(`[ClawRouter] Model ${modelId} overloaded, will deprioritize for 15s`);
|
|
47256
|
+
}
|
|
47257
|
+
function isOverloaded(modelId) {
|
|
47258
|
+
const hitTime = overloadedModels.get(modelId);
|
|
47259
|
+
if (!hitTime) return false;
|
|
47260
|
+
if (Date.now() - hitTime >= OVERLOAD_COOLDOWN_MS) {
|
|
47261
|
+
overloadedModels.delete(modelId);
|
|
47262
|
+
return false;
|
|
47263
|
+
}
|
|
47264
|
+
return true;
|
|
47265
|
+
}
|
|
47176
47266
|
function prioritizeNonRateLimited(models) {
|
|
47177
47267
|
const available = [];
|
|
47178
|
-
const
|
|
47268
|
+
const degraded = [];
|
|
47179
47269
|
for (const model of models) {
|
|
47180
|
-
if (isRateLimited(model)) {
|
|
47181
|
-
|
|
47270
|
+
if (isRateLimited(model) || isOverloaded(model)) {
|
|
47271
|
+
degraded.push(model);
|
|
47182
47272
|
} else {
|
|
47183
47273
|
available.push(model);
|
|
47184
47274
|
}
|
|
47185
47275
|
}
|
|
47186
|
-
return [...available, ...
|
|
47276
|
+
return [...available, ...degraded];
|
|
47187
47277
|
}
|
|
47188
47278
|
function canWrite(res) {
|
|
47189
47279
|
return !res.writableEnded && !res.destroyed && res.socket !== null && !res.socket.destroyed && res.socket.writable;
|
|
@@ -47318,37 +47408,6 @@ function detectDegradedSuccessResponse(body) {
|
|
|
47318
47408
|
}
|
|
47319
47409
|
return void 0;
|
|
47320
47410
|
}
|
|
47321
|
-
var FALLBACK_STATUS_CODES = [
|
|
47322
|
-
400,
|
|
47323
|
-
// Bad request - sometimes used for billing errors
|
|
47324
|
-
401,
|
|
47325
|
-
// Unauthorized - provider API key issues
|
|
47326
|
-
402,
|
|
47327
|
-
// Payment required - but from upstream, not x402
|
|
47328
|
-
403,
|
|
47329
|
-
// Forbidden - provider restrictions
|
|
47330
|
-
413,
|
|
47331
|
-
// Payload too large - request exceeds model's context limit
|
|
47332
|
-
429,
|
|
47333
|
-
// Rate limited
|
|
47334
|
-
500,
|
|
47335
|
-
// Internal server error
|
|
47336
|
-
502,
|
|
47337
|
-
// Bad gateway
|
|
47338
|
-
503,
|
|
47339
|
-
// Service unavailable
|
|
47340
|
-
504
|
|
47341
|
-
// Gateway timeout
|
|
47342
|
-
];
|
|
47343
|
-
function isProviderError(status, body) {
|
|
47344
|
-
if (!FALLBACK_STATUS_CODES.includes(status)) {
|
|
47345
|
-
return false;
|
|
47346
|
-
}
|
|
47347
|
-
if (status >= 500) {
|
|
47348
|
-
return true;
|
|
47349
|
-
}
|
|
47350
|
-
return PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body));
|
|
47351
|
-
}
|
|
47352
47411
|
var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
|
|
47353
47412
|
var ROLE_MAPPINGS = {
|
|
47354
47413
|
developer: "system",
|
|
@@ -47854,7 +47913,16 @@ async function startProxy(options) {
|
|
|
47854
47913
|
"Content-Type": "application/json",
|
|
47855
47914
|
"Cache-Control": "no-cache"
|
|
47856
47915
|
});
|
|
47857
|
-
res.end(
|
|
47916
|
+
res.end(
|
|
47917
|
+
JSON.stringify(
|
|
47918
|
+
{
|
|
47919
|
+
...stats,
|
|
47920
|
+
providerErrors: Object.fromEntries(perProviderErrors)
|
|
47921
|
+
},
|
|
47922
|
+
null,
|
|
47923
|
+
2
|
|
47924
|
+
)
|
|
47925
|
+
);
|
|
47858
47926
|
} catch (err) {
|
|
47859
47927
|
res.writeHead(500, { "Content-Type": "application/json" });
|
|
47860
47928
|
res.end(
|
|
@@ -48297,12 +48365,13 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
|
|
|
48297
48365
|
if (response.status !== 200) {
|
|
48298
48366
|
const errorBodyChunks = await readBodyWithTimeout(response.body, ERROR_BODY_READ_TIMEOUT_MS);
|
|
48299
48367
|
const errorBody = Buffer.concat(errorBodyChunks).toString();
|
|
48300
|
-
const
|
|
48368
|
+
const category = categorizeError(response.status, errorBody);
|
|
48301
48369
|
return {
|
|
48302
48370
|
success: false,
|
|
48303
48371
|
errorBody,
|
|
48304
48372
|
errorStatus: response.status,
|
|
48305
|
-
isProviderError:
|
|
48373
|
+
isProviderError: category !== null,
|
|
48374
|
+
errorCategory: category ?? void 0
|
|
48306
48375
|
};
|
|
48307
48376
|
}
|
|
48308
48377
|
const contentType = response.headers.get("content-type") || "";
|
|
@@ -48355,6 +48424,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48355
48424
|
let maxTokens = 4096;
|
|
48356
48425
|
let routingProfile = null;
|
|
48357
48426
|
let balanceFallbackNotice;
|
|
48427
|
+
let budgetDowngradeNotice;
|
|
48428
|
+
let budgetDowngradeHeaderMode;
|
|
48358
48429
|
let accumulatedContent = "";
|
|
48359
48430
|
let responseInputTokens;
|
|
48360
48431
|
const isChatCompletion = req.url?.includes("/chat/completions");
|
|
@@ -48369,6 +48440,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48369
48440
|
let bodyModified = false;
|
|
48370
48441
|
const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
|
|
48371
48442
|
const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
|
|
48443
|
+
hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
|
|
48372
48444
|
const rawLastContent = lastUserMsg?.content;
|
|
48373
48445
|
const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
|
|
48374
48446
|
if (sessionId && parsedMessages.length > 0) {
|
|
@@ -49025,6 +49097,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49025
49097
|
options.onRouted?.(routingDecision);
|
|
49026
49098
|
}
|
|
49027
49099
|
}
|
|
49100
|
+
if (!effectiveSessionId && parsedMessages.length > 0) {
|
|
49101
|
+
effectiveSessionId = deriveSessionId(parsedMessages);
|
|
49102
|
+
}
|
|
49028
49103
|
if (bodyModified) {
|
|
49029
49104
|
body = Buffer.from(JSON.stringify(parsed));
|
|
49030
49105
|
}
|
|
@@ -49115,7 +49190,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49115
49190
|
}
|
|
49116
49191
|
deduplicator.markInflight(dedupKey);
|
|
49117
49192
|
let estimatedCostMicros;
|
|
49118
|
-
|
|
49193
|
+
let isFreeModel = modelId === FREE_MODEL;
|
|
49119
49194
|
if (modelId && !options.skipBalanceCheck && !isFreeModel) {
|
|
49120
49195
|
const estimated = estimateAmount(modelId, body.length, maxTokens);
|
|
49121
49196
|
if (estimated) {
|
|
@@ -49128,6 +49203,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49128
49203
|
`[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
|
|
49129
49204
|
);
|
|
49130
49205
|
modelId = FREE_MODEL;
|
|
49206
|
+
isFreeModel = true;
|
|
49131
49207
|
const parsed = JSON.parse(body.toString());
|
|
49132
49208
|
parsed.model = FREE_MODEL;
|
|
49133
49209
|
body = Buffer.from(JSON.stringify(parsed));
|
|
@@ -49154,6 +49230,89 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49154
49230
|
}
|
|
49155
49231
|
}
|
|
49156
49232
|
}
|
|
49233
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "strict") {
|
|
49234
|
+
const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
|
|
49235
|
+
const thisReqEstStr = estimatedCostMicros !== void 0 ? estimatedCostMicros.toString() : modelId ? estimateAmount(modelId, body.length, maxTokens) : void 0;
|
|
49236
|
+
const thisReqEstUsd = thisReqEstStr ? Number(thisReqEstStr) / 1e6 : 0;
|
|
49237
|
+
const projectedCostUsd = runCostUsd + thisReqEstUsd;
|
|
49238
|
+
if (projectedCostUsd > options.maxCostPerRunUsd) {
|
|
49239
|
+
console.log(
|
|
49240
|
+
`[ClawRouter] Cost cap exceeded for session ${effectiveSessionId.slice(0, 8)}...: projected $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) > $${options.maxCostPerRunUsd} limit`
|
|
49241
|
+
);
|
|
49242
|
+
res.writeHead(429, {
|
|
49243
|
+
"Content-Type": "application/json",
|
|
49244
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1"
|
|
49245
|
+
});
|
|
49246
|
+
res.end(
|
|
49247
|
+
JSON.stringify({
|
|
49248
|
+
error: {
|
|
49249
|
+
message: `ClawRouter cost cap exceeded: projected spend $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) would exceed limit $${options.maxCostPerRunUsd}`,
|
|
49250
|
+
type: "cost_cap_exceeded",
|
|
49251
|
+
code: "cost_cap_exceeded"
|
|
49252
|
+
}
|
|
49253
|
+
})
|
|
49254
|
+
);
|
|
49255
|
+
deduplicator.removeInflight(dedupKey);
|
|
49256
|
+
return;
|
|
49257
|
+
}
|
|
49258
|
+
}
|
|
49259
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
|
|
49260
|
+
const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
|
|
49261
|
+
const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
|
|
49262
|
+
const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
|
|
49263
|
+
if (isComplexOrAgentic) {
|
|
49264
|
+
const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
|
|
49265
|
+
if (m.id === FREE_MODEL) return false;
|
|
49266
|
+
const est = estimateAmount(m.id, body.length, maxTokens);
|
|
49267
|
+
return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
|
|
49268
|
+
});
|
|
49269
|
+
if (!canAffordAnyNonFreeModel) {
|
|
49270
|
+
console.log(
|
|
49271
|
+
`[ClawRouter] Budget insufficient for agentic/complex session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (silent downgrade would corrupt tool/complex responses)`
|
|
49272
|
+
);
|
|
49273
|
+
res.writeHead(429, {
|
|
49274
|
+
"Content-Type": "application/json",
|
|
49275
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1",
|
|
49276
|
+
"X-ClawRouter-Budget-Mode": "blocked"
|
|
49277
|
+
});
|
|
49278
|
+
res.end(
|
|
49279
|
+
JSON.stringify({
|
|
49280
|
+
error: {
|
|
49281
|
+
message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue.`,
|
|
49282
|
+
type: "cost_cap_exceeded",
|
|
49283
|
+
code: "budget_exhausted"
|
|
49284
|
+
}
|
|
49285
|
+
})
|
|
49286
|
+
);
|
|
49287
|
+
deduplicator.removeInflight(dedupKey);
|
|
49288
|
+
return;
|
|
49289
|
+
}
|
|
49290
|
+
} else if (!routingDecision && modelId && modelId !== FREE_MODEL) {
|
|
49291
|
+
const est = estimateAmount(modelId, body.length, maxTokens);
|
|
49292
|
+
const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
|
|
49293
|
+
if (!canAfford) {
|
|
49294
|
+
console.log(
|
|
49295
|
+
`[ClawRouter] Budget insufficient for explicit model ${modelId} in session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (user explicitly chose ${modelId})`
|
|
49296
|
+
);
|
|
49297
|
+
res.writeHead(429, {
|
|
49298
|
+
"Content-Type": "application/json",
|
|
49299
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1",
|
|
49300
|
+
"X-ClawRouter-Budget-Mode": "blocked"
|
|
49301
|
+
});
|
|
49302
|
+
res.end(
|
|
49303
|
+
JSON.stringify({
|
|
49304
|
+
error: {
|
|
49305
|
+
message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue using ${modelId}.`,
|
|
49306
|
+
type: "cost_cap_exceeded",
|
|
49307
|
+
code: "budget_exhausted"
|
|
49308
|
+
}
|
|
49309
|
+
})
|
|
49310
|
+
);
|
|
49311
|
+
deduplicator.removeInflight(dedupKey);
|
|
49312
|
+
return;
|
|
49313
|
+
}
|
|
49314
|
+
}
|
|
49315
|
+
}
|
|
49157
49316
|
let heartbeatInterval;
|
|
49158
49317
|
let headersSentEarly = false;
|
|
49159
49318
|
if (isStreaming) {
|
|
@@ -49198,8 +49357,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49198
49357
|
}
|
|
49199
49358
|
});
|
|
49200
49359
|
const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
|
|
49201
|
-
const
|
|
49202
|
-
const timeoutId = setTimeout(() =>
|
|
49360
|
+
const globalController = new AbortController();
|
|
49361
|
+
const timeoutId = setTimeout(() => globalController.abort(), timeoutMs);
|
|
49203
49362
|
try {
|
|
49204
49363
|
let modelsToTry;
|
|
49205
49364
|
if (routingDecision) {
|
|
@@ -49256,13 +49415,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49256
49415
|
if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
|
|
49257
49416
|
modelsToTry.push(FREE_MODEL);
|
|
49258
49417
|
}
|
|
49418
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
|
|
49419
|
+
const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
|
|
49420
|
+
const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
|
|
49421
|
+
const beforeFilter = [...modelsToTry];
|
|
49422
|
+
modelsToTry = modelsToTry.filter((m) => {
|
|
49423
|
+
if (m === FREE_MODEL) return true;
|
|
49424
|
+
const est = estimateAmount(m, body.length, maxTokens);
|
|
49425
|
+
if (!est) return true;
|
|
49426
|
+
return Number(est) / 1e6 <= remainingUsd;
|
|
49427
|
+
});
|
|
49428
|
+
const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
|
|
49429
|
+
const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
|
|
49430
|
+
const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m === FREE_MODEL);
|
|
49431
|
+
if (isComplexOrAgenticFilter && filteredToFreeOnly) {
|
|
49432
|
+
const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
|
|
49433
|
+
console.log(
|
|
49434
|
+
`[ClawRouter] Budget filter left only free model for complex/agentic session \u2014 blocking (${budgetSummary})`
|
|
49435
|
+
);
|
|
49436
|
+
const errPayload = JSON.stringify({
|
|
49437
|
+
error: {
|
|
49438
|
+
message: `ClawRouter budget exhausted: remaining budget (${budgetSummary}) cannot support a complex/tool request. Increase maxCostPerRun to continue.`,
|
|
49439
|
+
type: "cost_cap_exceeded",
|
|
49440
|
+
code: "budget_exhausted"
|
|
49441
|
+
}
|
|
49442
|
+
});
|
|
49443
|
+
if (heartbeatInterval) clearInterval(heartbeatInterval);
|
|
49444
|
+
if (headersSentEarly) {
|
|
49445
|
+
safeWrite(res, `data: ${errPayload}
|
|
49446
|
+
|
|
49447
|
+
data: [DONE]
|
|
49448
|
+
|
|
49449
|
+
`);
|
|
49450
|
+
res.end();
|
|
49451
|
+
} else {
|
|
49452
|
+
res.writeHead(429, {
|
|
49453
|
+
"Content-Type": "application/json",
|
|
49454
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1",
|
|
49455
|
+
"X-ClawRouter-Budget-Mode": "blocked"
|
|
49456
|
+
});
|
|
49457
|
+
res.end(errPayload);
|
|
49458
|
+
}
|
|
49459
|
+
deduplicator.removeInflight(dedupKey);
|
|
49460
|
+
return;
|
|
49461
|
+
}
|
|
49462
|
+
if (excluded.length > 0) {
|
|
49463
|
+
const budgetSummary = remainingUsd > 0 ? `$${remainingUsd.toFixed(4)} remaining` : `budget exhausted ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd})`;
|
|
49464
|
+
console.log(
|
|
49465
|
+
`[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
|
|
49466
|
+
);
|
|
49467
|
+
const fromModel = excluded[0];
|
|
49468
|
+
const usingFree = modelsToTry.length === 1 && modelsToTry[0] === FREE_MODEL;
|
|
49469
|
+
if (usingFree) {
|
|
49470
|
+
budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
|
|
49471
|
+
|
|
49472
|
+
`;
|
|
49473
|
+
} else {
|
|
49474
|
+
const toModel = modelsToTry[0] ?? FREE_MODEL;
|
|
49475
|
+
budgetDowngradeNotice = `> **\u26A0\uFE0F Budget low** ($${remainingUsd > 0 ? remainingUsd.toFixed(4) : "0.0000"} remaining) \u2014 using ${toModel} instead of ${fromModel}.
|
|
49476
|
+
|
|
49477
|
+
`;
|
|
49478
|
+
}
|
|
49479
|
+
budgetDowngradeHeaderMode = "downgraded";
|
|
49480
|
+
}
|
|
49481
|
+
}
|
|
49259
49482
|
let upstream;
|
|
49260
49483
|
let lastError;
|
|
49261
49484
|
let actualModelUsed = modelId;
|
|
49262
49485
|
for (let i = 0; i < modelsToTry.length; i++) {
|
|
49263
49486
|
const tryModel = modelsToTry[i];
|
|
49264
49487
|
const isLastAttempt = i === modelsToTry.length - 1;
|
|
49488
|
+
if (globalController.signal.aborted) {
|
|
49489
|
+
throw new Error(`Request timed out after ${timeoutMs}ms`);
|
|
49490
|
+
}
|
|
49265
49491
|
console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
|
|
49492
|
+
const modelController = new AbortController();
|
|
49493
|
+
const modelTimeoutId = setTimeout(() => modelController.abort(), PER_MODEL_TIMEOUT_MS);
|
|
49494
|
+
const combinedSignal = AbortSignal.any([globalController.signal, modelController.signal]);
|
|
49266
49495
|
const result = await tryModelRequest(
|
|
49267
49496
|
upstreamUrl,
|
|
49268
49497
|
req.method ?? "POST",
|
|
@@ -49272,12 +49501,29 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49272
49501
|
maxTokens,
|
|
49273
49502
|
payFetch,
|
|
49274
49503
|
balanceMonitor,
|
|
49275
|
-
|
|
49504
|
+
combinedSignal
|
|
49276
49505
|
);
|
|
49506
|
+
clearTimeout(modelTimeoutId);
|
|
49507
|
+
if (globalController.signal.aborted) {
|
|
49508
|
+
throw new Error(`Request timed out after ${timeoutMs}ms`);
|
|
49509
|
+
}
|
|
49510
|
+
if (!result.success && modelController.signal.aborted && !isLastAttempt) {
|
|
49511
|
+
console.log(
|
|
49512
|
+
`[ClawRouter] Model ${tryModel} timed out after ${PER_MODEL_TIMEOUT_MS}ms, trying fallback`
|
|
49513
|
+
);
|
|
49514
|
+
recordProviderError(tryModel, "server_error");
|
|
49515
|
+
continue;
|
|
49516
|
+
}
|
|
49277
49517
|
if (result.success && result.response) {
|
|
49278
49518
|
upstream = result.response;
|
|
49279
49519
|
actualModelUsed = tryModel;
|
|
49280
49520
|
console.log(`[ClawRouter] Success with model: ${tryModel}`);
|
|
49521
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
|
|
49522
|
+
const costEst = estimateAmount(tryModel, body.length, maxTokens);
|
|
49523
|
+
if (costEst) {
|
|
49524
|
+
sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
|
|
49525
|
+
}
|
|
49526
|
+
}
|
|
49281
49527
|
break;
|
|
49282
49528
|
}
|
|
49283
49529
|
lastError = {
|
|
@@ -49293,7 +49539,52 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49293
49539
|
);
|
|
49294
49540
|
break;
|
|
49295
49541
|
}
|
|
49296
|
-
|
|
49542
|
+
const errorCat = result.errorCategory;
|
|
49543
|
+
if (errorCat) {
|
|
49544
|
+
recordProviderError(tryModel, errorCat);
|
|
49545
|
+
}
|
|
49546
|
+
if (errorCat === "rate_limited") {
|
|
49547
|
+
if (!isLastAttempt && !globalController.signal.aborted) {
|
|
49548
|
+
console.log(
|
|
49549
|
+
`[ClawRouter] Rate-limited on ${tryModel}, retrying in 200ms before failover`
|
|
49550
|
+
);
|
|
49551
|
+
await new Promise((resolve) => setTimeout(resolve, 200));
|
|
49552
|
+
if (!globalController.signal.aborted) {
|
|
49553
|
+
const retryController = new AbortController();
|
|
49554
|
+
const retryTimeoutId = setTimeout(
|
|
49555
|
+
() => retryController.abort(),
|
|
49556
|
+
PER_MODEL_TIMEOUT_MS
|
|
49557
|
+
);
|
|
49558
|
+
const retrySignal = AbortSignal.any([
|
|
49559
|
+
globalController.signal,
|
|
49560
|
+
retryController.signal
|
|
49561
|
+
]);
|
|
49562
|
+
const retryResult = await tryModelRequest(
|
|
49563
|
+
upstreamUrl,
|
|
49564
|
+
req.method ?? "POST",
|
|
49565
|
+
headers,
|
|
49566
|
+
body,
|
|
49567
|
+
tryModel,
|
|
49568
|
+
maxTokens,
|
|
49569
|
+
payFetch,
|
|
49570
|
+
balanceMonitor,
|
|
49571
|
+
retrySignal
|
|
49572
|
+
);
|
|
49573
|
+
clearTimeout(retryTimeoutId);
|
|
49574
|
+
if (retryResult.success && retryResult.response) {
|
|
49575
|
+
upstream = retryResult.response;
|
|
49576
|
+
actualModelUsed = tryModel;
|
|
49577
|
+
console.log(`[ClawRouter] Rate-limit retry succeeded for: ${tryModel}`);
|
|
49578
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
|
|
49579
|
+
const costEst = estimateAmount(tryModel, body.length, maxTokens);
|
|
49580
|
+
if (costEst) {
|
|
49581
|
+
sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
|
|
49582
|
+
}
|
|
49583
|
+
}
|
|
49584
|
+
break;
|
|
49585
|
+
}
|
|
49586
|
+
}
|
|
49587
|
+
}
|
|
49297
49588
|
markRateLimited(tryModel);
|
|
49298
49589
|
try {
|
|
49299
49590
|
const parsed = JSON.parse(result.errorBody || "{}");
|
|
@@ -49309,6 +49600,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49309
49600
|
}
|
|
49310
49601
|
} catch {
|
|
49311
49602
|
}
|
|
49603
|
+
} else if (errorCat === "overloaded") {
|
|
49604
|
+
markOverloaded(tryModel);
|
|
49605
|
+
} else if (errorCat === "auth_failure" || errorCat === "quota_exceeded") {
|
|
49606
|
+
console.log(
|
|
49607
|
+
`[ClawRouter] \u{1F511} ${errorCat === "auth_failure" ? "Auth failure" : "Quota exceeded"} for ${tryModel} \u2014 check provider config`
|
|
49608
|
+
);
|
|
49312
49609
|
}
|
|
49313
49610
|
const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
|
|
49314
49611
|
result.errorBody || ""
|
|
@@ -49468,6 +49765,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49468
49765
|
responseChunks.push(Buffer.from(noticeData));
|
|
49469
49766
|
balanceFallbackNotice = void 0;
|
|
49470
49767
|
}
|
|
49768
|
+
if (budgetDowngradeNotice) {
|
|
49769
|
+
const noticeChunk = {
|
|
49770
|
+
...baseChunk,
|
|
49771
|
+
choices: [
|
|
49772
|
+
{
|
|
49773
|
+
index: index2,
|
|
49774
|
+
delta: { content: budgetDowngradeNotice },
|
|
49775
|
+
logprobs: null,
|
|
49776
|
+
finish_reason: null
|
|
49777
|
+
}
|
|
49778
|
+
]
|
|
49779
|
+
};
|
|
49780
|
+
const noticeData = `data: ${JSON.stringify(noticeChunk)}
|
|
49781
|
+
|
|
49782
|
+
`;
|
|
49783
|
+
safeWrite(res, noticeData);
|
|
49784
|
+
responseChunks.push(Buffer.from(noticeData));
|
|
49785
|
+
budgetDowngradeNotice = void 0;
|
|
49786
|
+
}
|
|
49471
49787
|
if (content) {
|
|
49472
49788
|
const contentChunk = {
|
|
49473
49789
|
...baseChunk,
|
|
@@ -49571,6 +49887,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49571
49887
|
}
|
|
49572
49888
|
balanceFallbackNotice = void 0;
|
|
49573
49889
|
}
|
|
49890
|
+
if (budgetDowngradeNotice && responseBody.length > 0) {
|
|
49891
|
+
try {
|
|
49892
|
+
const parsed = JSON.parse(responseBody.toString());
|
|
49893
|
+
if (parsed.choices?.[0]?.message?.content !== void 0) {
|
|
49894
|
+
parsed.choices[0].message.content = budgetDowngradeNotice + parsed.choices[0].message.content;
|
|
49895
|
+
responseBody = Buffer.from(JSON.stringify(parsed));
|
|
49896
|
+
}
|
|
49897
|
+
} catch {
|
|
49898
|
+
}
|
|
49899
|
+
budgetDowngradeNotice = void 0;
|
|
49900
|
+
}
|
|
49901
|
+
if (budgetDowngradeHeaderMode) {
|
|
49902
|
+
responseHeaders["x-clawrouter-budget-downgrade"] = "1";
|
|
49903
|
+
responseHeaders["x-clawrouter-budget-mode"] = budgetDowngradeHeaderMode;
|
|
49904
|
+
budgetDowngradeHeaderMode = void 0;
|
|
49905
|
+
}
|
|
49574
49906
|
responseHeaders["content-length"] = String(responseBody.length);
|
|
49575
49907
|
res.writeHead(upstream.status, responseHeaders);
|
|
49576
49908
|
safeWrite(res, responseBody);
|
|
@@ -50238,6 +50570,18 @@ function injectModelsConfig(logger) {
|
|
|
50238
50570
|
needsWrite = true;
|
|
50239
50571
|
}
|
|
50240
50572
|
const allowlist = defaults.models;
|
|
50573
|
+
const DEPRECATED_BLOCKRUN_MODELS = ["blockrun/xai/grok-code-fast-1"];
|
|
50574
|
+
let removedDeprecatedCount = 0;
|
|
50575
|
+
for (const key of DEPRECATED_BLOCKRUN_MODELS) {
|
|
50576
|
+
if (allowlist[key]) {
|
|
50577
|
+
delete allowlist[key];
|
|
50578
|
+
removedDeprecatedCount++;
|
|
50579
|
+
}
|
|
50580
|
+
}
|
|
50581
|
+
if (removedDeprecatedCount > 0) {
|
|
50582
|
+
needsWrite = true;
|
|
50583
|
+
logger.info(`Removed ${removedDeprecatedCount} deprecated model entries from allowlist`);
|
|
50584
|
+
}
|
|
50241
50585
|
let addedCount = 0;
|
|
50242
50586
|
for (const id of TOP_MODELS) {
|
|
50243
50587
|
const key = `blockrun/${id}`;
|
|
@@ -50339,9 +50683,18 @@ async function startProxyInBackground(api) {
|
|
|
50339
50683
|
api.logger.info(`Using wallet from BLOCKRUN_WALLET_KEY: ${wallet.address}`);
|
|
50340
50684
|
}
|
|
50341
50685
|
const routingConfig = api.pluginConfig?.routing;
|
|
50686
|
+
const maxCostPerRunUsd = typeof api.pluginConfig?.maxCostPerRun === "number" ? api.pluginConfig.maxCostPerRun : void 0;
|
|
50687
|
+
const maxCostPerRunMode = api.pluginConfig?.maxCostPerRunMode === "strict" ? "strict" : "graceful";
|
|
50688
|
+
if (maxCostPerRunUsd !== void 0) {
|
|
50689
|
+
api.logger.info(
|
|
50690
|
+
`Cost cap: $${maxCostPerRunUsd.toFixed(2)} per session (mode: ${maxCostPerRunMode})`
|
|
50691
|
+
);
|
|
50692
|
+
}
|
|
50342
50693
|
const proxy = await startProxy({
|
|
50343
50694
|
wallet,
|
|
50344
50695
|
routingConfig,
|
|
50696
|
+
maxCostPerRunUsd,
|
|
50697
|
+
maxCostPerRunMode,
|
|
50345
50698
|
onReady: (port) => {
|
|
50346
50699
|
api.logger.info(`BlockRun x402 proxy listening on port ${port}`);
|
|
50347
50700
|
},
|