@blockrun/clawrouter 0.12.55 → 0.12.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -51
- package/dist/cli.js +401 -44
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +24 -0
- package/dist/index.js +422 -44
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/scripts/reinstall.sh +98 -18
- package/scripts/update.sh +95 -18
package/dist/cli.js
CHANGED
|
@@ -40443,11 +40443,22 @@ var MODEL_ALIASES = {
|
|
|
40443
40443
|
"grok-fast": "xai/grok-4-fast-reasoning",
|
|
40444
40444
|
"grok-code": "deepseek/deepseek-chat",
|
|
40445
40445
|
// was grok-code-fast-1, delisted due to poor retention
|
|
40446
|
+
// Delisted model redirects — full model IDs that were previously valid but removed
|
|
40447
|
+
"grok-code-fast-1": "deepseek/deepseek-chat",
|
|
40448
|
+
// bare alias
|
|
40449
|
+
"xai/grok-code-fast-1": "deepseek/deepseek-chat",
|
|
40450
|
+
// delisted 2026-03-12
|
|
40451
|
+
"xai/grok-3-fast": "xai/grok-4-fast-reasoning",
|
|
40452
|
+
// delisted (too expensive)
|
|
40446
40453
|
// NVIDIA
|
|
40447
40454
|
nvidia: "nvidia/gpt-oss-120b",
|
|
40448
40455
|
"gpt-120b": "nvidia/gpt-oss-120b",
|
|
40449
40456
|
// MiniMax
|
|
40450
40457
|
minimax: "minimax/minimax-m2.5",
|
|
40458
|
+
// Z.AI GLM-5
|
|
40459
|
+
glm: "zai/glm-5",
|
|
40460
|
+
"glm-5": "zai/glm-5",
|
|
40461
|
+
"glm-5-turbo": "zai/glm-5-turbo",
|
|
40451
40462
|
// Routing profile aliases (common variations)
|
|
40452
40463
|
"auto-router": "auto",
|
|
40453
40464
|
router: "auto"
|
|
@@ -40982,6 +40993,27 @@ var BLOCKRUN_MODELS = [
|
|
|
40982
40993
|
contextWindow: 262144,
|
|
40983
40994
|
maxOutput: 16384,
|
|
40984
40995
|
toolCalling: true
|
|
40996
|
+
},
|
|
40997
|
+
// Z.AI GLM-5 Models
|
|
40998
|
+
{
|
|
40999
|
+
id: "zai/glm-5",
|
|
41000
|
+
name: "GLM-5",
|
|
41001
|
+
version: "5",
|
|
41002
|
+
inputPrice: 1,
|
|
41003
|
+
outputPrice: 3.2,
|
|
41004
|
+
contextWindow: 2e5,
|
|
41005
|
+
maxOutput: 128e3,
|
|
41006
|
+
toolCalling: true
|
|
41007
|
+
},
|
|
41008
|
+
{
|
|
41009
|
+
id: "zai/glm-5-turbo",
|
|
41010
|
+
name: "GLM-5 Turbo",
|
|
41011
|
+
version: "5-turbo",
|
|
41012
|
+
inputPrice: 1.2,
|
|
41013
|
+
outputPrice: 4,
|
|
41014
|
+
contextWindow: 2e5,
|
|
41015
|
+
maxOutput: 128e3,
|
|
41016
|
+
toolCalling: true
|
|
40985
41017
|
}
|
|
40986
41018
|
];
|
|
40987
41019
|
function toOpenClawModel(m) {
|
|
@@ -46122,7 +46154,8 @@ var SessionStore = class {
|
|
|
46122
46154
|
requestCount: 1,
|
|
46123
46155
|
recentHashes: [],
|
|
46124
46156
|
strikes: 0,
|
|
46125
|
-
escalated: false
|
|
46157
|
+
escalated: false,
|
|
46158
|
+
sessionCostMicros: 0n
|
|
46126
46159
|
});
|
|
46127
46160
|
}
|
|
46128
46161
|
}
|
|
@@ -46211,6 +46244,39 @@ var SessionStore = class {
|
|
|
46211
46244
|
entry.escalated = true;
|
|
46212
46245
|
return { model: nextConfig.primary, tier: nextTier };
|
|
46213
46246
|
}
|
|
46247
|
+
/**
|
|
46248
|
+
* Add cost to a session's running total for maxCostPerRun tracking.
|
|
46249
|
+
* Cost is in USDC 6-decimal units (micros).
|
|
46250
|
+
* Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
|
|
46251
|
+
* that never go through the routing path).
|
|
46252
|
+
*/
|
|
46253
|
+
addSessionCost(sessionId, additionalMicros) {
|
|
46254
|
+
let entry = this.sessions.get(sessionId);
|
|
46255
|
+
if (!entry) {
|
|
46256
|
+
const now = Date.now();
|
|
46257
|
+
entry = {
|
|
46258
|
+
model: "",
|
|
46259
|
+
tier: "DIRECT",
|
|
46260
|
+
createdAt: now,
|
|
46261
|
+
lastUsedAt: now,
|
|
46262
|
+
requestCount: 0,
|
|
46263
|
+
recentHashes: [],
|
|
46264
|
+
strikes: 0,
|
|
46265
|
+
escalated: false,
|
|
46266
|
+
sessionCostMicros: 0n
|
|
46267
|
+
};
|
|
46268
|
+
this.sessions.set(sessionId, entry);
|
|
46269
|
+
}
|
|
46270
|
+
entry.sessionCostMicros += additionalMicros;
|
|
46271
|
+
}
|
|
46272
|
+
/**
|
|
46273
|
+
* Get the total accumulated cost for a session in USD.
|
|
46274
|
+
*/
|
|
46275
|
+
getSessionCostUsd(sessionId) {
|
|
46276
|
+
const entry = this.sessions.get(sessionId);
|
|
46277
|
+
if (!entry) return 0;
|
|
46278
|
+
return Number(entry.sessionCostMicros) / 1e6;
|
|
46279
|
+
}
|
|
46214
46280
|
/**
|
|
46215
46281
|
* Stop the cleanup interval.
|
|
46216
46282
|
*/
|
|
@@ -46486,9 +46552,11 @@ var MAX_MESSAGES = 200;
|
|
|
46486
46552
|
var CONTEXT_LIMIT_KB = 5120;
|
|
46487
46553
|
var HEARTBEAT_INTERVAL_MS = 2e3;
|
|
46488
46554
|
var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
|
|
46555
|
+
var PER_MODEL_TIMEOUT_MS = 6e4;
|
|
46489
46556
|
var MAX_FALLBACK_ATTEMPTS = 5;
|
|
46490
46557
|
var HEALTH_CHECK_TIMEOUT_MS = 2e3;
|
|
46491
46558
|
var RATE_LIMIT_COOLDOWN_MS = 6e4;
|
|
46559
|
+
var OVERLOAD_COOLDOWN_MS = 15e3;
|
|
46492
46560
|
var PORT_RETRY_ATTEMPTS = 5;
|
|
46493
46561
|
var PORT_RETRY_DELAY_MS = 1e3;
|
|
46494
46562
|
var MODEL_BODY_READ_TIMEOUT_MS = 3e5;
|
|
@@ -46638,7 +46706,41 @@ function transformPaymentError(errorBody) {
|
|
|
46638
46706
|
}
|
|
46639
46707
|
return errorBody;
|
|
46640
46708
|
}
|
|
46709
|
+
function categorizeError(status, body) {
|
|
46710
|
+
if (status === 401) return "auth_failure";
|
|
46711
|
+
if (status === 402) return "payment_error";
|
|
46712
|
+
if (status === 403) {
|
|
46713
|
+
if (/plan.*limit|quota.*exceeded|subscription|allowance/i.test(body))
|
|
46714
|
+
return "quota_exceeded";
|
|
46715
|
+
return "auth_failure";
|
|
46716
|
+
}
|
|
46717
|
+
if (status === 429) return "rate_limited";
|
|
46718
|
+
if (status === 529) return "overloaded";
|
|
46719
|
+
if (status === 503 && /overload|capacity|too.*many.*request/i.test(body)) return "overloaded";
|
|
46720
|
+
if (status >= 500) return "server_error";
|
|
46721
|
+
if (status === 400 || status === 413) {
|
|
46722
|
+
if (PROVIDER_ERROR_PATTERNS.some((p) => p.test(body))) return "config_error";
|
|
46723
|
+
return null;
|
|
46724
|
+
}
|
|
46725
|
+
return null;
|
|
46726
|
+
}
|
|
46641
46727
|
var rateLimitedModels = /* @__PURE__ */ new Map();
|
|
46728
|
+
var overloadedModels = /* @__PURE__ */ new Map();
|
|
46729
|
+
var perProviderErrors = /* @__PURE__ */ new Map();
|
|
46730
|
+
function recordProviderError(modelId, category) {
|
|
46731
|
+
if (!perProviderErrors.has(modelId)) {
|
|
46732
|
+
perProviderErrors.set(modelId, {
|
|
46733
|
+
auth_failure: 0,
|
|
46734
|
+
quota_exceeded: 0,
|
|
46735
|
+
rate_limited: 0,
|
|
46736
|
+
overloaded: 0,
|
|
46737
|
+
server_error: 0,
|
|
46738
|
+
payment_error: 0,
|
|
46739
|
+
config_error: 0
|
|
46740
|
+
});
|
|
46741
|
+
}
|
|
46742
|
+
perProviderErrors.get(modelId)[category]++;
|
|
46743
|
+
}
|
|
46642
46744
|
function isRateLimited(modelId) {
|
|
46643
46745
|
const hitTime = rateLimitedModels.get(modelId);
|
|
46644
46746
|
if (!hitTime) return false;
|
|
@@ -46653,17 +46755,30 @@ function markRateLimited(modelId) {
|
|
|
46653
46755
|
rateLimitedModels.set(modelId, Date.now());
|
|
46654
46756
|
console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for 60s`);
|
|
46655
46757
|
}
|
|
46758
|
+
function markOverloaded(modelId) {
|
|
46759
|
+
overloadedModels.set(modelId, Date.now());
|
|
46760
|
+
console.log(`[ClawRouter] Model ${modelId} overloaded, will deprioritize for 15s`);
|
|
46761
|
+
}
|
|
46762
|
+
function isOverloaded(modelId) {
|
|
46763
|
+
const hitTime = overloadedModels.get(modelId);
|
|
46764
|
+
if (!hitTime) return false;
|
|
46765
|
+
if (Date.now() - hitTime >= OVERLOAD_COOLDOWN_MS) {
|
|
46766
|
+
overloadedModels.delete(modelId);
|
|
46767
|
+
return false;
|
|
46768
|
+
}
|
|
46769
|
+
return true;
|
|
46770
|
+
}
|
|
46656
46771
|
function prioritizeNonRateLimited(models) {
|
|
46657
46772
|
const available = [];
|
|
46658
|
-
const
|
|
46773
|
+
const degraded = [];
|
|
46659
46774
|
for (const model of models) {
|
|
46660
|
-
if (isRateLimited(model)) {
|
|
46661
|
-
|
|
46775
|
+
if (isRateLimited(model) || isOverloaded(model)) {
|
|
46776
|
+
degraded.push(model);
|
|
46662
46777
|
} else {
|
|
46663
46778
|
available.push(model);
|
|
46664
46779
|
}
|
|
46665
46780
|
}
|
|
46666
|
-
return [...available, ...
|
|
46781
|
+
return [...available, ...degraded];
|
|
46667
46782
|
}
|
|
46668
46783
|
function canWrite(res) {
|
|
46669
46784
|
return !res.writableEnded && !res.destroyed && res.socket !== null && !res.socket.destroyed && res.socket.writable;
|
|
@@ -46798,37 +46913,6 @@ function detectDegradedSuccessResponse(body) {
|
|
|
46798
46913
|
}
|
|
46799
46914
|
return void 0;
|
|
46800
46915
|
}
|
|
46801
|
-
var FALLBACK_STATUS_CODES = [
|
|
46802
|
-
400,
|
|
46803
|
-
// Bad request - sometimes used for billing errors
|
|
46804
|
-
401,
|
|
46805
|
-
// Unauthorized - provider API key issues
|
|
46806
|
-
402,
|
|
46807
|
-
// Payment required - but from upstream, not x402
|
|
46808
|
-
403,
|
|
46809
|
-
// Forbidden - provider restrictions
|
|
46810
|
-
413,
|
|
46811
|
-
// Payload too large - request exceeds model's context limit
|
|
46812
|
-
429,
|
|
46813
|
-
// Rate limited
|
|
46814
|
-
500,
|
|
46815
|
-
// Internal server error
|
|
46816
|
-
502,
|
|
46817
|
-
// Bad gateway
|
|
46818
|
-
503,
|
|
46819
|
-
// Service unavailable
|
|
46820
|
-
504
|
|
46821
|
-
// Gateway timeout
|
|
46822
|
-
];
|
|
46823
|
-
function isProviderError(status, body) {
|
|
46824
|
-
if (!FALLBACK_STATUS_CODES.includes(status)) {
|
|
46825
|
-
return false;
|
|
46826
|
-
}
|
|
46827
|
-
if (status >= 500) {
|
|
46828
|
-
return true;
|
|
46829
|
-
}
|
|
46830
|
-
return PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body));
|
|
46831
|
-
}
|
|
46832
46916
|
var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
|
|
46833
46917
|
var ROLE_MAPPINGS = {
|
|
46834
46918
|
developer: "system",
|
|
@@ -47334,7 +47418,16 @@ async function startProxy(options) {
|
|
|
47334
47418
|
"Content-Type": "application/json",
|
|
47335
47419
|
"Cache-Control": "no-cache"
|
|
47336
47420
|
});
|
|
47337
|
-
res.end(
|
|
47421
|
+
res.end(
|
|
47422
|
+
JSON.stringify(
|
|
47423
|
+
{
|
|
47424
|
+
...stats,
|
|
47425
|
+
providerErrors: Object.fromEntries(perProviderErrors)
|
|
47426
|
+
},
|
|
47427
|
+
null,
|
|
47428
|
+
2
|
|
47429
|
+
)
|
|
47430
|
+
);
|
|
47338
47431
|
} catch (err) {
|
|
47339
47432
|
res.writeHead(500, { "Content-Type": "application/json" });
|
|
47340
47433
|
res.end(
|
|
@@ -47777,12 +47870,13 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
|
|
|
47777
47870
|
if (response.status !== 200) {
|
|
47778
47871
|
const errorBodyChunks = await readBodyWithTimeout(response.body, ERROR_BODY_READ_TIMEOUT_MS);
|
|
47779
47872
|
const errorBody = Buffer.concat(errorBodyChunks).toString();
|
|
47780
|
-
const
|
|
47873
|
+
const category = categorizeError(response.status, errorBody);
|
|
47781
47874
|
return {
|
|
47782
47875
|
success: false,
|
|
47783
47876
|
errorBody,
|
|
47784
47877
|
errorStatus: response.status,
|
|
47785
|
-
isProviderError:
|
|
47878
|
+
isProviderError: category !== null,
|
|
47879
|
+
errorCategory: category ?? void 0
|
|
47786
47880
|
};
|
|
47787
47881
|
}
|
|
47788
47882
|
const contentType = response.headers.get("content-type") || "";
|
|
@@ -47835,6 +47929,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
47835
47929
|
let maxTokens = 4096;
|
|
47836
47930
|
let routingProfile = null;
|
|
47837
47931
|
let balanceFallbackNotice;
|
|
47932
|
+
let budgetDowngradeNotice;
|
|
47933
|
+
let budgetDowngradeHeaderMode;
|
|
47838
47934
|
let accumulatedContent = "";
|
|
47839
47935
|
let responseInputTokens;
|
|
47840
47936
|
const isChatCompletion = req.url?.includes("/chat/completions");
|
|
@@ -47849,6 +47945,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
47849
47945
|
let bodyModified = false;
|
|
47850
47946
|
const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
|
|
47851
47947
|
const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
|
|
47948
|
+
hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
|
|
47852
47949
|
const rawLastContent = lastUserMsg?.content;
|
|
47853
47950
|
const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
|
|
47854
47951
|
if (sessionId && parsedMessages.length > 0) {
|
|
@@ -48505,6 +48602,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48505
48602
|
options.onRouted?.(routingDecision);
|
|
48506
48603
|
}
|
|
48507
48604
|
}
|
|
48605
|
+
if (!effectiveSessionId && parsedMessages.length > 0) {
|
|
48606
|
+
effectiveSessionId = deriveSessionId(parsedMessages);
|
|
48607
|
+
}
|
|
48508
48608
|
if (bodyModified) {
|
|
48509
48609
|
body = Buffer.from(JSON.stringify(parsed));
|
|
48510
48610
|
}
|
|
@@ -48595,7 +48695,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48595
48695
|
}
|
|
48596
48696
|
deduplicator.markInflight(dedupKey);
|
|
48597
48697
|
let estimatedCostMicros;
|
|
48598
|
-
|
|
48698
|
+
let isFreeModel = modelId === FREE_MODEL;
|
|
48599
48699
|
if (modelId && !options.skipBalanceCheck && !isFreeModel) {
|
|
48600
48700
|
const estimated = estimateAmount(modelId, body.length, maxTokens);
|
|
48601
48701
|
if (estimated) {
|
|
@@ -48608,6 +48708,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48608
48708
|
`[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
|
|
48609
48709
|
);
|
|
48610
48710
|
modelId = FREE_MODEL;
|
|
48711
|
+
isFreeModel = true;
|
|
48611
48712
|
const parsed = JSON.parse(body.toString());
|
|
48612
48713
|
parsed.model = FREE_MODEL;
|
|
48613
48714
|
body = Buffer.from(JSON.stringify(parsed));
|
|
@@ -48634,6 +48735,89 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48634
48735
|
}
|
|
48635
48736
|
}
|
|
48636
48737
|
}
|
|
48738
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "strict") {
|
|
48739
|
+
const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
|
|
48740
|
+
const thisReqEstStr = estimatedCostMicros !== void 0 ? estimatedCostMicros.toString() : modelId ? estimateAmount(modelId, body.length, maxTokens) : void 0;
|
|
48741
|
+
const thisReqEstUsd = thisReqEstStr ? Number(thisReqEstStr) / 1e6 : 0;
|
|
48742
|
+
const projectedCostUsd = runCostUsd + thisReqEstUsd;
|
|
48743
|
+
if (projectedCostUsd > options.maxCostPerRunUsd) {
|
|
48744
|
+
console.log(
|
|
48745
|
+
`[ClawRouter] Cost cap exceeded for session ${effectiveSessionId.slice(0, 8)}...: projected $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) > $${options.maxCostPerRunUsd} limit`
|
|
48746
|
+
);
|
|
48747
|
+
res.writeHead(429, {
|
|
48748
|
+
"Content-Type": "application/json",
|
|
48749
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1"
|
|
48750
|
+
});
|
|
48751
|
+
res.end(
|
|
48752
|
+
JSON.stringify({
|
|
48753
|
+
error: {
|
|
48754
|
+
message: `ClawRouter cost cap exceeded: projected spend $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) would exceed limit $${options.maxCostPerRunUsd}`,
|
|
48755
|
+
type: "cost_cap_exceeded",
|
|
48756
|
+
code: "cost_cap_exceeded"
|
|
48757
|
+
}
|
|
48758
|
+
})
|
|
48759
|
+
);
|
|
48760
|
+
deduplicator.removeInflight(dedupKey);
|
|
48761
|
+
return;
|
|
48762
|
+
}
|
|
48763
|
+
}
|
|
48764
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
|
|
48765
|
+
const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
|
|
48766
|
+
const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
|
|
48767
|
+
const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
|
|
48768
|
+
if (isComplexOrAgentic) {
|
|
48769
|
+
const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
|
|
48770
|
+
if (m.id === FREE_MODEL) return false;
|
|
48771
|
+
const est = estimateAmount(m.id, body.length, maxTokens);
|
|
48772
|
+
return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
|
|
48773
|
+
});
|
|
48774
|
+
if (!canAffordAnyNonFreeModel) {
|
|
48775
|
+
console.log(
|
|
48776
|
+
`[ClawRouter] Budget insufficient for agentic/complex session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (silent downgrade would corrupt tool/complex responses)`
|
|
48777
|
+
);
|
|
48778
|
+
res.writeHead(429, {
|
|
48779
|
+
"Content-Type": "application/json",
|
|
48780
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1",
|
|
48781
|
+
"X-ClawRouter-Budget-Mode": "blocked"
|
|
48782
|
+
});
|
|
48783
|
+
res.end(
|
|
48784
|
+
JSON.stringify({
|
|
48785
|
+
error: {
|
|
48786
|
+
message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue.`,
|
|
48787
|
+
type: "cost_cap_exceeded",
|
|
48788
|
+
code: "budget_exhausted"
|
|
48789
|
+
}
|
|
48790
|
+
})
|
|
48791
|
+
);
|
|
48792
|
+
deduplicator.removeInflight(dedupKey);
|
|
48793
|
+
return;
|
|
48794
|
+
}
|
|
48795
|
+
} else if (!routingDecision && modelId && modelId !== FREE_MODEL) {
|
|
48796
|
+
const est = estimateAmount(modelId, body.length, maxTokens);
|
|
48797
|
+
const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
|
|
48798
|
+
if (!canAfford) {
|
|
48799
|
+
console.log(
|
|
48800
|
+
`[ClawRouter] Budget insufficient for explicit model ${modelId} in session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (user explicitly chose ${modelId})`
|
|
48801
|
+
);
|
|
48802
|
+
res.writeHead(429, {
|
|
48803
|
+
"Content-Type": "application/json",
|
|
48804
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1",
|
|
48805
|
+
"X-ClawRouter-Budget-Mode": "blocked"
|
|
48806
|
+
});
|
|
48807
|
+
res.end(
|
|
48808
|
+
JSON.stringify({
|
|
48809
|
+
error: {
|
|
48810
|
+
message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue using ${modelId}.`,
|
|
48811
|
+
type: "cost_cap_exceeded",
|
|
48812
|
+
code: "budget_exhausted"
|
|
48813
|
+
}
|
|
48814
|
+
})
|
|
48815
|
+
);
|
|
48816
|
+
deduplicator.removeInflight(dedupKey);
|
|
48817
|
+
return;
|
|
48818
|
+
}
|
|
48819
|
+
}
|
|
48820
|
+
}
|
|
48637
48821
|
let heartbeatInterval;
|
|
48638
48822
|
let headersSentEarly = false;
|
|
48639
48823
|
if (isStreaming) {
|
|
@@ -48678,8 +48862,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48678
48862
|
}
|
|
48679
48863
|
});
|
|
48680
48864
|
const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
|
|
48681
|
-
const
|
|
48682
|
-
const timeoutId = setTimeout(() =>
|
|
48865
|
+
const globalController = new AbortController();
|
|
48866
|
+
const timeoutId = setTimeout(() => globalController.abort(), timeoutMs);
|
|
48683
48867
|
try {
|
|
48684
48868
|
let modelsToTry;
|
|
48685
48869
|
if (routingDecision) {
|
|
@@ -48736,13 +48920,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48736
48920
|
if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
|
|
48737
48921
|
modelsToTry.push(FREE_MODEL);
|
|
48738
48922
|
}
|
|
48923
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
|
|
48924
|
+
const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
|
|
48925
|
+
const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
|
|
48926
|
+
const beforeFilter = [...modelsToTry];
|
|
48927
|
+
modelsToTry = modelsToTry.filter((m) => {
|
|
48928
|
+
if (m === FREE_MODEL) return true;
|
|
48929
|
+
const est = estimateAmount(m, body.length, maxTokens);
|
|
48930
|
+
if (!est) return true;
|
|
48931
|
+
return Number(est) / 1e6 <= remainingUsd;
|
|
48932
|
+
});
|
|
48933
|
+
const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
|
|
48934
|
+
const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
|
|
48935
|
+
const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m === FREE_MODEL);
|
|
48936
|
+
if (isComplexOrAgenticFilter && filteredToFreeOnly) {
|
|
48937
|
+
const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
|
|
48938
|
+
console.log(
|
|
48939
|
+
`[ClawRouter] Budget filter left only free model for complex/agentic session \u2014 blocking (${budgetSummary})`
|
|
48940
|
+
);
|
|
48941
|
+
const errPayload = JSON.stringify({
|
|
48942
|
+
error: {
|
|
48943
|
+
message: `ClawRouter budget exhausted: remaining budget (${budgetSummary}) cannot support a complex/tool request. Increase maxCostPerRun to continue.`,
|
|
48944
|
+
type: "cost_cap_exceeded",
|
|
48945
|
+
code: "budget_exhausted"
|
|
48946
|
+
}
|
|
48947
|
+
});
|
|
48948
|
+
if (heartbeatInterval) clearInterval(heartbeatInterval);
|
|
48949
|
+
if (headersSentEarly) {
|
|
48950
|
+
safeWrite(res, `data: ${errPayload}
|
|
48951
|
+
|
|
48952
|
+
data: [DONE]
|
|
48953
|
+
|
|
48954
|
+
`);
|
|
48955
|
+
res.end();
|
|
48956
|
+
} else {
|
|
48957
|
+
res.writeHead(429, {
|
|
48958
|
+
"Content-Type": "application/json",
|
|
48959
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1",
|
|
48960
|
+
"X-ClawRouter-Budget-Mode": "blocked"
|
|
48961
|
+
});
|
|
48962
|
+
res.end(errPayload);
|
|
48963
|
+
}
|
|
48964
|
+
deduplicator.removeInflight(dedupKey);
|
|
48965
|
+
return;
|
|
48966
|
+
}
|
|
48967
|
+
if (excluded.length > 0) {
|
|
48968
|
+
const budgetSummary = remainingUsd > 0 ? `$${remainingUsd.toFixed(4)} remaining` : `budget exhausted ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd})`;
|
|
48969
|
+
console.log(
|
|
48970
|
+
`[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
|
|
48971
|
+
);
|
|
48972
|
+
const fromModel = excluded[0];
|
|
48973
|
+
const usingFree = modelsToTry.length === 1 && modelsToTry[0] === FREE_MODEL;
|
|
48974
|
+
if (usingFree) {
|
|
48975
|
+
budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
|
|
48976
|
+
|
|
48977
|
+
`;
|
|
48978
|
+
} else {
|
|
48979
|
+
const toModel = modelsToTry[0] ?? FREE_MODEL;
|
|
48980
|
+
budgetDowngradeNotice = `> **\u26A0\uFE0F Budget low** ($${remainingUsd > 0 ? remainingUsd.toFixed(4) : "0.0000"} remaining) \u2014 using ${toModel} instead of ${fromModel}.
|
|
48981
|
+
|
|
48982
|
+
`;
|
|
48983
|
+
}
|
|
48984
|
+
budgetDowngradeHeaderMode = "downgraded";
|
|
48985
|
+
}
|
|
48986
|
+
}
|
|
48739
48987
|
let upstream;
|
|
48740
48988
|
let lastError;
|
|
48741
48989
|
let actualModelUsed = modelId;
|
|
48742
48990
|
for (let i = 0; i < modelsToTry.length; i++) {
|
|
48743
48991
|
const tryModel = modelsToTry[i];
|
|
48744
48992
|
const isLastAttempt = i === modelsToTry.length - 1;
|
|
48993
|
+
if (globalController.signal.aborted) {
|
|
48994
|
+
throw new Error(`Request timed out after ${timeoutMs}ms`);
|
|
48995
|
+
}
|
|
48745
48996
|
console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
|
|
48997
|
+
const modelController = new AbortController();
|
|
48998
|
+
const modelTimeoutId = setTimeout(() => modelController.abort(), PER_MODEL_TIMEOUT_MS);
|
|
48999
|
+
const combinedSignal = AbortSignal.any([globalController.signal, modelController.signal]);
|
|
48746
49000
|
const result = await tryModelRequest(
|
|
48747
49001
|
upstreamUrl,
|
|
48748
49002
|
req.method ?? "POST",
|
|
@@ -48752,12 +49006,29 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48752
49006
|
maxTokens,
|
|
48753
49007
|
payFetch,
|
|
48754
49008
|
balanceMonitor,
|
|
48755
|
-
|
|
49009
|
+
combinedSignal
|
|
48756
49010
|
);
|
|
49011
|
+
clearTimeout(modelTimeoutId);
|
|
49012
|
+
if (globalController.signal.aborted) {
|
|
49013
|
+
throw new Error(`Request timed out after ${timeoutMs}ms`);
|
|
49014
|
+
}
|
|
49015
|
+
if (!result.success && modelController.signal.aborted && !isLastAttempt) {
|
|
49016
|
+
console.log(
|
|
49017
|
+
`[ClawRouter] Model ${tryModel} timed out after ${PER_MODEL_TIMEOUT_MS}ms, trying fallback`
|
|
49018
|
+
);
|
|
49019
|
+
recordProviderError(tryModel, "server_error");
|
|
49020
|
+
continue;
|
|
49021
|
+
}
|
|
48757
49022
|
if (result.success && result.response) {
|
|
48758
49023
|
upstream = result.response;
|
|
48759
49024
|
actualModelUsed = tryModel;
|
|
48760
49025
|
console.log(`[ClawRouter] Success with model: ${tryModel}`);
|
|
49026
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
|
|
49027
|
+
const costEst = estimateAmount(tryModel, body.length, maxTokens);
|
|
49028
|
+
if (costEst) {
|
|
49029
|
+
sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
|
|
49030
|
+
}
|
|
49031
|
+
}
|
|
48761
49032
|
break;
|
|
48762
49033
|
}
|
|
48763
49034
|
lastError = {
|
|
@@ -48773,7 +49044,52 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48773
49044
|
);
|
|
48774
49045
|
break;
|
|
48775
49046
|
}
|
|
48776
|
-
|
|
49047
|
+
const errorCat = result.errorCategory;
|
|
49048
|
+
if (errorCat) {
|
|
49049
|
+
recordProviderError(tryModel, errorCat);
|
|
49050
|
+
}
|
|
49051
|
+
if (errorCat === "rate_limited") {
|
|
49052
|
+
if (!isLastAttempt && !globalController.signal.aborted) {
|
|
49053
|
+
console.log(
|
|
49054
|
+
`[ClawRouter] Rate-limited on ${tryModel}, retrying in 200ms before failover`
|
|
49055
|
+
);
|
|
49056
|
+
await new Promise((resolve) => setTimeout(resolve, 200));
|
|
49057
|
+
if (!globalController.signal.aborted) {
|
|
49058
|
+
const retryController = new AbortController();
|
|
49059
|
+
const retryTimeoutId = setTimeout(
|
|
49060
|
+
() => retryController.abort(),
|
|
49061
|
+
PER_MODEL_TIMEOUT_MS
|
|
49062
|
+
);
|
|
49063
|
+
const retrySignal = AbortSignal.any([
|
|
49064
|
+
globalController.signal,
|
|
49065
|
+
retryController.signal
|
|
49066
|
+
]);
|
|
49067
|
+
const retryResult = await tryModelRequest(
|
|
49068
|
+
upstreamUrl,
|
|
49069
|
+
req.method ?? "POST",
|
|
49070
|
+
headers,
|
|
49071
|
+
body,
|
|
49072
|
+
tryModel,
|
|
49073
|
+
maxTokens,
|
|
49074
|
+
payFetch,
|
|
49075
|
+
balanceMonitor,
|
|
49076
|
+
retrySignal
|
|
49077
|
+
);
|
|
49078
|
+
clearTimeout(retryTimeoutId);
|
|
49079
|
+
if (retryResult.success && retryResult.response) {
|
|
49080
|
+
upstream = retryResult.response;
|
|
49081
|
+
actualModelUsed = tryModel;
|
|
49082
|
+
console.log(`[ClawRouter] Rate-limit retry succeeded for: ${tryModel}`);
|
|
49083
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
|
|
49084
|
+
const costEst = estimateAmount(tryModel, body.length, maxTokens);
|
|
49085
|
+
if (costEst) {
|
|
49086
|
+
sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
|
|
49087
|
+
}
|
|
49088
|
+
}
|
|
49089
|
+
break;
|
|
49090
|
+
}
|
|
49091
|
+
}
|
|
49092
|
+
}
|
|
48777
49093
|
markRateLimited(tryModel);
|
|
48778
49094
|
try {
|
|
48779
49095
|
const parsed = JSON.parse(result.errorBody || "{}");
|
|
@@ -48789,6 +49105,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48789
49105
|
}
|
|
48790
49106
|
} catch {
|
|
48791
49107
|
}
|
|
49108
|
+
} else if (errorCat === "overloaded") {
|
|
49109
|
+
markOverloaded(tryModel);
|
|
49110
|
+
} else if (errorCat === "auth_failure" || errorCat === "quota_exceeded") {
|
|
49111
|
+
console.log(
|
|
49112
|
+
`[ClawRouter] \u{1F511} ${errorCat === "auth_failure" ? "Auth failure" : "Quota exceeded"} for ${tryModel} \u2014 check provider config`
|
|
49113
|
+
);
|
|
48792
49114
|
}
|
|
48793
49115
|
const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
|
|
48794
49116
|
result.errorBody || ""
|
|
@@ -48948,6 +49270,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48948
49270
|
responseChunks.push(Buffer.from(noticeData));
|
|
48949
49271
|
balanceFallbackNotice = void 0;
|
|
48950
49272
|
}
|
|
49273
|
+
if (budgetDowngradeNotice) {
|
|
49274
|
+
const noticeChunk = {
|
|
49275
|
+
...baseChunk,
|
|
49276
|
+
choices: [
|
|
49277
|
+
{
|
|
49278
|
+
index: index2,
|
|
49279
|
+
delta: { content: budgetDowngradeNotice },
|
|
49280
|
+
logprobs: null,
|
|
49281
|
+
finish_reason: null
|
|
49282
|
+
}
|
|
49283
|
+
]
|
|
49284
|
+
};
|
|
49285
|
+
const noticeData = `data: ${JSON.stringify(noticeChunk)}
|
|
49286
|
+
|
|
49287
|
+
`;
|
|
49288
|
+
safeWrite(res, noticeData);
|
|
49289
|
+
responseChunks.push(Buffer.from(noticeData));
|
|
49290
|
+
budgetDowngradeNotice = void 0;
|
|
49291
|
+
}
|
|
48951
49292
|
if (content) {
|
|
48952
49293
|
const contentChunk = {
|
|
48953
49294
|
...baseChunk,
|
|
@@ -49051,6 +49392,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49051
49392
|
}
|
|
49052
49393
|
balanceFallbackNotice = void 0;
|
|
49053
49394
|
}
|
|
49395
|
+
if (budgetDowngradeNotice && responseBody.length > 0) {
|
|
49396
|
+
try {
|
|
49397
|
+
const parsed = JSON.parse(responseBody.toString());
|
|
49398
|
+
if (parsed.choices?.[0]?.message?.content !== void 0) {
|
|
49399
|
+
parsed.choices[0].message.content = budgetDowngradeNotice + parsed.choices[0].message.content;
|
|
49400
|
+
responseBody = Buffer.from(JSON.stringify(parsed));
|
|
49401
|
+
}
|
|
49402
|
+
} catch {
|
|
49403
|
+
}
|
|
49404
|
+
budgetDowngradeNotice = void 0;
|
|
49405
|
+
}
|
|
49406
|
+
if (budgetDowngradeHeaderMode) {
|
|
49407
|
+
responseHeaders["x-clawrouter-budget-downgrade"] = "1";
|
|
49408
|
+
responseHeaders["x-clawrouter-budget-mode"] = budgetDowngradeHeaderMode;
|
|
49409
|
+
budgetDowngradeHeaderMode = void 0;
|
|
49410
|
+
}
|
|
49054
49411
|
responseHeaders["content-length"] = String(responseBody.length);
|
|
49055
49412
|
res.writeHead(upstream.status, responseHeaders);
|
|
49056
49413
|
safeWrite(res, responseBody);
|