@blockrun/clawrouter 0.12.56 → 0.12.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -51
- package/dist/cli.js +376 -44
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +24 -0
- package/dist/index.js +397 -44
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/scripts/reinstall.sh +96 -17
- package/scripts/update.sh +93 -17
package/dist/cli.js
CHANGED
|
@@ -40443,6 +40443,13 @@ var MODEL_ALIASES = {
|
|
|
40443
40443
|
"grok-fast": "xai/grok-4-fast-reasoning",
|
|
40444
40444
|
"grok-code": "deepseek/deepseek-chat",
|
|
40445
40445
|
// was grok-code-fast-1, delisted due to poor retention
|
|
40446
|
+
// Delisted model redirects — full model IDs that were previously valid but removed
|
|
40447
|
+
"grok-code-fast-1": "deepseek/deepseek-chat",
|
|
40448
|
+
// bare alias
|
|
40449
|
+
"xai/grok-code-fast-1": "deepseek/deepseek-chat",
|
|
40450
|
+
// delisted 2026-03-12
|
|
40451
|
+
"xai/grok-3-fast": "xai/grok-4-fast-reasoning",
|
|
40452
|
+
// delisted (too expensive)
|
|
40446
40453
|
// NVIDIA
|
|
40447
40454
|
nvidia: "nvidia/gpt-oss-120b",
|
|
40448
40455
|
"gpt-120b": "nvidia/gpt-oss-120b",
|
|
@@ -46147,7 +46154,8 @@ var SessionStore = class {
|
|
|
46147
46154
|
requestCount: 1,
|
|
46148
46155
|
recentHashes: [],
|
|
46149
46156
|
strikes: 0,
|
|
46150
|
-
escalated: false
|
|
46157
|
+
escalated: false,
|
|
46158
|
+
sessionCostMicros: 0n
|
|
46151
46159
|
});
|
|
46152
46160
|
}
|
|
46153
46161
|
}
|
|
@@ -46236,6 +46244,39 @@ var SessionStore = class {
|
|
|
46236
46244
|
entry.escalated = true;
|
|
46237
46245
|
return { model: nextConfig.primary, tier: nextTier };
|
|
46238
46246
|
}
|
|
46247
|
+
/**
|
|
46248
|
+
* Add cost to a session's running total for maxCostPerRun tracking.
|
|
46249
|
+
* Cost is in USDC 6-decimal units (micros).
|
|
46250
|
+
* Creates a cost-tracking-only entry if none exists (e.g., explicit model requests
|
|
46251
|
+
* that never go through the routing path).
|
|
46252
|
+
*/
|
|
46253
|
+
addSessionCost(sessionId, additionalMicros) {
|
|
46254
|
+
let entry = this.sessions.get(sessionId);
|
|
46255
|
+
if (!entry) {
|
|
46256
|
+
const now = Date.now();
|
|
46257
|
+
entry = {
|
|
46258
|
+
model: "",
|
|
46259
|
+
tier: "DIRECT",
|
|
46260
|
+
createdAt: now,
|
|
46261
|
+
lastUsedAt: now,
|
|
46262
|
+
requestCount: 0,
|
|
46263
|
+
recentHashes: [],
|
|
46264
|
+
strikes: 0,
|
|
46265
|
+
escalated: false,
|
|
46266
|
+
sessionCostMicros: 0n
|
|
46267
|
+
};
|
|
46268
|
+
this.sessions.set(sessionId, entry);
|
|
46269
|
+
}
|
|
46270
|
+
entry.sessionCostMicros += additionalMicros;
|
|
46271
|
+
}
|
|
46272
|
+
/**
|
|
46273
|
+
* Get the total accumulated cost for a session in USD.
|
|
46274
|
+
*/
|
|
46275
|
+
getSessionCostUsd(sessionId) {
|
|
46276
|
+
const entry = this.sessions.get(sessionId);
|
|
46277
|
+
if (!entry) return 0;
|
|
46278
|
+
return Number(entry.sessionCostMicros) / 1e6;
|
|
46279
|
+
}
|
|
46239
46280
|
/**
|
|
46240
46281
|
* Stop the cleanup interval.
|
|
46241
46282
|
*/
|
|
@@ -46511,9 +46552,11 @@ var MAX_MESSAGES = 200;
|
|
|
46511
46552
|
var CONTEXT_LIMIT_KB = 5120;
|
|
46512
46553
|
var HEARTBEAT_INTERVAL_MS = 2e3;
|
|
46513
46554
|
var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
|
|
46555
|
+
var PER_MODEL_TIMEOUT_MS = 6e4;
|
|
46514
46556
|
var MAX_FALLBACK_ATTEMPTS = 5;
|
|
46515
46557
|
var HEALTH_CHECK_TIMEOUT_MS = 2e3;
|
|
46516
46558
|
var RATE_LIMIT_COOLDOWN_MS = 6e4;
|
|
46559
|
+
var OVERLOAD_COOLDOWN_MS = 15e3;
|
|
46517
46560
|
var PORT_RETRY_ATTEMPTS = 5;
|
|
46518
46561
|
var PORT_RETRY_DELAY_MS = 1e3;
|
|
46519
46562
|
var MODEL_BODY_READ_TIMEOUT_MS = 3e5;
|
|
@@ -46663,7 +46706,41 @@ function transformPaymentError(errorBody) {
|
|
|
46663
46706
|
}
|
|
46664
46707
|
return errorBody;
|
|
46665
46708
|
}
|
|
46709
|
+
/**
 * Classify a non-200 upstream response into an error category.
 *
 * @param {number} status - HTTP status code of the upstream response.
 * @param {string} [body] - Response body text; a missing body is treated as
 *   empty text.
 * @returns {"auth_failure"|"payment_error"|"quota_exceeded"|"rate_limited"|"overloaded"|"server_error"|"config_error"|null}
 *   The category, or `null` when the response does not match any category.
 */
function categorizeError(status, body) {
  // RegExp.prototype.test coerces its argument to a string, so a nullish body
  // would otherwise be matched as the literal text "undefined" / "null".
  const text = body == null ? "" : String(body);
  if (status === 401) return "auth_failure";
  if (status === 402) return "payment_error";
  if (status === 403) {
    // 403 is an auth failure unless the body points at a plan/quota problem.
    return /plan.*limit|quota.*exceeded|subscription|allowance/i.test(text) ? "quota_exceeded" : "auth_failure";
  }
  if (status === 429) return "rate_limited";
  if (status === 529) return "overloaded";
  // Checked before the generic 5xx rule so that 503 + overload wording wins.
  if (status === 503 && /overload|capacity|too.*many.*request/i.test(text)) return "overloaded";
  if (status >= 500) return "server_error";
  // 400/413 only count when the body matches a known provider error pattern.
  if ((status === 400 || status === 413) && PROVIDER_ERROR_PATTERNS.some((p) => p.test(text))) {
    return "config_error";
  }
  return null;
}
|
|
46666
46727
|
var rateLimitedModels = /* @__PURE__ */ new Map();
|
|
46728
|
+
var overloadedModels = /* @__PURE__ */ new Map();
|
|
46729
|
+
var perProviderErrors = /* @__PURE__ */ new Map();
|
|
46730
|
+
/**
 * Increment the error counter for a model in `perProviderErrors`.
 *
 * The first error seen for a model seeds a counter object with every known
 * category set to zero.
 *
 * @param {string} modelId - Model the error was observed on.
 * @param {string} category - Error category to count.
 * @returns {void}
 */
function recordProviderError(modelId, category) {
  let counts = perProviderErrors.get(modelId);
  if (!counts) {
    counts = {
      auth_failure: 0,
      quota_exceeded: 0,
      rate_limited: 0,
      overloaded: 0,
      server_error: 0,
      payment_error: 0,
      config_error: 0
    };
    perProviderErrors.set(modelId, counts);
  }
  // Start unseeded categories at 0: `undefined++` would store NaN, which
  // JSON.stringify renders as null.
  counts[category] = (counts[category] ?? 0) + 1;
}
|
|
46667
46744
|
function isRateLimited(modelId) {
|
|
46668
46745
|
const hitTime = rateLimitedModels.get(modelId);
|
|
46669
46746
|
if (!hitTime) return false;
|
|
@@ -46678,17 +46755,30 @@ function markRateLimited(modelId) {
|
|
|
46678
46755
|
rateLimitedModels.set(modelId, Date.now());
|
|
46679
46756
|
console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for 60s`);
|
|
46680
46757
|
}
|
|
46758
|
+
/**
 * Record that a model just reported an overload and log the cooldown.
 *
 * Stores the current timestamp in `overloadedModels` under `modelId`.
 *
 * @param {string} modelId - Model that returned an overload error.
 * @returns {void}
 */
function markOverloaded(modelId) {
  overloadedModels.set(modelId, Date.now());
  // Derive the logged duration from the constant so the message cannot drift
  // from the cooldown actually applied.
  const cooldownSeconds = OVERLOAD_COOLDOWN_MS / 1e3;
  console.log(`[ClawRouter] Model ${modelId} overloaded, will deprioritize for ${cooldownSeconds}s`);
}
|
|
46762
|
+
/**
 * Report whether a model is still inside its overload cooldown.
 *
 * Reads the timestamp stored in `overloadedModels`. Once
 * `OVERLOAD_COOLDOWN_MS` has elapsed, the entry is removed and the model is
 * reported as not overloaded.
 *
 * @param {string} modelId - Model to check.
 * @returns {boolean} `true` while the cooldown is active, otherwise `false`.
 */
function isOverloaded(modelId) {
  const markedAt = overloadedModels.get(modelId);
  if (markedAt) {
    const cooldownElapsed = Date.now() - markedAt >= OVERLOAD_COOLDOWN_MS;
    if (!cooldownElapsed) {
      return true;
    }
    // Cooldown is over: forget the mark.
    overloadedModels.delete(modelId);
  }
  return false;
}
|
|
46681
46771
|
/**
 * Reorder candidate models so that degraded ones are tried last.
 *
 * A model counts as degraded when `isRateLimited` or `isOverloaded` reports
 * it. Relative order is kept within each group, and no model is dropped.
 *
 * @param {string[]} models - Candidate model ids in preference order.
 * @returns {string[]} New array: non-degraded models first, then degraded.
 */
function prioritizeNonRateLimited(models) {
  const healthy = [];
  const coolingDown = [];
  for (const candidate of models) {
    const bucket = isRateLimited(candidate) || isOverloaded(candidate) ? coolingDown : healthy;
    bucket.push(candidate);
  }
  return healthy.concat(coolingDown);
}
|
|
46693
46783
|
function canWrite(res) {
|
|
46694
46784
|
return !res.writableEnded && !res.destroyed && res.socket !== null && !res.socket.destroyed && res.socket.writable;
|
|
@@ -46823,37 +46913,6 @@ function detectDegradedSuccessResponse(body) {
|
|
|
46823
46913
|
}
|
|
46824
46914
|
return void 0;
|
|
46825
46915
|
}
|
|
46826
|
-
var FALLBACK_STATUS_CODES = [
|
|
46827
|
-
400,
|
|
46828
|
-
// Bad request - sometimes used for billing errors
|
|
46829
|
-
401,
|
|
46830
|
-
// Unauthorized - provider API key issues
|
|
46831
|
-
402,
|
|
46832
|
-
// Payment required - but from upstream, not x402
|
|
46833
|
-
403,
|
|
46834
|
-
// Forbidden - provider restrictions
|
|
46835
|
-
413,
|
|
46836
|
-
// Payload too large - request exceeds model's context limit
|
|
46837
|
-
429,
|
|
46838
|
-
// Rate limited
|
|
46839
|
-
500,
|
|
46840
|
-
// Internal server error
|
|
46841
|
-
502,
|
|
46842
|
-
// Bad gateway
|
|
46843
|
-
503,
|
|
46844
|
-
// Service unavailable
|
|
46845
|
-
504
|
|
46846
|
-
// Gateway timeout
|
|
46847
|
-
];
|
|
46848
|
-
function isProviderError(status, body) {
|
|
46849
|
-
if (!FALLBACK_STATUS_CODES.includes(status)) {
|
|
46850
|
-
return false;
|
|
46851
|
-
}
|
|
46852
|
-
if (status >= 500) {
|
|
46853
|
-
return true;
|
|
46854
|
-
}
|
|
46855
|
-
return PROVIDER_ERROR_PATTERNS.some((pattern) => pattern.test(body));
|
|
46856
|
-
}
|
|
46857
46916
|
var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
|
|
46858
46917
|
var ROLE_MAPPINGS = {
|
|
46859
46918
|
developer: "system",
|
|
@@ -47359,7 +47418,16 @@ async function startProxy(options) {
|
|
|
47359
47418
|
"Content-Type": "application/json",
|
|
47360
47419
|
"Cache-Control": "no-cache"
|
|
47361
47420
|
});
|
|
47362
|
-
res.end(
|
|
47421
|
+
res.end(
|
|
47422
|
+
JSON.stringify(
|
|
47423
|
+
{
|
|
47424
|
+
...stats,
|
|
47425
|
+
providerErrors: Object.fromEntries(perProviderErrors)
|
|
47426
|
+
},
|
|
47427
|
+
null,
|
|
47428
|
+
2
|
|
47429
|
+
)
|
|
47430
|
+
);
|
|
47363
47431
|
} catch (err) {
|
|
47364
47432
|
res.writeHead(500, { "Content-Type": "application/json" });
|
|
47365
47433
|
res.end(
|
|
@@ -47802,12 +47870,13 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
|
|
|
47802
47870
|
if (response.status !== 200) {
|
|
47803
47871
|
const errorBodyChunks = await readBodyWithTimeout(response.body, ERROR_BODY_READ_TIMEOUT_MS);
|
|
47804
47872
|
const errorBody = Buffer.concat(errorBodyChunks).toString();
|
|
47805
|
-
const
|
|
47873
|
+
const category = categorizeError(response.status, errorBody);
|
|
47806
47874
|
return {
|
|
47807
47875
|
success: false,
|
|
47808
47876
|
errorBody,
|
|
47809
47877
|
errorStatus: response.status,
|
|
47810
|
-
isProviderError:
|
|
47878
|
+
isProviderError: category !== null,
|
|
47879
|
+
errorCategory: category ?? void 0
|
|
47811
47880
|
};
|
|
47812
47881
|
}
|
|
47813
47882
|
const contentType = response.headers.get("content-type") || "";
|
|
@@ -47860,6 +47929,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
47860
47929
|
let maxTokens = 4096;
|
|
47861
47930
|
let routingProfile = null;
|
|
47862
47931
|
let balanceFallbackNotice;
|
|
47932
|
+
let budgetDowngradeNotice;
|
|
47933
|
+
let budgetDowngradeHeaderMode;
|
|
47863
47934
|
let accumulatedContent = "";
|
|
47864
47935
|
let responseInputTokens;
|
|
47865
47936
|
const isChatCompletion = req.url?.includes("/chat/completions");
|
|
@@ -47874,6 +47945,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
47874
47945
|
let bodyModified = false;
|
|
47875
47946
|
const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
|
|
47876
47947
|
const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
|
|
47948
|
+
hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
|
|
47877
47949
|
const rawLastContent = lastUserMsg?.content;
|
|
47878
47950
|
const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
|
|
47879
47951
|
if (sessionId && parsedMessages.length > 0) {
|
|
@@ -48530,6 +48602,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48530
48602
|
options.onRouted?.(routingDecision);
|
|
48531
48603
|
}
|
|
48532
48604
|
}
|
|
48605
|
+
if (!effectiveSessionId && parsedMessages.length > 0) {
|
|
48606
|
+
effectiveSessionId = deriveSessionId(parsedMessages);
|
|
48607
|
+
}
|
|
48533
48608
|
if (bodyModified) {
|
|
48534
48609
|
body = Buffer.from(JSON.stringify(parsed));
|
|
48535
48610
|
}
|
|
@@ -48620,7 +48695,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48620
48695
|
}
|
|
48621
48696
|
deduplicator.markInflight(dedupKey);
|
|
48622
48697
|
let estimatedCostMicros;
|
|
48623
|
-
|
|
48698
|
+
let isFreeModel = modelId === FREE_MODEL;
|
|
48624
48699
|
if (modelId && !options.skipBalanceCheck && !isFreeModel) {
|
|
48625
48700
|
const estimated = estimateAmount(modelId, body.length, maxTokens);
|
|
48626
48701
|
if (estimated) {
|
|
@@ -48633,6 +48708,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48633
48708
|
`[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
|
|
48634
48709
|
);
|
|
48635
48710
|
modelId = FREE_MODEL;
|
|
48711
|
+
isFreeModel = true;
|
|
48636
48712
|
const parsed = JSON.parse(body.toString());
|
|
48637
48713
|
parsed.model = FREE_MODEL;
|
|
48638
48714
|
body = Buffer.from(JSON.stringify(parsed));
|
|
@@ -48659,6 +48735,89 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48659
48735
|
}
|
|
48660
48736
|
}
|
|
48661
48737
|
}
|
|
48738
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "strict") {
|
|
48739
|
+
const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
|
|
48740
|
+
const thisReqEstStr = estimatedCostMicros !== void 0 ? estimatedCostMicros.toString() : modelId ? estimateAmount(modelId, body.length, maxTokens) : void 0;
|
|
48741
|
+
const thisReqEstUsd = thisReqEstStr ? Number(thisReqEstStr) / 1e6 : 0;
|
|
48742
|
+
const projectedCostUsd = runCostUsd + thisReqEstUsd;
|
|
48743
|
+
if (projectedCostUsd > options.maxCostPerRunUsd) {
|
|
48744
|
+
console.log(
|
|
48745
|
+
`[ClawRouter] Cost cap exceeded for session ${effectiveSessionId.slice(0, 8)}...: projected $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) > $${options.maxCostPerRunUsd} limit`
|
|
48746
|
+
);
|
|
48747
|
+
res.writeHead(429, {
|
|
48748
|
+
"Content-Type": "application/json",
|
|
48749
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1"
|
|
48750
|
+
});
|
|
48751
|
+
res.end(
|
|
48752
|
+
JSON.stringify({
|
|
48753
|
+
error: {
|
|
48754
|
+
message: `ClawRouter cost cap exceeded: projected spend $${projectedCostUsd.toFixed(4)} (spent $${runCostUsd.toFixed(4)} + est $${thisReqEstUsd.toFixed(4)}) would exceed limit $${options.maxCostPerRunUsd}`,
|
|
48755
|
+
type: "cost_cap_exceeded",
|
|
48756
|
+
code: "cost_cap_exceeded"
|
|
48757
|
+
}
|
|
48758
|
+
})
|
|
48759
|
+
);
|
|
48760
|
+
deduplicator.removeInflight(dedupKey);
|
|
48761
|
+
return;
|
|
48762
|
+
}
|
|
48763
|
+
}
|
|
48764
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
|
|
48765
|
+
const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
|
|
48766
|
+
const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
|
|
48767
|
+
const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
|
|
48768
|
+
if (isComplexOrAgentic) {
|
|
48769
|
+
const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
|
|
48770
|
+
if (m.id === FREE_MODEL) return false;
|
|
48771
|
+
const est = estimateAmount(m.id, body.length, maxTokens);
|
|
48772
|
+
return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
|
|
48773
|
+
});
|
|
48774
|
+
if (!canAffordAnyNonFreeModel) {
|
|
48775
|
+
console.log(
|
|
48776
|
+
`[ClawRouter] Budget insufficient for agentic/complex session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (silent downgrade would corrupt tool/complex responses)`
|
|
48777
|
+
);
|
|
48778
|
+
res.writeHead(429, {
|
|
48779
|
+
"Content-Type": "application/json",
|
|
48780
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1",
|
|
48781
|
+
"X-ClawRouter-Budget-Mode": "blocked"
|
|
48782
|
+
});
|
|
48783
|
+
res.end(
|
|
48784
|
+
JSON.stringify({
|
|
48785
|
+
error: {
|
|
48786
|
+
message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue.`,
|
|
48787
|
+
type: "cost_cap_exceeded",
|
|
48788
|
+
code: "budget_exhausted"
|
|
48789
|
+
}
|
|
48790
|
+
})
|
|
48791
|
+
);
|
|
48792
|
+
deduplicator.removeInflight(dedupKey);
|
|
48793
|
+
return;
|
|
48794
|
+
}
|
|
48795
|
+
} else if (!routingDecision && modelId && modelId !== FREE_MODEL) {
|
|
48796
|
+
const est = estimateAmount(modelId, body.length, maxTokens);
|
|
48797
|
+
const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
|
|
48798
|
+
if (!canAfford) {
|
|
48799
|
+
console.log(
|
|
48800
|
+
`[ClawRouter] Budget insufficient for explicit model ${modelId} in session ${effectiveSessionId.slice(0, 8)}...: $${Math.max(0, remainingUsd).toFixed(4)} remaining \u2014 blocking (user explicitly chose ${modelId})`
|
|
48801
|
+
);
|
|
48802
|
+
res.writeHead(429, {
|
|
48803
|
+
"Content-Type": "application/json",
|
|
48804
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1",
|
|
48805
|
+
"X-ClawRouter-Budget-Mode": "blocked"
|
|
48806
|
+
});
|
|
48807
|
+
res.end(
|
|
48808
|
+
JSON.stringify({
|
|
48809
|
+
error: {
|
|
48810
|
+
message: `ClawRouter budget exhausted: $${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd}). Increase maxCostPerRun to continue using ${modelId}.`,
|
|
48811
|
+
type: "cost_cap_exceeded",
|
|
48812
|
+
code: "budget_exhausted"
|
|
48813
|
+
}
|
|
48814
|
+
})
|
|
48815
|
+
);
|
|
48816
|
+
deduplicator.removeInflight(dedupKey);
|
|
48817
|
+
return;
|
|
48818
|
+
}
|
|
48819
|
+
}
|
|
48820
|
+
}
|
|
48662
48821
|
let heartbeatInterval;
|
|
48663
48822
|
let headersSentEarly = false;
|
|
48664
48823
|
if (isStreaming) {
|
|
@@ -48703,8 +48862,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48703
48862
|
}
|
|
48704
48863
|
});
|
|
48705
48864
|
const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
|
|
48706
|
-
const
|
|
48707
|
-
const timeoutId = setTimeout(() =>
|
|
48865
|
+
const globalController = new AbortController();
|
|
48866
|
+
const timeoutId = setTimeout(() => globalController.abort(), timeoutMs);
|
|
48708
48867
|
try {
|
|
48709
48868
|
let modelsToTry;
|
|
48710
48869
|
if (routingDecision) {
|
|
@@ -48761,13 +48920,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48761
48920
|
if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
|
|
48762
48921
|
modelsToTry.push(FREE_MODEL);
|
|
48763
48922
|
}
|
|
48923
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !isFreeModel && (options.maxCostPerRunMode ?? "graceful") === "graceful") {
|
|
48924
|
+
const runCostUsd = sessionStore.getSessionCostUsd(effectiveSessionId);
|
|
48925
|
+
const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
|
|
48926
|
+
const beforeFilter = [...modelsToTry];
|
|
48927
|
+
modelsToTry = modelsToTry.filter((m) => {
|
|
48928
|
+
if (m === FREE_MODEL) return true;
|
|
48929
|
+
const est = estimateAmount(m, body.length, maxTokens);
|
|
48930
|
+
if (!est) return true;
|
|
48931
|
+
return Number(est) / 1e6 <= remainingUsd;
|
|
48932
|
+
});
|
|
48933
|
+
const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
|
|
48934
|
+
const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
|
|
48935
|
+
const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m === FREE_MODEL);
|
|
48936
|
+
if (isComplexOrAgenticFilter && filteredToFreeOnly) {
|
|
48937
|
+
const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
|
|
48938
|
+
console.log(
|
|
48939
|
+
`[ClawRouter] Budget filter left only free model for complex/agentic session \u2014 blocking (${budgetSummary})`
|
|
48940
|
+
);
|
|
48941
|
+
const errPayload = JSON.stringify({
|
|
48942
|
+
error: {
|
|
48943
|
+
message: `ClawRouter budget exhausted: remaining budget (${budgetSummary}) cannot support a complex/tool request. Increase maxCostPerRun to continue.`,
|
|
48944
|
+
type: "cost_cap_exceeded",
|
|
48945
|
+
code: "budget_exhausted"
|
|
48946
|
+
}
|
|
48947
|
+
});
|
|
48948
|
+
if (heartbeatInterval) clearInterval(heartbeatInterval);
|
|
48949
|
+
if (headersSentEarly) {
|
|
48950
|
+
safeWrite(res, `data: ${errPayload}
|
|
48951
|
+
|
|
48952
|
+
data: [DONE]
|
|
48953
|
+
|
|
48954
|
+
`);
|
|
48955
|
+
res.end();
|
|
48956
|
+
} else {
|
|
48957
|
+
res.writeHead(429, {
|
|
48958
|
+
"Content-Type": "application/json",
|
|
48959
|
+
"X-ClawRouter-Cost-Cap-Exceeded": "1",
|
|
48960
|
+
"X-ClawRouter-Budget-Mode": "blocked"
|
|
48961
|
+
});
|
|
48962
|
+
res.end(errPayload);
|
|
48963
|
+
}
|
|
48964
|
+
deduplicator.removeInflight(dedupKey);
|
|
48965
|
+
return;
|
|
48966
|
+
}
|
|
48967
|
+
if (excluded.length > 0) {
|
|
48968
|
+
const budgetSummary = remainingUsd > 0 ? `$${remainingUsd.toFixed(4)} remaining` : `budget exhausted ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd})`;
|
|
48969
|
+
console.log(
|
|
48970
|
+
`[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
|
|
48971
|
+
);
|
|
48972
|
+
const fromModel = excluded[0];
|
|
48973
|
+
const usingFree = modelsToTry.length === 1 && modelsToTry[0] === FREE_MODEL;
|
|
48974
|
+
if (usingFree) {
|
|
48975
|
+
budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
|
|
48976
|
+
|
|
48977
|
+
`;
|
|
48978
|
+
} else {
|
|
48979
|
+
const toModel = modelsToTry[0] ?? FREE_MODEL;
|
|
48980
|
+
budgetDowngradeNotice = `> **\u26A0\uFE0F Budget low** ($${remainingUsd > 0 ? remainingUsd.toFixed(4) : "0.0000"} remaining) \u2014 using ${toModel} instead of ${fromModel}.
|
|
48981
|
+
|
|
48982
|
+
`;
|
|
48983
|
+
}
|
|
48984
|
+
budgetDowngradeHeaderMode = "downgraded";
|
|
48985
|
+
}
|
|
48986
|
+
}
|
|
48764
48987
|
let upstream;
|
|
48765
48988
|
let lastError;
|
|
48766
48989
|
let actualModelUsed = modelId;
|
|
48767
48990
|
for (let i = 0; i < modelsToTry.length; i++) {
|
|
48768
48991
|
const tryModel = modelsToTry[i];
|
|
48769
48992
|
const isLastAttempt = i === modelsToTry.length - 1;
|
|
48993
|
+
if (globalController.signal.aborted) {
|
|
48994
|
+
throw new Error(`Request timed out after ${timeoutMs}ms`);
|
|
48995
|
+
}
|
|
48770
48996
|
console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
|
|
48997
|
+
const modelController = new AbortController();
|
|
48998
|
+
const modelTimeoutId = setTimeout(() => modelController.abort(), PER_MODEL_TIMEOUT_MS);
|
|
48999
|
+
const combinedSignal = AbortSignal.any([globalController.signal, modelController.signal]);
|
|
48771
49000
|
const result = await tryModelRequest(
|
|
48772
49001
|
upstreamUrl,
|
|
48773
49002
|
req.method ?? "POST",
|
|
@@ -48777,12 +49006,29 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48777
49006
|
maxTokens,
|
|
48778
49007
|
payFetch,
|
|
48779
49008
|
balanceMonitor,
|
|
48780
|
-
|
|
49009
|
+
combinedSignal
|
|
48781
49010
|
);
|
|
49011
|
+
clearTimeout(modelTimeoutId);
|
|
49012
|
+
if (globalController.signal.aborted) {
|
|
49013
|
+
throw new Error(`Request timed out after ${timeoutMs}ms`);
|
|
49014
|
+
}
|
|
49015
|
+
if (!result.success && modelController.signal.aborted && !isLastAttempt) {
|
|
49016
|
+
console.log(
|
|
49017
|
+
`[ClawRouter] Model ${tryModel} timed out after ${PER_MODEL_TIMEOUT_MS}ms, trying fallback`
|
|
49018
|
+
);
|
|
49019
|
+
recordProviderError(tryModel, "server_error");
|
|
49020
|
+
continue;
|
|
49021
|
+
}
|
|
48782
49022
|
if (result.success && result.response) {
|
|
48783
49023
|
upstream = result.response;
|
|
48784
49024
|
actualModelUsed = tryModel;
|
|
48785
49025
|
console.log(`[ClawRouter] Success with model: ${tryModel}`);
|
|
49026
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
|
|
49027
|
+
const costEst = estimateAmount(tryModel, body.length, maxTokens);
|
|
49028
|
+
if (costEst) {
|
|
49029
|
+
sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
|
|
49030
|
+
}
|
|
49031
|
+
}
|
|
48786
49032
|
break;
|
|
48787
49033
|
}
|
|
48788
49034
|
lastError = {
|
|
@@ -48798,7 +49044,52 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48798
49044
|
);
|
|
48799
49045
|
break;
|
|
48800
49046
|
}
|
|
48801
|
-
|
|
49047
|
+
const errorCat = result.errorCategory;
|
|
49048
|
+
if (errorCat) {
|
|
49049
|
+
recordProviderError(tryModel, errorCat);
|
|
49050
|
+
}
|
|
49051
|
+
if (errorCat === "rate_limited") {
|
|
49052
|
+
if (!isLastAttempt && !globalController.signal.aborted) {
|
|
49053
|
+
console.log(
|
|
49054
|
+
`[ClawRouter] Rate-limited on ${tryModel}, retrying in 200ms before failover`
|
|
49055
|
+
);
|
|
49056
|
+
await new Promise((resolve) => setTimeout(resolve, 200));
|
|
49057
|
+
if (!globalController.signal.aborted) {
|
|
49058
|
+
const retryController = new AbortController();
|
|
49059
|
+
const retryTimeoutId = setTimeout(
|
|
49060
|
+
() => retryController.abort(),
|
|
49061
|
+
PER_MODEL_TIMEOUT_MS
|
|
49062
|
+
);
|
|
49063
|
+
const retrySignal = AbortSignal.any([
|
|
49064
|
+
globalController.signal,
|
|
49065
|
+
retryController.signal
|
|
49066
|
+
]);
|
|
49067
|
+
const retryResult = await tryModelRequest(
|
|
49068
|
+
upstreamUrl,
|
|
49069
|
+
req.method ?? "POST",
|
|
49070
|
+
headers,
|
|
49071
|
+
body,
|
|
49072
|
+
tryModel,
|
|
49073
|
+
maxTokens,
|
|
49074
|
+
payFetch,
|
|
49075
|
+
balanceMonitor,
|
|
49076
|
+
retrySignal
|
|
49077
|
+
);
|
|
49078
|
+
clearTimeout(retryTimeoutId);
|
|
49079
|
+
if (retryResult.success && retryResult.response) {
|
|
49080
|
+
upstream = retryResult.response;
|
|
49081
|
+
actualModelUsed = tryModel;
|
|
49082
|
+
console.log(`[ClawRouter] Rate-limit retry succeeded for: ${tryModel}`);
|
|
49083
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && tryModel !== FREE_MODEL) {
|
|
49084
|
+
const costEst = estimateAmount(tryModel, body.length, maxTokens);
|
|
49085
|
+
if (costEst) {
|
|
49086
|
+
sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
|
|
49087
|
+
}
|
|
49088
|
+
}
|
|
49089
|
+
break;
|
|
49090
|
+
}
|
|
49091
|
+
}
|
|
49092
|
+
}
|
|
48802
49093
|
markRateLimited(tryModel);
|
|
48803
49094
|
try {
|
|
48804
49095
|
const parsed = JSON.parse(result.errorBody || "{}");
|
|
@@ -48814,6 +49105,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48814
49105
|
}
|
|
48815
49106
|
} catch {
|
|
48816
49107
|
}
|
|
49108
|
+
} else if (errorCat === "overloaded") {
|
|
49109
|
+
markOverloaded(tryModel);
|
|
49110
|
+
} else if (errorCat === "auth_failure" || errorCat === "quota_exceeded") {
|
|
49111
|
+
console.log(
|
|
49112
|
+
`[ClawRouter] \u{1F511} ${errorCat === "auth_failure" ? "Auth failure" : "Quota exceeded"} for ${tryModel} \u2014 check provider config`
|
|
49113
|
+
);
|
|
48817
49114
|
}
|
|
48818
49115
|
const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
|
|
48819
49116
|
result.errorBody || ""
|
|
@@ -48973,6 +49270,25 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48973
49270
|
responseChunks.push(Buffer.from(noticeData));
|
|
48974
49271
|
balanceFallbackNotice = void 0;
|
|
48975
49272
|
}
|
|
49273
|
+
if (budgetDowngradeNotice) {
|
|
49274
|
+
const noticeChunk = {
|
|
49275
|
+
...baseChunk,
|
|
49276
|
+
choices: [
|
|
49277
|
+
{
|
|
49278
|
+
index: index2,
|
|
49279
|
+
delta: { content: budgetDowngradeNotice },
|
|
49280
|
+
logprobs: null,
|
|
49281
|
+
finish_reason: null
|
|
49282
|
+
}
|
|
49283
|
+
]
|
|
49284
|
+
};
|
|
49285
|
+
const noticeData = `data: ${JSON.stringify(noticeChunk)}
|
|
49286
|
+
|
|
49287
|
+
`;
|
|
49288
|
+
safeWrite(res, noticeData);
|
|
49289
|
+
responseChunks.push(Buffer.from(noticeData));
|
|
49290
|
+
budgetDowngradeNotice = void 0;
|
|
49291
|
+
}
|
|
48976
49292
|
if (content) {
|
|
48977
49293
|
const contentChunk = {
|
|
48978
49294
|
...baseChunk,
|
|
@@ -49076,6 +49392,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49076
49392
|
}
|
|
49077
49393
|
balanceFallbackNotice = void 0;
|
|
49078
49394
|
}
|
|
49395
|
+
if (budgetDowngradeNotice && responseBody.length > 0) {
|
|
49396
|
+
try {
|
|
49397
|
+
const parsed = JSON.parse(responseBody.toString());
|
|
49398
|
+
if (parsed.choices?.[0]?.message?.content !== void 0) {
|
|
49399
|
+
parsed.choices[0].message.content = budgetDowngradeNotice + parsed.choices[0].message.content;
|
|
49400
|
+
responseBody = Buffer.from(JSON.stringify(parsed));
|
|
49401
|
+
}
|
|
49402
|
+
} catch {
|
|
49403
|
+
}
|
|
49404
|
+
budgetDowngradeNotice = void 0;
|
|
49405
|
+
}
|
|
49406
|
+
if (budgetDowngradeHeaderMode) {
|
|
49407
|
+
responseHeaders["x-clawrouter-budget-downgrade"] = "1";
|
|
49408
|
+
responseHeaders["x-clawrouter-budget-mode"] = budgetDowngradeHeaderMode;
|
|
49409
|
+
budgetDowngradeHeaderMode = void 0;
|
|
49410
|
+
}
|
|
49079
49411
|
responseHeaders["content-length"] = String(responseBody.length);
|
|
49080
49412
|
res.writeHead(upstream.status, responseHeaders);
|
|
49081
49413
|
safeWrite(res, responseBody);
|