@blockrun/clawrouter 0.8.6 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +38 -6
- package/dist/cli.js.map +1 -1
- package/dist/index.js +38 -6
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -931,6 +931,18 @@ function getFallbackChain(tier, tierConfigs) {
|
|
|
931
931
|
const config = tierConfigs[tier];
|
|
932
932
|
return [config.primary, ...config.fallback];
|
|
933
933
|
}
|
|
934
|
+
/**
 * Estimate what a request costs on `model` and how that compares with a
 * baseline model.
 *
 * Prices are applied per 1e6 tokens: each token count is divided by 1e6 and
 * multiplied by the matching `inputPrice` / `outputPrice` of the pricing entry.
 *
 * A model with no entry in `modelPricing` is costed at 0. Consequently an
 * unpriced `model` reports `savings` of 1 whenever the baseline is priced, and
 * an unpriced baseline yields `baselineCost` 0 and `savings` 0.
 *
 * @param {string} model - Id of the model to price.
 * @param {{ get(id: string): ({ inputPrice: number, outputPrice: number } | undefined) }} modelPricing
 *   Lookup (e.g. a Map) from model id to its pricing entry.
 * @param {number} estimatedInputTokens - Estimated number of input tokens.
 * @param {number} maxOutputTokens - Output token budget used for the estimate.
 * @param {string} [baselineModel="anthropic/claude-opus-4"] - Id of the model
 *   the savings are measured against.
 * @returns {{ costEstimate: number, baselineCost: number, savings: number }}
 *   `savings` is the fraction of the baseline cost avoided, never below 0.
 */
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, baselineModel = "anthropic/claude-opus-4") {
  // Shared pricing formula for both the selected model and the baseline.
  const priceRequest = (modelId) => {
    const pricing = modelPricing.get(modelId);
    if (!pricing) {
      return 0;
    }
    const inputCost = estimatedInputTokens / 1e6 * pricing.inputPrice;
    const outputCost = maxOutputTokens / 1e6 * pricing.outputPrice;
    return inputCost + outputCost;
  };

  const costEstimate = priceRequest(model);
  const baselineCost = priceRequest(baselineModel);

  // Guard the division: with no (or zero-priced) baseline there is nothing to
  // compare against, so savings are reported as 0. Negative savings (model
  // more expensive than the baseline) are clamped to 0.
  const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;

  return { costEstimate, baselineCost, savings };
}
|
|
934
946
|
function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
|
|
935
947
|
const fullChain = getFallbackChain(tier, tierConfigs);
|
|
936
948
|
const filtered = fullChain.filter((modelId) => {
|
|
@@ -1593,8 +1605,9 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1593
1605
|
// Agentic tier configs - models that excel at multi-step autonomous tasks
|
|
1594
1606
|
agenticTiers: {
|
|
1595
1607
|
SIMPLE: {
|
|
1596
|
-
primary: "
|
|
1597
|
-
|
|
1608
|
+
primary: "moonshot/kimi-k2.5",
|
|
1609
|
+
// Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
|
|
1610
|
+
fallback: ["anthropic/claude-haiku-4.5", "xai/grok-4-fast-non-reasoning", "openai/gpt-4o-mini"]
|
|
1598
1611
|
},
|
|
1599
1612
|
MEDIUM: {
|
|
1600
1613
|
primary: "xai/grok-code-fast-1",
|
|
@@ -3176,10 +3189,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3176
3189
|
heartbeatInterval = void 0;
|
|
3177
3190
|
}
|
|
3178
3191
|
if (routingDecision && actualModelUsed !== routingDecision.model) {
|
|
3192
|
+
const estimatedInputTokens = Math.ceil(body.length / 4);
|
|
3193
|
+
const newCosts = calculateModelCost(
|
|
3194
|
+
actualModelUsed,
|
|
3195
|
+
routerOpts.modelPricing,
|
|
3196
|
+
estimatedInputTokens,
|
|
3197
|
+
maxTokens
|
|
3198
|
+
);
|
|
3179
3199
|
routingDecision = {
|
|
3180
3200
|
...routingDecision,
|
|
3181
3201
|
model: actualModelUsed,
|
|
3182
|
-
reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}
|
|
3202
|
+
reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}`,
|
|
3203
|
+
costEstimate: newCosts.costEstimate,
|
|
3204
|
+
baselineCost: newCosts.baselineCost,
|
|
3205
|
+
savings: newCosts.savings
|
|
3183
3206
|
};
|
|
3184
3207
|
options.onRouted?.(routingDecision);
|
|
3185
3208
|
}
|
|
@@ -3371,13 +3394,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3371
3394
|
throw err;
|
|
3372
3395
|
}
|
|
3373
3396
|
if (routingDecision) {
|
|
3397
|
+
const estimatedInputTokens = Math.ceil(body.length / 4);
|
|
3398
|
+
const accurateCosts = calculateModelCost(
|
|
3399
|
+
routingDecision.model,
|
|
3400
|
+
routerOpts.modelPricing,
|
|
3401
|
+
estimatedInputTokens,
|
|
3402
|
+
maxTokens
|
|
3403
|
+
);
|
|
3404
|
+
const costWithBuffer = accurateCosts.costEstimate * 1.2;
|
|
3405
|
+
const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
|
|
3374
3406
|
const entry = {
|
|
3375
3407
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3376
3408
|
model: routingDecision.model,
|
|
3377
3409
|
tier: routingDecision.tier,
|
|
3378
|
-
cost:
|
|
3379
|
-
baselineCost:
|
|
3380
|
-
savings:
|
|
3410
|
+
cost: costWithBuffer,
|
|
3411
|
+
baselineCost: baselineWithBuffer,
|
|
3412
|
+
savings: accurateCosts.savings,
|
|
3381
3413
|
latencyMs: Date.now() - startTime
|
|
3382
3414
|
};
|
|
3383
3415
|
logUsage(entry).catch(() => {
|