@blockrun/clawrouter 0.8.6 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +38 -6
- package/dist/cli.js.map +1 -1
- package/dist/index.js +38 -6
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -466,6 +466,18 @@ function getFallbackChain(tier, tierConfigs) {
|
|
|
466
466
|
const config = tierConfigs[tier];
|
|
467
467
|
return [config.primary, ...config.fallback];
|
|
468
468
|
}
|
|
469
|
+
/**
 * Estimate the cost of a request on `model` and compare it with the cost of
 * the same request on a baseline model.
 *
 * Prices are applied per 1e6 tokens: `tokens / 1e6 * price`.
 *
 * @param {string} model - Model id to look up in `modelPricing`.
 * @param {{ get(id: string): ({ inputPrice: number, outputPrice: number } | undefined) }} modelPricing
 *   Lookup (e.g. a Map) from model id to its pricing entry.
 * @param {number} estimatedInputTokens - Estimated number of input tokens.
 * @param {number} maxOutputTokens - Output token budget used for the estimate.
 * @param {string} [baselineModel="anthropic/claude-opus-4"] - Model id whose
 *   pricing is used as the comparison baseline.
 * @returns {{ costEstimate: number, baselineCost: number, savings: number }}
 *   `savings` is the fraction of the baseline cost avoided, clamped to be
 *   non-negative; it is 0 when the baseline cost is not positive.
 */
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, baselineModel = "anthropic/claude-opus-4") {
  // A missing or non-finite count would otherwise turn every figure into NaN.
  const toTokenCount = (value) => (Number.isFinite(value) && value > 0 ? value : 0);
  const inputTokens = toTokenCount(estimatedInputTokens);
  const outputTokens = toTokenCount(maxOutputTokens);

  // A model without a pricing entry is priced at 0.
  const costFor = (modelId) => {
    const pricing = modelPricing.get(modelId);
    if (!pricing) {
      return 0;
    }
    const inputCost = inputTokens / 1e6 * pricing.inputPrice;
    const outputCost = outputTokens / 1e6 * pricing.outputPrice;
    return inputCost + outputCost;
  };

  const costEstimate = costFor(model);
  const baselineCost = costFor(baselineModel);

  // NOTE(review): because an unpriced `model` costs 0 here, it reports
  // savings of 1 whenever the baseline is priced. Confirm that is intended.
  const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;

  return { costEstimate, baselineCost, savings };
}
|
|
469
481
|
function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
|
|
470
482
|
const fullChain = getFallbackChain(tier, tierConfigs);
|
|
471
483
|
const filtered = fullChain.filter((modelId) => {
|
|
@@ -1128,8 +1140,9 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1128
1140
|
// Agentic tier configs - models that excel at multi-step autonomous tasks
|
|
1129
1141
|
agenticTiers: {
|
|
1130
1142
|
SIMPLE: {
|
|
1131
|
-
primary: "
|
|
1132
|
-
|
|
1143
|
+
primary: "moonshot/kimi-k2.5",
|
|
1144
|
+
// Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
|
|
1145
|
+
fallback: ["anthropic/claude-haiku-4.5", "xai/grok-4-fast-non-reasoning", "openai/gpt-4o-mini"]
|
|
1133
1146
|
},
|
|
1134
1147
|
MEDIUM: {
|
|
1135
1148
|
primary: "xai/grok-code-fast-1",
|
|
@@ -3044,10 +3057,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3044
3057
|
heartbeatInterval = void 0;
|
|
3045
3058
|
}
|
|
3046
3059
|
if (routingDecision && actualModelUsed !== routingDecision.model) {
|
|
3060
|
+
const estimatedInputTokens = Math.ceil(body.length / 4);
|
|
3061
|
+
const newCosts = calculateModelCost(
|
|
3062
|
+
actualModelUsed,
|
|
3063
|
+
routerOpts.modelPricing,
|
|
3064
|
+
estimatedInputTokens,
|
|
3065
|
+
maxTokens
|
|
3066
|
+
);
|
|
3047
3067
|
routingDecision = {
|
|
3048
3068
|
...routingDecision,
|
|
3049
3069
|
model: actualModelUsed,
|
|
3050
|
-
reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}
|
|
3070
|
+
reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}`,
|
|
3071
|
+
costEstimate: newCosts.costEstimate,
|
|
3072
|
+
baselineCost: newCosts.baselineCost,
|
|
3073
|
+
savings: newCosts.savings
|
|
3051
3074
|
};
|
|
3052
3075
|
options.onRouted?.(routingDecision);
|
|
3053
3076
|
}
|
|
@@ -3239,13 +3262,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3239
3262
|
throw err;
|
|
3240
3263
|
}
|
|
3241
3264
|
if (routingDecision) {
|
|
3265
|
+
const estimatedInputTokens = Math.ceil(body.length / 4);
|
|
3266
|
+
const accurateCosts = calculateModelCost(
|
|
3267
|
+
routingDecision.model,
|
|
3268
|
+
routerOpts.modelPricing,
|
|
3269
|
+
estimatedInputTokens,
|
|
3270
|
+
maxTokens
|
|
3271
|
+
);
|
|
3272
|
+
const costWithBuffer = accurateCosts.costEstimate * 1.2;
|
|
3273
|
+
const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
|
|
3242
3274
|
const entry = {
|
|
3243
3275
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3244
3276
|
model: routingDecision.model,
|
|
3245
3277
|
tier: routingDecision.tier,
|
|
3246
|
-
cost:
|
|
3247
|
-
baselineCost:
|
|
3248
|
-
savings:
|
|
3278
|
+
cost: costWithBuffer,
|
|
3279
|
+
baselineCost: baselineWithBuffer,
|
|
3280
|
+
savings: accurateCosts.savings,
|
|
3249
3281
|
latencyMs: Date.now() - startTime
|
|
3250
3282
|
};
|
|
3251
3283
|
logUsage(entry).catch(() => {
|