@blockrun/clawrouter 0.8.6 → 0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -931,6 +931,18 @@ function getFallbackChain(tier, tierConfigs) {
931
931
  const config = tierConfigs[tier];
932
932
  return [config.primary, ...config.fallback];
933
933
  }
934
/**
 * Estimate what a request costs on `model` and how that compares with a
 * baseline model.
 *
 * Prices are read from `modelPricing.get(id)` as `{ inputPrice, outputPrice }`
 * and are applied per 1e6 tokens.
 *
 * @param {string} model - Id of the model whose cost is being estimated.
 * @param {{ get(id: string): ({ inputPrice: number, outputPrice: number } | undefined) }} modelPricing
 *   Pricing lookup keyed by model id.
 * @param {number} estimatedInputTokens - Estimated prompt size in tokens.
 * @param {number} maxOutputTokens - Upper bound on generated tokens.
 * @param {string} [baselineModel="anthropic/claude-opus-4"] - Model id used as
 *   the comparison baseline.
 * @returns {{ costEstimate: number, baselineCost: number, savings: number }}
 *   `savings` is the fraction of the baseline cost avoided, clamped to be
 *   non-negative; it is 0 when no baseline pricing is available.
 */
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, baselineModel = "anthropic/claude-opus-4") {
  // A missing or non-numeric token count (e.g. an undefined max-token setting)
  // would otherwise turn every derived figure into NaN; count it as 0 tokens.
  const toTokenCount = (value) => {
    const count = Number(value);
    return Number.isFinite(count) && count > 0 ? count : 0;
  };
  const inputTokens = toTokenCount(estimatedInputTokens);
  const outputTokens = toTokenCount(maxOutputTokens);

  // Cost of this request on `modelId`; 0 when the model has no pricing entry.
  const costFor = (modelId) => {
    const pricing = modelPricing.get(modelId);
    if (!pricing) {
      return 0;
    }
    const inputCost = inputTokens / 1e6 * pricing.inputPrice;
    const outputCost = outputTokens / 1e6 * pricing.outputPrice;
    return inputCost + outputCost;
  };

  const costEstimate = costFor(model);
  const baselineCost = costFor(baselineModel);

  // NOTE: a model without pricing has costEstimate 0, so it reports savings
  // of 1 whenever the baseline is priced.
  const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;

  return { costEstimate, baselineCost, savings };
}
934
946
  function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
935
947
  const fullChain = getFallbackChain(tier, tierConfigs);
936
948
  const filtered = fullChain.filter((modelId) => {
@@ -1593,8 +1605,9 @@ var DEFAULT_ROUTING_CONFIG = {
1593
1605
  // Agentic tier configs - models that excel at multi-step autonomous tasks
1594
1606
  agenticTiers: {
1595
1607
  SIMPLE: {
1596
- primary: "anthropic/claude-haiku-4.5",
1597
- fallback: ["moonshot/kimi-k2.5", "xai/grok-4-fast-non-reasoning", "openai/gpt-4o-mini"]
1608
+ primary: "moonshot/kimi-k2.5",
1609
+ // Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
1610
+ fallback: ["anthropic/claude-haiku-4.5", "xai/grok-4-fast-non-reasoning", "openai/gpt-4o-mini"]
1598
1611
  },
1599
1612
  MEDIUM: {
1600
1613
  primary: "xai/grok-code-fast-1",
@@ -3176,10 +3189,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3176
3189
  heartbeatInterval = void 0;
3177
3190
  }
3178
3191
  if (routingDecision && actualModelUsed !== routingDecision.model) {
3192
+ const estimatedInputTokens = Math.ceil(body.length / 4);
3193
+ const newCosts = calculateModelCost(
3194
+ actualModelUsed,
3195
+ routerOpts.modelPricing,
3196
+ estimatedInputTokens,
3197
+ maxTokens
3198
+ );
3179
3199
  routingDecision = {
3180
3200
  ...routingDecision,
3181
3201
  model: actualModelUsed,
3182
- reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}`
3202
+ reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}`,
3203
+ costEstimate: newCosts.costEstimate,
3204
+ baselineCost: newCosts.baselineCost,
3205
+ savings: newCosts.savings
3183
3206
  };
3184
3207
  options.onRouted?.(routingDecision);
3185
3208
  }
@@ -3371,13 +3394,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3371
3394
  throw err;
3372
3395
  }
3373
3396
  if (routingDecision) {
3397
+ const estimatedInputTokens = Math.ceil(body.length / 4);
3398
+ const accurateCosts = calculateModelCost(
3399
+ routingDecision.model,
3400
+ routerOpts.modelPricing,
3401
+ estimatedInputTokens,
3402
+ maxTokens
3403
+ );
3404
+ const costWithBuffer = accurateCosts.costEstimate * 1.2;
3405
+ const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
3374
3406
  const entry = {
3375
3407
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
3376
3408
  model: routingDecision.model,
3377
3409
  tier: routingDecision.tier,
3378
- cost: routingDecision.costEstimate,
3379
- baselineCost: routingDecision.baselineCost,
3380
- savings: routingDecision.savings,
3410
+ cost: costWithBuffer,
3411
+ baselineCost: baselineWithBuffer,
3412
+ savings: accurateCosts.savings,
3381
3413
  latencyMs: Date.now() - startTime
3382
3414
  };
3383
3415
  logUsage(entry).catch(() => {