@blockrun/clawrouter 0.8.6 → 0.8.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -466,6 +466,18 @@ function getFallbackChain(tier, tierConfigs) {
466
466
  const config = tierConfigs[tier];
467
467
  return [config.primary, ...config.fallback];
468
468
  }
469
/**
 * Estimate what a request costs on `model` and how that compares with a
 * baseline model.
 *
 * Prices are applied per million tokens: each cost is
 * `inputTokens / 1e6 * inputPrice + outputTokens / 1e6 * outputPrice`.
 *
 * @param {string} model - Key looked up in `modelPricing`.
 * @param {{ get(id: string): ({ inputPrice: number, outputPrice: number } | undefined) }} modelPricing
 *   Lookup (e.g. a Map) from model id to its pricing entry.
 * @param {number} estimatedInputTokens - Estimated number of input tokens.
 * @param {number} maxOutputTokens - Upper bound on output tokens.
 * @param {string} [baselineModel="anthropic/claude-opus-4"] - Model id whose
 *   pricing is used as the comparison baseline.
 * @returns {{ costEstimate: number, baselineCost: number, savings: number }}
 *   `savings` is the fraction of the baseline cost avoided, clamped to be
 *   non-negative; it is 0 when the baseline has no pricing (or costs nothing).
 */
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, baselineModel = "anthropic/claude-opus-4") {
  // A missing or non-numeric token count would otherwise turn every returned
  // field into NaN; count it as zero tokens instead.
  const toTokenCount = (value) => {
    const count = Number(value);
    return Number.isFinite(count) ? count : 0;
  };
  const inputTokens = toTokenCount(estimatedInputTokens);
  const outputTokens = toTokenCount(maxOutputTokens);

  // A model without a pricing entry is priced at 0.
  // NOTE(review): this means an unknown `model` reports savings of 1 (100%)
  // whenever the baseline is priced — confirm that is intended.
  const priceRequest = (pricing) => {
    if (!pricing) {
      return 0;
    }
    const inputCost = inputTokens / 1e6 * pricing.inputPrice;
    const outputCost = outputTokens / 1e6 * pricing.outputPrice;
    return inputCost + outputCost;
  };

  const costEstimate = priceRequest(modelPricing.get(model));
  const baselineCost = priceRequest(modelPricing.get(baselineModel));

  // Guard the division, and never report negative savings when the chosen
  // model is more expensive than the baseline.
  const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
  return { costEstimate, baselineCost, savings };
}
469
481
  function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
470
482
  const fullChain = getFallbackChain(tier, tierConfigs);
471
483
  const filtered = fullChain.filter((modelId) => {
@@ -1128,8 +1140,9 @@ var DEFAULT_ROUTING_CONFIG = {
1128
1140
  // Agentic tier configs - models that excel at multi-step autonomous tasks
1129
1141
  agenticTiers: {
1130
1142
  SIMPLE: {
1131
- primary: "anthropic/claude-haiku-4.5",
1132
- fallback: ["moonshot/kimi-k2.5", "xai/grok-4-fast-non-reasoning", "openai/gpt-4o-mini"]
1143
+ primary: "moonshot/kimi-k2.5",
1144
+ // Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
1145
+ fallback: ["anthropic/claude-haiku-4.5", "xai/grok-4-fast-non-reasoning", "openai/gpt-4o-mini"]
1133
1146
  },
1134
1147
  MEDIUM: {
1135
1148
  primary: "xai/grok-code-fast-1",
@@ -3044,10 +3057,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3044
3057
  heartbeatInterval = void 0;
3045
3058
  }
3046
3059
  if (routingDecision && actualModelUsed !== routingDecision.model) {
3060
+ const estimatedInputTokens = Math.ceil(body.length / 4);
3061
+ const newCosts = calculateModelCost(
3062
+ actualModelUsed,
3063
+ routerOpts.modelPricing,
3064
+ estimatedInputTokens,
3065
+ maxTokens
3066
+ );
3047
3067
  routingDecision = {
3048
3068
  ...routingDecision,
3049
3069
  model: actualModelUsed,
3050
- reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}`
3070
+ reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}`,
3071
+ costEstimate: newCosts.costEstimate,
3072
+ baselineCost: newCosts.baselineCost,
3073
+ savings: newCosts.savings
3051
3074
  };
3052
3075
  options.onRouted?.(routingDecision);
3053
3076
  }
@@ -3239,13 +3262,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3239
3262
  throw err;
3240
3263
  }
3241
3264
  if (routingDecision) {
3265
+ const estimatedInputTokens = Math.ceil(body.length / 4);
3266
+ const accurateCosts = calculateModelCost(
3267
+ routingDecision.model,
3268
+ routerOpts.modelPricing,
3269
+ estimatedInputTokens,
3270
+ maxTokens
3271
+ );
3272
+ const costWithBuffer = accurateCosts.costEstimate * 1.2;
3273
+ const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
3242
3274
  const entry = {
3243
3275
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
3244
3276
  model: routingDecision.model,
3245
3277
  tier: routingDecision.tier,
3246
- cost: routingDecision.costEstimate,
3247
- baselineCost: routingDecision.baselineCost,
3248
- savings: routingDecision.savings,
3278
+ cost: costWithBuffer,
3279
+ baselineCost: baselineWithBuffer,
3280
+ savings: accurateCosts.savings,
3249
3281
  latencyMs: Date.now() - startTime
3250
3282
  };
3251
3283
  logUsage(entry).catch(() => {