@blockrun/clawrouter 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +35 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.js +35 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -466,6 +466,18 @@ function getFallbackChain(tier, tierConfigs) {
|
|
|
466
466
|
const config = tierConfigs[tier];
|
|
467
467
|
return [config.primary, ...config.fallback];
|
|
468
468
|
}
|
|
469
|
+
/**
 * Estimate what a request costs on `model` and how that compares with the
 * "anthropic/claude-opus-4" baseline.
 *
 * Pricing entries are looked up with `modelPricing.get(id)` and are read for
 * their `inputPrice` and `outputPrice` fields, which are applied per one
 * million tokens. A model without a pricing entry is costed at 0.
 *
 * @param {string} model - Id of the model to price.
 * @param {{ get(id: string): ({ inputPrice: number, outputPrice: number } | undefined) }} modelPricing
 *   Lookup from model id to its pricing entry (anything with a Map-like `get`).
 * @param {number} estimatedInputTokens - Estimated number of prompt tokens.
 * @param {number} maxOutputTokens - Upper bound on completion tokens.
 * @returns {{ costEstimate: number, baselineCost: number, savings: number }}
 *   `costEstimate` for `model`, `baselineCost` for the baseline model, and
 *   `savings` as a fraction of the baseline cost (never below 0; 0 when the
 *   baseline cost is not positive).
 */
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens) {
  // Price the same token counts against one pricing entry; no entry means 0.
  const priceRequest = (entry) => {
    if (!entry) {
      return 0;
    }
    const inputPart = estimatedInputTokens / 1e6 * entry.inputPrice;
    const outputPart = maxOutputTokens / 1e6 * entry.outputPrice;
    return inputPart + outputPart;
  };

  const costEstimate = priceRequest(modelPricing.get(model));
  const baselineCost = priceRequest(modelPricing.get("anthropic/claude-opus-4"));

  // Savings are only meaningful against a positive baseline, and a model that
  // is more expensive than the baseline is reported as 0 rather than negative.
  let savings = 0;
  if (baselineCost > 0) {
    const relativeDelta = (baselineCost - costEstimate) / baselineCost;
    savings = Math.max(0, relativeDelta);
  }

  return { costEstimate, baselineCost, savings };
}
|
|
469
481
|
function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
|
|
470
482
|
const fullChain = getFallbackChain(tier, tierConfigs);
|
|
471
483
|
const filtered = fullChain.filter((modelId) => {
|
|
@@ -3045,10 +3057,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3045
3057
|
heartbeatInterval = void 0;
|
|
3046
3058
|
}
|
|
3047
3059
|
if (routingDecision && actualModelUsed !== routingDecision.model) {
|
|
3060
|
+
const estimatedInputTokens = Math.ceil(body.length / 4);
|
|
3061
|
+
const newCosts = calculateModelCost(
|
|
3062
|
+
actualModelUsed,
|
|
3063
|
+
routerOpts.modelPricing,
|
|
3064
|
+
estimatedInputTokens,
|
|
3065
|
+
maxTokens
|
|
3066
|
+
);
|
|
3048
3067
|
routingDecision = {
|
|
3049
3068
|
...routingDecision,
|
|
3050
3069
|
model: actualModelUsed,
|
|
3051
|
-
reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}
|
|
3070
|
+
reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}`,
|
|
3071
|
+
costEstimate: newCosts.costEstimate,
|
|
3072
|
+
baselineCost: newCosts.baselineCost,
|
|
3073
|
+
savings: newCosts.savings
|
|
3052
3074
|
};
|
|
3053
3075
|
options.onRouted?.(routingDecision);
|
|
3054
3076
|
}
|
|
@@ -3240,13 +3262,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3240
3262
|
throw err;
|
|
3241
3263
|
}
|
|
3242
3264
|
if (routingDecision) {
|
|
3265
|
+
const estimatedInputTokens = Math.ceil(body.length / 4);
|
|
3266
|
+
const accurateCosts = calculateModelCost(
|
|
3267
|
+
routingDecision.model,
|
|
3268
|
+
routerOpts.modelPricing,
|
|
3269
|
+
estimatedInputTokens,
|
|
3270
|
+
maxTokens
|
|
3271
|
+
);
|
|
3272
|
+
const costWithBuffer = accurateCosts.costEstimate * 1.2;
|
|
3273
|
+
const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
|
|
3243
3274
|
const entry = {
|
|
3244
3275
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3245
3276
|
model: routingDecision.model,
|
|
3246
3277
|
tier: routingDecision.tier,
|
|
3247
|
-
cost:
|
|
3248
|
-
baselineCost:
|
|
3249
|
-
savings:
|
|
3278
|
+
cost: costWithBuffer,
|
|
3279
|
+
baselineCost: baselineWithBuffer,
|
|
3280
|
+
savings: accurateCosts.savings,
|
|
3250
3281
|
latencyMs: Date.now() - startTime
|
|
3251
3282
|
};
|
|
3252
3283
|
logUsage(entry).catch(() => {
|