@blockrun/clawrouter 0.8.20 → 0.8.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -8
- package/dist/cli.js +194 -72
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +6 -1
- package/dist/index.js +197 -74
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -203,6 +203,10 @@ type RoutingConfig = {
|
|
|
203
203
|
tiers: Record<Tier, TierConfig>;
|
|
204
204
|
/** Tier configs for agentic mode - models that excel at multi-step tasks */
|
|
205
205
|
agenticTiers?: Record<Tier, TierConfig>;
|
|
206
|
+
/** Tier configs for eco profile - ultra cost-optimized (blockrun/eco) */
|
|
207
|
+
ecoTiers?: Record<Tier, TierConfig>;
|
|
208
|
+
/** Tier configs for premium profile - best quality (blockrun/premium) */
|
|
209
|
+
premiumTiers?: Record<Tier, TierConfig>;
|
|
206
210
|
overrides: OverridesConfig;
|
|
207
211
|
};
|
|
208
212
|
|
|
@@ -225,7 +229,7 @@ declare function getFallbackChain(tier: Tier, tierConfigs: Record<Tier, TierConf
|
|
|
225
229
|
* Calculate cost for a specific model (used when fallback model is used).
|
|
226
230
|
* Returns updated cost fields for RoutingDecision.
|
|
227
231
|
*/
|
|
228
|
-
declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number): {
|
|
232
|
+
declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number, routingProfile?: "free" | "eco" | "auto" | "premium"): {
|
|
229
233
|
costEstimate: number;
|
|
230
234
|
baselineCost: number;
|
|
231
235
|
savings: number;
|
|
@@ -264,6 +268,7 @@ declare const DEFAULT_ROUTING_CONFIG: RoutingConfig;
|
|
|
264
268
|
type RouterOptions = {
|
|
265
269
|
config: RoutingConfig;
|
|
266
270
|
modelPricing: Map<string, ModelPricing>;
|
|
271
|
+
routingProfile?: "free" | "eco" | "auto" | "premium";
|
|
267
272
|
};
|
|
268
273
|
/**
|
|
269
274
|
* Route a request to the cheapest capable model.
|
package/dist/index.js
CHANGED
|
@@ -41,16 +41,40 @@ function resolveModelAlias(model) {
|
|
|
41
41
|
return model;
|
|
42
42
|
}
|
|
43
43
|
var BLOCKRUN_MODELS = [
|
|
44
|
-
// Smart routing meta-
|
|
44
|
+
// Smart routing meta-models — proxy replaces with actual model
|
|
45
45
|
// NOTE: Model IDs are WITHOUT provider prefix (OpenClaw adds "blockrun/" automatically)
|
|
46
46
|
{
|
|
47
47
|
id: "auto",
|
|
48
|
-
name: "
|
|
48
|
+
name: "Auto (Smart Router - Balanced)",
|
|
49
49
|
inputPrice: 0,
|
|
50
50
|
outputPrice: 0,
|
|
51
51
|
contextWindow: 105e4,
|
|
52
52
|
maxOutput: 128e3
|
|
53
53
|
},
|
|
54
|
+
{
|
|
55
|
+
id: "free",
|
|
56
|
+
name: "Free (NVIDIA GPT-OSS-120B only)",
|
|
57
|
+
inputPrice: 0,
|
|
58
|
+
outputPrice: 0,
|
|
59
|
+
contextWindow: 128e3,
|
|
60
|
+
maxOutput: 4096
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
id: "eco",
|
|
64
|
+
name: "Eco (Smart Router - Cost Optimized)",
|
|
65
|
+
inputPrice: 0,
|
|
66
|
+
outputPrice: 0,
|
|
67
|
+
contextWindow: 105e4,
|
|
68
|
+
maxOutput: 128e3
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
id: "premium",
|
|
72
|
+
name: "Premium (Smart Router - Best Quality)",
|
|
73
|
+
inputPrice: 0,
|
|
74
|
+
outputPrice: 0,
|
|
75
|
+
contextWindow: 2e6,
|
|
76
|
+
maxOutput: 2e5
|
|
77
|
+
},
|
|
54
78
|
// OpenAI GPT-5 Family
|
|
55
79
|
{
|
|
56
80
|
id: "openai/gpt-5.2",
|
|
@@ -352,8 +376,8 @@ var BLOCKRUN_MODELS = [
|
|
|
352
376
|
{
|
|
353
377
|
id: "xai/grok-4-0709",
|
|
354
378
|
name: "Grok 4 (0709)",
|
|
355
|
-
inputPrice:
|
|
356
|
-
outputPrice:
|
|
379
|
+
inputPrice: 0.2,
|
|
380
|
+
outputPrice: 1.5,
|
|
357
381
|
contextWindow: 131072,
|
|
358
382
|
maxOutput: 16384,
|
|
359
383
|
reasoning: true
|
|
@@ -909,7 +933,7 @@ function calibrateConfidence(distance, steepness) {
|
|
|
909
933
|
}
|
|
910
934
|
|
|
911
935
|
// src/router/selector.ts
|
|
912
|
-
function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens) {
|
|
936
|
+
function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
|
|
913
937
|
const tierConfig = tierConfigs[tier];
|
|
914
938
|
const model = tierConfig.primary;
|
|
915
939
|
const pricing = modelPricing.get(model);
|
|
@@ -918,13 +942,13 @@ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPric
|
|
|
918
942
|
const inputCost = estimatedInputTokens / 1e6 * inputPrice;
|
|
919
943
|
const outputCost = maxOutputTokens / 1e6 * outputPrice;
|
|
920
944
|
const costEstimate = inputCost + outputCost;
|
|
921
|
-
const opusPricing = modelPricing.get("anthropic/claude-opus-4");
|
|
945
|
+
const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
|
|
922
946
|
const opusInputPrice = opusPricing?.inputPrice ?? 0;
|
|
923
947
|
const opusOutputPrice = opusPricing?.outputPrice ?? 0;
|
|
924
948
|
const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
|
|
925
949
|
const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
|
|
926
950
|
const baselineCost = baselineInput + baselineOutput;
|
|
927
|
-
const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
951
|
+
const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
928
952
|
return {
|
|
929
953
|
model,
|
|
930
954
|
tier,
|
|
@@ -940,20 +964,20 @@ function getFallbackChain(tier, tierConfigs) {
|
|
|
940
964
|
const config = tierConfigs[tier];
|
|
941
965
|
return [config.primary, ...config.fallback];
|
|
942
966
|
}
|
|
943
|
-
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens) {
|
|
967
|
+
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
|
|
944
968
|
const pricing = modelPricing.get(model);
|
|
945
969
|
const inputPrice = pricing?.inputPrice ?? 0;
|
|
946
970
|
const outputPrice = pricing?.outputPrice ?? 0;
|
|
947
971
|
const inputCost = estimatedInputTokens / 1e6 * inputPrice;
|
|
948
972
|
const outputCost = maxOutputTokens / 1e6 * outputPrice;
|
|
949
973
|
const costEstimate = inputCost + outputCost;
|
|
950
|
-
const opusPricing = modelPricing.get("anthropic/claude-opus-4");
|
|
974
|
+
const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
|
|
951
975
|
const opusInputPrice = opusPricing?.inputPrice ?? 0;
|
|
952
976
|
const opusOutputPrice = opusPricing?.outputPrice ?? 0;
|
|
953
977
|
const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
|
|
954
978
|
const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
|
|
955
979
|
const baselineCost = baselineInput + baselineOutput;
|
|
956
|
-
const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
980
|
+
const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
957
981
|
return { costEstimate, baselineCost, savings };
|
|
958
982
|
}
|
|
959
983
|
function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
|
|
@@ -1582,15 +1606,17 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1582
1606
|
// Tier boundaries on weighted score axis
|
|
1583
1607
|
tierBoundaries: {
|
|
1584
1608
|
simpleMedium: 0,
|
|
1585
|
-
mediumComplex: 0.
|
|
1586
|
-
|
|
1587
|
-
|
|
1609
|
+
mediumComplex: 0.3,
|
|
1610
|
+
// Raised from 0.18 - prevent simple tasks from reaching expensive COMPLEX tier
|
|
1611
|
+
complexReasoning: 0.5
|
|
1612
|
+
// Raised from 0.4 - reserve for true reasoning tasks
|
|
1588
1613
|
},
|
|
1589
1614
|
// Sigmoid steepness for confidence calibration
|
|
1590
1615
|
confidenceSteepness: 12,
|
|
1591
1616
|
// Below this confidence → ambiguous (null tier)
|
|
1592
1617
|
confidenceThreshold: 0.7
|
|
1593
1618
|
},
|
|
1619
|
+
// Auto (balanced) tier configs - current default smart routing
|
|
1594
1620
|
tiers: {
|
|
1595
1621
|
SIMPLE: {
|
|
1596
1622
|
primary: "nvidia/kimi-k2.5",
|
|
@@ -1599,7 +1625,9 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1599
1625
|
"google/gemini-2.5-flash",
|
|
1600
1626
|
"nvidia/gpt-oss-120b",
|
|
1601
1627
|
"nvidia/gpt-oss-20b",
|
|
1602
|
-
"deepseek/deepseek-chat"
|
|
1628
|
+
"deepseek/deepseek-chat",
|
|
1629
|
+
"xai/grok-code-fast-1"
|
|
1630
|
+
// Added for better quality fallback
|
|
1603
1631
|
]
|
|
1604
1632
|
},
|
|
1605
1633
|
MEDIUM: {
|
|
@@ -1614,7 +1642,8 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1614
1642
|
},
|
|
1615
1643
|
COMPLEX: {
|
|
1616
1644
|
primary: "google/gemini-2.5-pro",
|
|
1617
|
-
fallback: ["
|
|
1645
|
+
fallback: ["xai/grok-4-0709", "openai/gpt-4o", "openai/gpt-5.2", "anthropic/claude-sonnet-4"]
|
|
1646
|
+
// Grok first for cost efficiency, Sonnet as last resort
|
|
1618
1647
|
},
|
|
1619
1648
|
REASONING: {
|
|
1620
1649
|
primary: "xai/grok-4-1-fast-reasoning",
|
|
@@ -1628,6 +1657,52 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1628
1657
|
]
|
|
1629
1658
|
}
|
|
1630
1659
|
},
|
|
1660
|
+
// Eco tier configs - ultra cost-optimized (blockrun/eco)
|
|
1661
|
+
ecoTiers: {
|
|
1662
|
+
SIMPLE: {
|
|
1663
|
+
primary: "nvidia/kimi-k2.5",
|
|
1664
|
+
// $0.001/$0.001
|
|
1665
|
+
fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "nvidia/gpt-oss-20b"]
|
|
1666
|
+
},
|
|
1667
|
+
MEDIUM: {
|
|
1668
|
+
primary: "deepseek/deepseek-chat",
|
|
1669
|
+
// $0.14/$0.28
|
|
1670
|
+
fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "nvidia/kimi-k2.5"]
|
|
1671
|
+
},
|
|
1672
|
+
COMPLEX: {
|
|
1673
|
+
primary: "xai/grok-4-0709",
|
|
1674
|
+
// $0.20/$1.50
|
|
1675
|
+
fallback: ["deepseek/deepseek-chat", "google/gemini-2.5-flash", "openai/gpt-4o-mini"]
|
|
1676
|
+
},
|
|
1677
|
+
REASONING: {
|
|
1678
|
+
primary: "deepseek/deepseek-reasoner",
|
|
1679
|
+
// $0.55/$2.19
|
|
1680
|
+
fallback: ["xai/grok-4-fast-reasoning", "moonshot/kimi-k2.5"]
|
|
1681
|
+
}
|
|
1682
|
+
},
|
|
1683
|
+
// Premium tier configs - best quality (blockrun/premium)
|
|
1684
|
+
premiumTiers: {
|
|
1685
|
+
SIMPLE: {
|
|
1686
|
+
primary: "google/gemini-2.5-flash",
|
|
1687
|
+
// $0.075/$0.30
|
|
1688
|
+
fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
|
|
1689
|
+
},
|
|
1690
|
+
MEDIUM: {
|
|
1691
|
+
primary: "openai/gpt-4o",
|
|
1692
|
+
// $2.50/$10
|
|
1693
|
+
fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
|
|
1694
|
+
},
|
|
1695
|
+
COMPLEX: {
|
|
1696
|
+
primary: "anthropic/claude-opus-4.5",
|
|
1697
|
+
// $15/$75
|
|
1698
|
+
fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "google/gemini-2.5-pro"]
|
|
1699
|
+
},
|
|
1700
|
+
REASONING: {
|
|
1701
|
+
primary: "openai/o3",
|
|
1702
|
+
// $10/$40
|
|
1703
|
+
fallback: ["anthropic/claude-opus-4.5", "openai/o1", "google/gemini-2.5-pro"]
|
|
1704
|
+
}
|
|
1705
|
+
},
|
|
1631
1706
|
// Agentic tier configs - models that excel at multi-step autonomous tasks
|
|
1632
1707
|
agenticTiers: {
|
|
1633
1708
|
SIMPLE: {
|
|
@@ -1669,21 +1744,34 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
|
|
|
1669
1744
|
const fullText = `${systemPrompt ?? ""} ${prompt}`;
|
|
1670
1745
|
const estimatedTokens = Math.ceil(fullText.length / 4);
|
|
1671
1746
|
const ruleResult = classifyByRules(prompt, systemPrompt, estimatedTokens, config.scoring);
|
|
1672
|
-
const
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1747
|
+
const { routingProfile } = options;
|
|
1748
|
+
let tierConfigs;
|
|
1749
|
+
let profileSuffix = "";
|
|
1750
|
+
if (routingProfile === "eco" && config.ecoTiers) {
|
|
1751
|
+
tierConfigs = config.ecoTiers;
|
|
1752
|
+
profileSuffix = " | eco";
|
|
1753
|
+
} else if (routingProfile === "premium" && config.premiumTiers) {
|
|
1754
|
+
tierConfigs = config.premiumTiers;
|
|
1755
|
+
profileSuffix = " | premium";
|
|
1756
|
+
} else {
|
|
1757
|
+
const agenticScore = ruleResult.agenticScore ?? 0;
|
|
1758
|
+
const isAutoAgentic = agenticScore >= 0.5;
|
|
1759
|
+
const isExplicitAgentic = config.overrides.agenticMode ?? false;
|
|
1760
|
+
const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
|
|
1761
|
+
tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
|
|
1762
|
+
profileSuffix = useAgenticTiers ? " | agentic" : "";
|
|
1763
|
+
}
|
|
1677
1764
|
if (estimatedTokens > config.overrides.maxTokensForceComplex) {
|
|
1678
1765
|
return selectModel(
|
|
1679
1766
|
"COMPLEX",
|
|
1680
1767
|
0.95,
|
|
1681
1768
|
"rules",
|
|
1682
|
-
`Input exceeds ${config.overrides.maxTokensForceComplex} tokens${
|
|
1769
|
+
`Input exceeds ${config.overrides.maxTokensForceComplex} tokens${profileSuffix}`,
|
|
1683
1770
|
tierConfigs,
|
|
1684
1771
|
modelPricing,
|
|
1685
1772
|
estimatedTokens,
|
|
1686
|
-
maxOutputTokens
|
|
1773
|
+
maxOutputTokens,
|
|
1774
|
+
routingProfile
|
|
1687
1775
|
);
|
|
1688
1776
|
}
|
|
1689
1777
|
const hasStructuredOutput = systemPrompt ? /json|structured|schema/i.test(systemPrompt) : false;
|
|
@@ -1707,11 +1795,7 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
|
|
|
1707
1795
|
tier = minTier;
|
|
1708
1796
|
}
|
|
1709
1797
|
}
|
|
1710
|
-
|
|
1711
|
-
reasoning += " | auto-agentic";
|
|
1712
|
-
} else if (isExplicitAgentic) {
|
|
1713
|
-
reasoning += " | agentic";
|
|
1714
|
-
}
|
|
1798
|
+
reasoning += profileSuffix;
|
|
1715
1799
|
return selectModel(
|
|
1716
1800
|
tier,
|
|
1717
1801
|
confidence,
|
|
@@ -1720,7 +1804,8 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
|
|
|
1720
1804
|
tierConfigs,
|
|
1721
1805
|
modelPricing,
|
|
1722
1806
|
estimatedTokens,
|
|
1723
|
-
maxOutputTokens
|
|
1807
|
+
maxOutputTokens,
|
|
1808
|
+
routingProfile
|
|
1724
1809
|
);
|
|
1725
1810
|
}
|
|
1726
1811
|
|
|
@@ -1878,12 +1963,13 @@ async function getStats(days = 7) {
|
|
|
1878
1963
|
function formatStatsAscii(stats) {
|
|
1879
1964
|
const lines = [];
|
|
1880
1965
|
lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
1881
|
-
lines.push("\u2551
|
|
1966
|
+
lines.push("\u2551 ClawRouter by BlockRun v0.8.20 \u2551");
|
|
1967
|
+
lines.push("\u2551 Usage Statistics \u2551");
|
|
1882
1968
|
lines.push("\u2560\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2563");
|
|
1883
1969
|
lines.push(`\u2551 Period: ${stats.period.padEnd(49)}\u2551`);
|
|
1884
1970
|
lines.push(`\u2551 Total Requests: ${stats.totalRequests.toString().padEnd(41)}\u2551`);
|
|
1885
1971
|
lines.push(`\u2551 Total Cost: $${stats.totalCost.toFixed(4).padEnd(43)}\u2551`);
|
|
1886
|
-
lines.push(`\u2551 Baseline Cost (Opus): $${stats.totalBaselineCost.toFixed(4).padEnd(
|
|
1972
|
+
lines.push(`\u2551 Baseline Cost (Opus 4.5): $${stats.totalBaselineCost.toFixed(4).padEnd(30)}\u2551`);
|
|
1887
1973
|
const savingsLine = `\u2551 \u{1F4B0} Total Saved: $${stats.totalSavings.toFixed(4)} (${stats.savingsPercentage.toFixed(1)}%)`;
|
|
1888
1974
|
if (stats.entriesWithBaseline < stats.totalRequests && stats.entriesWithBaseline > 0) {
|
|
1889
1975
|
lines.push(savingsLine.padEnd(61) + "\u2551");
|
|
@@ -2411,7 +2497,16 @@ async function checkForUpdates() {
|
|
|
2411
2497
|
// src/proxy.ts
|
|
2412
2498
|
var BLOCKRUN_API = "https://blockrun.ai/api";
|
|
2413
2499
|
var AUTO_MODEL = "blockrun/auto";
|
|
2414
|
-
var
|
|
2500
|
+
var ROUTING_PROFILES = /* @__PURE__ */ new Set([
|
|
2501
|
+
"blockrun/free",
|
|
2502
|
+
"free",
|
|
2503
|
+
"blockrun/eco",
|
|
2504
|
+
"eco",
|
|
2505
|
+
"blockrun/auto",
|
|
2506
|
+
"auto",
|
|
2507
|
+
"blockrun/premium",
|
|
2508
|
+
"premium"
|
|
2509
|
+
]);
|
|
2415
2510
|
var FREE_MODEL = "nvidia/gpt-oss-120b";
|
|
2416
2511
|
var HEARTBEAT_INTERVAL_MS = 2e3;
|
|
2417
2512
|
var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
|
|
@@ -3093,6 +3188,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3093
3188
|
let isStreaming = false;
|
|
3094
3189
|
let modelId = "";
|
|
3095
3190
|
let maxTokens = 4096;
|
|
3191
|
+
let routingProfile = null;
|
|
3096
3192
|
const isChatCompletion = req.url?.includes("/chat/completions");
|
|
3097
3193
|
if (isChatCompletion && body.length > 0) {
|
|
3098
3194
|
try {
|
|
@@ -3108,58 +3204,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3108
3204
|
const normalizedModel = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
|
|
3109
3205
|
const resolvedModel = resolveModelAlias(normalizedModel);
|
|
3110
3206
|
const wasAlias = resolvedModel !== normalizedModel;
|
|
3111
|
-
const
|
|
3207
|
+
const isRoutingProfile = ROUTING_PROFILES.has(normalizedModel);
|
|
3208
|
+
if (isRoutingProfile) {
|
|
3209
|
+
const profileName = normalizedModel.replace("blockrun/", "");
|
|
3210
|
+
routingProfile = profileName;
|
|
3211
|
+
}
|
|
3112
3212
|
console.log(
|
|
3113
|
-
`[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}
|
|
3213
|
+
`[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}${routingProfile ? `, profile: ${routingProfile}` : ""}`
|
|
3114
3214
|
);
|
|
3115
|
-
if (wasAlias && !
|
|
3215
|
+
if (wasAlias && !isRoutingProfile) {
|
|
3116
3216
|
parsed.model = resolvedModel;
|
|
3117
3217
|
modelId = resolvedModel;
|
|
3118
3218
|
bodyModified = true;
|
|
3119
3219
|
}
|
|
3120
|
-
if (
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
console.log(
|
|
3127
|
-
`[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
|
|
3128
|
-
);
|
|
3129
|
-
parsed.model = existingSession.model;
|
|
3130
|
-
modelId = existingSession.model;
|
|
3220
|
+
if (isRoutingProfile) {
|
|
3221
|
+
if (routingProfile === "free") {
|
|
3222
|
+
const freeModel = "nvidia/gpt-oss-120b";
|
|
3223
|
+
console.log(`[ClawRouter] Free profile - using ${freeModel} directly`);
|
|
3224
|
+
parsed.model = freeModel;
|
|
3225
|
+
modelId = freeModel;
|
|
3131
3226
|
bodyModified = true;
|
|
3132
|
-
|
|
3227
|
+
await logUsage({
|
|
3228
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3229
|
+
model: freeModel,
|
|
3230
|
+
tier: "SIMPLE",
|
|
3231
|
+
cost: 0,
|
|
3232
|
+
baselineCost: 0,
|
|
3233
|
+
savings: 1,
|
|
3234
|
+
// 100% savings
|
|
3235
|
+
latencyMs: 0
|
|
3236
|
+
});
|
|
3133
3237
|
} else {
|
|
3134
|
-
const
|
|
3135
|
-
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
|
|
3139
|
-
lastUserMsg = messages[i];
|
|
3140
|
-
break;
|
|
3141
|
-
}
|
|
3142
|
-
}
|
|
3143
|
-
}
|
|
3144
|
-
const systemMsg = messages?.find((m) => m.role === "system");
|
|
3145
|
-
const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
|
|
3146
|
-
const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
|
|
3147
|
-
const tools = parsed.tools;
|
|
3148
|
-
const hasTools = Array.isArray(tools) && tools.length > 0;
|
|
3149
|
-
if (hasTools) {
|
|
3150
|
-
console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
|
|
3151
|
-
}
|
|
3152
|
-
routingDecision = route(prompt, systemPrompt, maxTokens, routerOpts);
|
|
3153
|
-
parsed.model = routingDecision.model;
|
|
3154
|
-
modelId = routingDecision.model;
|
|
3155
|
-
bodyModified = true;
|
|
3156
|
-
if (sessionId) {
|
|
3157
|
-
sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
|
|
3238
|
+
const sessionId = getSessionId(
|
|
3239
|
+
req.headers
|
|
3240
|
+
);
|
|
3241
|
+
const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
|
|
3242
|
+
if (existingSession) {
|
|
3158
3243
|
console.log(
|
|
3159
|
-
`[ClawRouter] Session ${sessionId
|
|
3244
|
+
`[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
|
|
3160
3245
|
);
|
|
3246
|
+
parsed.model = existingSession.model;
|
|
3247
|
+
modelId = existingSession.model;
|
|
3248
|
+
bodyModified = true;
|
|
3249
|
+
sessionStore.touchSession(sessionId);
|
|
3250
|
+
} else {
|
|
3251
|
+
const messages = parsed.messages;
|
|
3252
|
+
let lastUserMsg;
|
|
3253
|
+
if (messages) {
|
|
3254
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
3255
|
+
if (messages[i].role === "user") {
|
|
3256
|
+
lastUserMsg = messages[i];
|
|
3257
|
+
break;
|
|
3258
|
+
}
|
|
3259
|
+
}
|
|
3260
|
+
}
|
|
3261
|
+
const systemMsg = messages?.find((m) => m.role === "system");
|
|
3262
|
+
const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
|
|
3263
|
+
const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
|
|
3264
|
+
const tools = parsed.tools;
|
|
3265
|
+
const hasTools = Array.isArray(tools) && tools.length > 0;
|
|
3266
|
+
if (hasTools) {
|
|
3267
|
+
console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
|
|
3268
|
+
}
|
|
3269
|
+
routingDecision = route(prompt, systemPrompt, maxTokens, {
|
|
3270
|
+
...routerOpts,
|
|
3271
|
+
routingProfile: routingProfile ?? void 0
|
|
3272
|
+
});
|
|
3273
|
+
parsed.model = routingDecision.model;
|
|
3274
|
+
modelId = routingDecision.model;
|
|
3275
|
+
bodyModified = true;
|
|
3276
|
+
if (sessionId) {
|
|
3277
|
+
sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
|
|
3278
|
+
console.log(
|
|
3279
|
+
`[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
|
|
3280
|
+
);
|
|
3281
|
+
}
|
|
3282
|
+
options.onRouted?.(routingDecision);
|
|
3161
3283
|
}
|
|
3162
|
-
options.onRouted?.(routingDecision);
|
|
3163
3284
|
}
|
|
3164
3285
|
}
|
|
3165
3286
|
if (bodyModified) {
|
|
@@ -3343,7 +3464,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3343
3464
|
actualModelUsed,
|
|
3344
3465
|
routerOpts.modelPricing,
|
|
3345
3466
|
estimatedInputTokens,
|
|
3346
|
-
maxTokens
|
|
3467
|
+
maxTokens,
|
|
3468
|
+
routingProfile ?? void 0
|
|
3347
3469
|
);
|
|
3348
3470
|
routingDecision = {
|
|
3349
3471
|
...routingDecision,
|
|
@@ -3552,7 +3674,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3552
3674
|
routingDecision.model,
|
|
3553
3675
|
routerOpts.modelPricing,
|
|
3554
3676
|
estimatedInputTokens,
|
|
3555
|
-
maxTokens
|
|
3677
|
+
maxTokens,
|
|
3678
|
+
routingProfile ?? void 0
|
|
3556
3679
|
);
|
|
3557
3680
|
const costWithBuffer = accurateCosts.costEstimate * 1.2;
|
|
3558
3681
|
const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
|