@blockrun/clawrouter 0.8.20 → 0.8.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -12
- package/dist/cli.js +198 -75
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +6 -1
- package/dist/index.js +201 -77
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -203,6 +203,10 @@ type RoutingConfig = {
|
|
|
203
203
|
tiers: Record<Tier, TierConfig>;
|
|
204
204
|
/** Tier configs for agentic mode - models that excel at multi-step tasks */
|
|
205
205
|
agenticTiers?: Record<Tier, TierConfig>;
|
|
206
|
+
/** Tier configs for eco profile - ultra cost-optimized (blockrun/eco) */
|
|
207
|
+
ecoTiers?: Record<Tier, TierConfig>;
|
|
208
|
+
/** Tier configs for premium profile - best quality (blockrun/premium) */
|
|
209
|
+
premiumTiers?: Record<Tier, TierConfig>;
|
|
206
210
|
overrides: OverridesConfig;
|
|
207
211
|
};
|
|
208
212
|
|
|
@@ -225,7 +229,7 @@ declare function getFallbackChain(tier: Tier, tierConfigs: Record<Tier, TierConf
|
|
|
225
229
|
* Calculate cost for a specific model (used when fallback model is used).
|
|
226
230
|
* Returns updated cost fields for RoutingDecision.
|
|
227
231
|
*/
|
|
228
|
-
declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number): {
|
|
232
|
+
declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number, routingProfile?: "free" | "eco" | "auto" | "premium"): {
|
|
229
233
|
costEstimate: number;
|
|
230
234
|
baselineCost: number;
|
|
231
235
|
savings: number;
|
|
@@ -264,6 +268,7 @@ declare const DEFAULT_ROUTING_CONFIG: RoutingConfig;
|
|
|
264
268
|
type RouterOptions = {
|
|
265
269
|
config: RoutingConfig;
|
|
266
270
|
modelPricing: Map<string, ModelPricing>;
|
|
271
|
+
routingProfile?: "free" | "eco" | "auto" | "premium";
|
|
267
272
|
};
|
|
268
273
|
/**
|
|
269
274
|
* Route a request to the cheapest capable model.
|
package/dist/index.js
CHANGED
|
@@ -23,11 +23,12 @@ var MODEL_ALIASES = {
|
|
|
23
23
|
grok: "xai/grok-3",
|
|
24
24
|
"grok-fast": "xai/grok-4-fast-reasoning",
|
|
25
25
|
"grok-code": "xai/grok-code-fast-1",
|
|
26
|
-
// NVIDIA
|
|
26
|
+
// NVIDIA
|
|
27
27
|
nvidia: "nvidia/gpt-oss-120b",
|
|
28
28
|
"gpt-120b": "nvidia/gpt-oss-120b",
|
|
29
|
-
"gpt-20b": "nvidia/gpt-oss-20b"
|
|
30
|
-
|
|
29
|
+
"gpt-20b": "nvidia/gpt-oss-20b"
|
|
30
|
+
// Note: auto, free, eco, premium are virtual routing profiles registered in BLOCKRUN_MODELS
|
|
31
|
+
// They don't need aliases since they're already top-level model IDs
|
|
31
32
|
};
|
|
32
33
|
function resolveModelAlias(model) {
|
|
33
34
|
const normalized = model.trim().toLowerCase();
|
|
@@ -41,16 +42,40 @@ function resolveModelAlias(model) {
|
|
|
41
42
|
return model;
|
|
42
43
|
}
|
|
43
44
|
var BLOCKRUN_MODELS = [
|
|
44
|
-
// Smart routing meta-
|
|
45
|
+
// Smart routing meta-models — proxy replaces with actual model
|
|
45
46
|
// NOTE: Model IDs are WITHOUT provider prefix (OpenClaw adds "blockrun/" automatically)
|
|
46
47
|
{
|
|
47
48
|
id: "auto",
|
|
48
|
-
name: "
|
|
49
|
+
name: "Auto (Smart Router - Balanced)",
|
|
49
50
|
inputPrice: 0,
|
|
50
51
|
outputPrice: 0,
|
|
51
52
|
contextWindow: 105e4,
|
|
52
53
|
maxOutput: 128e3
|
|
53
54
|
},
|
|
55
|
+
{
|
|
56
|
+
id: "free",
|
|
57
|
+
name: "Free (NVIDIA GPT-OSS-120B only)",
|
|
58
|
+
inputPrice: 0,
|
|
59
|
+
outputPrice: 0,
|
|
60
|
+
contextWindow: 128e3,
|
|
61
|
+
maxOutput: 4096
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
id: "eco",
|
|
65
|
+
name: "Eco (Smart Router - Cost Optimized)",
|
|
66
|
+
inputPrice: 0,
|
|
67
|
+
outputPrice: 0,
|
|
68
|
+
contextWindow: 105e4,
|
|
69
|
+
maxOutput: 128e3
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
id: "premium",
|
|
73
|
+
name: "Premium (Smart Router - Best Quality)",
|
|
74
|
+
inputPrice: 0,
|
|
75
|
+
outputPrice: 0,
|
|
76
|
+
contextWindow: 2e6,
|
|
77
|
+
maxOutput: 2e5
|
|
78
|
+
},
|
|
54
79
|
// OpenAI GPT-5 Family
|
|
55
80
|
{
|
|
56
81
|
id: "openai/gpt-5.2",
|
|
@@ -352,8 +377,8 @@ var BLOCKRUN_MODELS = [
|
|
|
352
377
|
{
|
|
353
378
|
id: "xai/grok-4-0709",
|
|
354
379
|
name: "Grok 4 (0709)",
|
|
355
|
-
inputPrice:
|
|
356
|
-
outputPrice:
|
|
380
|
+
inputPrice: 0.2,
|
|
381
|
+
outputPrice: 1.5,
|
|
357
382
|
contextWindow: 131072,
|
|
358
383
|
maxOutput: 16384,
|
|
359
384
|
reasoning: true
|
|
@@ -909,7 +934,7 @@ function calibrateConfidence(distance, steepness) {
|
|
|
909
934
|
}
|
|
910
935
|
|
|
911
936
|
// src/router/selector.ts
|
|
912
|
-
function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens) {
|
|
937
|
+
function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
|
|
913
938
|
const tierConfig = tierConfigs[tier];
|
|
914
939
|
const model = tierConfig.primary;
|
|
915
940
|
const pricing = modelPricing.get(model);
|
|
@@ -918,13 +943,13 @@ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPric
|
|
|
918
943
|
const inputCost = estimatedInputTokens / 1e6 * inputPrice;
|
|
919
944
|
const outputCost = maxOutputTokens / 1e6 * outputPrice;
|
|
920
945
|
const costEstimate = inputCost + outputCost;
|
|
921
|
-
const opusPricing = modelPricing.get("anthropic/claude-opus-4");
|
|
946
|
+
const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
|
|
922
947
|
const opusInputPrice = opusPricing?.inputPrice ?? 0;
|
|
923
948
|
const opusOutputPrice = opusPricing?.outputPrice ?? 0;
|
|
924
949
|
const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
|
|
925
950
|
const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
|
|
926
951
|
const baselineCost = baselineInput + baselineOutput;
|
|
927
|
-
const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
952
|
+
const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
928
953
|
return {
|
|
929
954
|
model,
|
|
930
955
|
tier,
|
|
@@ -940,20 +965,20 @@ function getFallbackChain(tier, tierConfigs) {
|
|
|
940
965
|
const config = tierConfigs[tier];
|
|
941
966
|
return [config.primary, ...config.fallback];
|
|
942
967
|
}
|
|
943
|
-
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens) {
|
|
968
|
+
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
|
|
944
969
|
const pricing = modelPricing.get(model);
|
|
945
970
|
const inputPrice = pricing?.inputPrice ?? 0;
|
|
946
971
|
const outputPrice = pricing?.outputPrice ?? 0;
|
|
947
972
|
const inputCost = estimatedInputTokens / 1e6 * inputPrice;
|
|
948
973
|
const outputCost = maxOutputTokens / 1e6 * outputPrice;
|
|
949
974
|
const costEstimate = inputCost + outputCost;
|
|
950
|
-
const opusPricing = modelPricing.get("anthropic/claude-opus-4");
|
|
975
|
+
const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
|
|
951
976
|
const opusInputPrice = opusPricing?.inputPrice ?? 0;
|
|
952
977
|
const opusOutputPrice = opusPricing?.outputPrice ?? 0;
|
|
953
978
|
const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
|
|
954
979
|
const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
|
|
955
980
|
const baselineCost = baselineInput + baselineOutput;
|
|
956
|
-
const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
981
|
+
const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
957
982
|
return { costEstimate, baselineCost, savings };
|
|
958
983
|
}
|
|
959
984
|
function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
|
|
@@ -1582,15 +1607,17 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1582
1607
|
// Tier boundaries on weighted score axis
|
|
1583
1608
|
tierBoundaries: {
|
|
1584
1609
|
simpleMedium: 0,
|
|
1585
|
-
mediumComplex: 0.18,
|
|
1586
|
-
|
|
1587
|
-
|
|
1610
|
+
mediumComplex: 0.3,
|
|
1611
|
+
// Raised from 0.18 - prevent simple tasks from reaching expensive COMPLEX tier
|
|
1612
|
+
complexReasoning: 0.5
|
|
1613
|
+
// Raised from 0.4 - reserve for true reasoning tasks
|
|
1588
1614
|
},
|
|
1589
1615
|
// Sigmoid steepness for confidence calibration
|
|
1590
1616
|
confidenceSteepness: 12,
|
|
1591
1617
|
// Below this confidence → ambiguous (null tier)
|
|
1592
1618
|
confidenceThreshold: 0.7
|
|
1593
1619
|
},
|
|
1620
|
+
// Auto (balanced) tier configs - current default smart routing
|
|
1594
1621
|
tiers: {
|
|
1595
1622
|
SIMPLE: {
|
|
1596
1623
|
primary: "nvidia/kimi-k2.5",
|
|
@@ -1599,7 +1626,9 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1599
1626
|
"google/gemini-2.5-flash",
|
|
1600
1627
|
"nvidia/gpt-oss-120b",
|
|
1601
1628
|
"nvidia/gpt-oss-20b",
|
|
1602
|
-
"deepseek/deepseek-chat"
|
|
1629
|
+
"deepseek/deepseek-chat",
|
|
1630
|
+
"xai/grok-code-fast-1"
|
|
1631
|
+
// Added for better quality fallback
|
|
1603
1632
|
]
|
|
1604
1633
|
},
|
|
1605
1634
|
MEDIUM: {
|
|
@@ -1614,7 +1643,8 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1614
1643
|
},
|
|
1615
1644
|
COMPLEX: {
|
|
1616
1645
|
primary: "google/gemini-2.5-pro",
|
|
1617
|
-
fallback: ["
|
|
1646
|
+
fallback: ["xai/grok-4-0709", "openai/gpt-4o", "openai/gpt-5.2", "anthropic/claude-sonnet-4"]
|
|
1647
|
+
// Grok first for cost efficiency, Sonnet as last resort
|
|
1618
1648
|
},
|
|
1619
1649
|
REASONING: {
|
|
1620
1650
|
primary: "xai/grok-4-1-fast-reasoning",
|
|
@@ -1628,6 +1658,52 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1628
1658
|
]
|
|
1629
1659
|
}
|
|
1630
1660
|
},
|
|
1661
|
+
// Eco tier configs - ultra cost-optimized (blockrun/eco)
|
|
1662
|
+
ecoTiers: {
|
|
1663
|
+
SIMPLE: {
|
|
1664
|
+
primary: "nvidia/kimi-k2.5",
|
|
1665
|
+
// $0.001/$0.001
|
|
1666
|
+
fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "nvidia/gpt-oss-20b"]
|
|
1667
|
+
},
|
|
1668
|
+
MEDIUM: {
|
|
1669
|
+
primary: "deepseek/deepseek-chat",
|
|
1670
|
+
// $0.14/$0.28
|
|
1671
|
+
fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "nvidia/kimi-k2.5"]
|
|
1672
|
+
},
|
|
1673
|
+
COMPLEX: {
|
|
1674
|
+
primary: "xai/grok-4-0709",
|
|
1675
|
+
// $0.20/$1.50
|
|
1676
|
+
fallback: ["deepseek/deepseek-chat", "google/gemini-2.5-flash", "openai/gpt-4o-mini"]
|
|
1677
|
+
},
|
|
1678
|
+
REASONING: {
|
|
1679
|
+
primary: "deepseek/deepseek-reasoner",
|
|
1680
|
+
// $0.55/$2.19
|
|
1681
|
+
fallback: ["xai/grok-4-fast-reasoning", "moonshot/kimi-k2.5"]
|
|
1682
|
+
}
|
|
1683
|
+
},
|
|
1684
|
+
// Premium tier configs - best quality (blockrun/premium)
|
|
1685
|
+
premiumTiers: {
|
|
1686
|
+
SIMPLE: {
|
|
1687
|
+
primary: "google/gemini-2.5-flash",
|
|
1688
|
+
// $0.075/$0.30
|
|
1689
|
+
fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
|
|
1690
|
+
},
|
|
1691
|
+
MEDIUM: {
|
|
1692
|
+
primary: "openai/gpt-4o",
|
|
1693
|
+
// $2.50/$10
|
|
1694
|
+
fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
|
|
1695
|
+
},
|
|
1696
|
+
COMPLEX: {
|
|
1697
|
+
primary: "anthropic/claude-opus-4.5",
|
|
1698
|
+
// $15/$75
|
|
1699
|
+
fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "google/gemini-2.5-pro"]
|
|
1700
|
+
},
|
|
1701
|
+
REASONING: {
|
|
1702
|
+
primary: "openai/o3",
|
|
1703
|
+
// $10/$40
|
|
1704
|
+
fallback: ["anthropic/claude-opus-4.5", "openai/o1", "google/gemini-2.5-pro"]
|
|
1705
|
+
}
|
|
1706
|
+
},
|
|
1631
1707
|
// Agentic tier configs - models that excel at multi-step autonomous tasks
|
|
1632
1708
|
agenticTiers: {
|
|
1633
1709
|
SIMPLE: {
|
|
@@ -1669,21 +1745,34 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
|
|
|
1669
1745
|
const fullText = `${systemPrompt ?? ""} ${prompt}`;
|
|
1670
1746
|
const estimatedTokens = Math.ceil(fullText.length / 4);
|
|
1671
1747
|
const ruleResult = classifyByRules(prompt, systemPrompt, estimatedTokens, config.scoring);
|
|
1672
|
-
const
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1748
|
+
const { routingProfile } = options;
|
|
1749
|
+
let tierConfigs;
|
|
1750
|
+
let profileSuffix = "";
|
|
1751
|
+
if (routingProfile === "eco" && config.ecoTiers) {
|
|
1752
|
+
tierConfigs = config.ecoTiers;
|
|
1753
|
+
profileSuffix = " | eco";
|
|
1754
|
+
} else if (routingProfile === "premium" && config.premiumTiers) {
|
|
1755
|
+
tierConfigs = config.premiumTiers;
|
|
1756
|
+
profileSuffix = " | premium";
|
|
1757
|
+
} else {
|
|
1758
|
+
const agenticScore = ruleResult.agenticScore ?? 0;
|
|
1759
|
+
const isAutoAgentic = agenticScore >= 0.5;
|
|
1760
|
+
const isExplicitAgentic = config.overrides.agenticMode ?? false;
|
|
1761
|
+
const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
|
|
1762
|
+
tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
|
|
1763
|
+
profileSuffix = useAgenticTiers ? " | agentic" : "";
|
|
1764
|
+
}
|
|
1677
1765
|
if (estimatedTokens > config.overrides.maxTokensForceComplex) {
|
|
1678
1766
|
return selectModel(
|
|
1679
1767
|
"COMPLEX",
|
|
1680
1768
|
0.95,
|
|
1681
1769
|
"rules",
|
|
1682
|
-
`Input exceeds ${config.overrides.maxTokensForceComplex} tokens${
|
|
1770
|
+
`Input exceeds ${config.overrides.maxTokensForceComplex} tokens${profileSuffix}`,
|
|
1683
1771
|
tierConfigs,
|
|
1684
1772
|
modelPricing,
|
|
1685
1773
|
estimatedTokens,
|
|
1686
|
-
maxOutputTokens
|
|
1774
|
+
maxOutputTokens,
|
|
1775
|
+
routingProfile
|
|
1687
1776
|
);
|
|
1688
1777
|
}
|
|
1689
1778
|
const hasStructuredOutput = systemPrompt ? /json|structured|schema/i.test(systemPrompt) : false;
|
|
@@ -1707,11 +1796,7 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
|
|
|
1707
1796
|
tier = minTier;
|
|
1708
1797
|
}
|
|
1709
1798
|
}
|
|
1710
|
-
|
|
1711
|
-
reasoning += " | auto-agentic";
|
|
1712
|
-
} else if (isExplicitAgentic) {
|
|
1713
|
-
reasoning += " | agentic";
|
|
1714
|
-
}
|
|
1799
|
+
reasoning += profileSuffix;
|
|
1715
1800
|
return selectModel(
|
|
1716
1801
|
tier,
|
|
1717
1802
|
confidence,
|
|
@@ -1720,7 +1805,8 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
|
|
|
1720
1805
|
tierConfigs,
|
|
1721
1806
|
modelPricing,
|
|
1722
1807
|
estimatedTokens,
|
|
1723
|
-
maxOutputTokens
|
|
1808
|
+
maxOutputTokens,
|
|
1809
|
+
routingProfile
|
|
1724
1810
|
);
|
|
1725
1811
|
}
|
|
1726
1812
|
|
|
@@ -1878,12 +1964,13 @@ async function getStats(days = 7) {
|
|
|
1878
1964
|
function formatStatsAscii(stats) {
|
|
1879
1965
|
const lines = [];
|
|
1880
1966
|
lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
1881
|
-
lines.push("\u2551
|
|
1967
|
+
lines.push("\u2551 ClawRouter by BlockRun v0.8.20 \u2551");
|
|
1968
|
+
lines.push("\u2551 Usage Statistics \u2551");
|
|
1882
1969
|
lines.push("\u2560\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2563");
|
|
1883
1970
|
lines.push(`\u2551 Period: ${stats.period.padEnd(49)}\u2551`);
|
|
1884
1971
|
lines.push(`\u2551 Total Requests: ${stats.totalRequests.toString().padEnd(41)}\u2551`);
|
|
1885
1972
|
lines.push(`\u2551 Total Cost: $${stats.totalCost.toFixed(4).padEnd(43)}\u2551`);
|
|
1886
|
-
lines.push(`\u2551 Baseline Cost (Opus): $${stats.totalBaselineCost.toFixed(4).padEnd(
|
|
1973
|
+
lines.push(`\u2551 Baseline Cost (Opus 4.5): $${stats.totalBaselineCost.toFixed(4).padEnd(30)}\u2551`);
|
|
1887
1974
|
const savingsLine = `\u2551 \u{1F4B0} Total Saved: $${stats.totalSavings.toFixed(4)} (${stats.savingsPercentage.toFixed(1)}%)`;
|
|
1888
1975
|
if (stats.entriesWithBaseline < stats.totalRequests && stats.entriesWithBaseline > 0) {
|
|
1889
1976
|
lines.push(savingsLine.padEnd(61) + "\u2551");
|
|
@@ -2411,7 +2498,16 @@ async function checkForUpdates() {
|
|
|
2411
2498
|
// src/proxy.ts
|
|
2412
2499
|
var BLOCKRUN_API = "https://blockrun.ai/api";
|
|
2413
2500
|
var AUTO_MODEL = "blockrun/auto";
|
|
2414
|
-
var
|
|
2501
|
+
var ROUTING_PROFILES = /* @__PURE__ */ new Set([
|
|
2502
|
+
"blockrun/free",
|
|
2503
|
+
"free",
|
|
2504
|
+
"blockrun/eco",
|
|
2505
|
+
"eco",
|
|
2506
|
+
"blockrun/auto",
|
|
2507
|
+
"auto",
|
|
2508
|
+
"blockrun/premium",
|
|
2509
|
+
"premium"
|
|
2510
|
+
]);
|
|
2415
2511
|
var FREE_MODEL = "nvidia/gpt-oss-120b";
|
|
2416
2512
|
var HEARTBEAT_INTERVAL_MS = 2e3;
|
|
2417
2513
|
var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
|
|
@@ -3093,6 +3189,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3093
3189
|
let isStreaming = false;
|
|
3094
3190
|
let modelId = "";
|
|
3095
3191
|
let maxTokens = 4096;
|
|
3192
|
+
let routingProfile = null;
|
|
3096
3193
|
const isChatCompletion = req.url?.includes("/chat/completions");
|
|
3097
3194
|
if (isChatCompletion && body.length > 0) {
|
|
3098
3195
|
try {
|
|
@@ -3108,58 +3205,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3108
3205
|
const normalizedModel = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
|
|
3109
3206
|
const resolvedModel = resolveModelAlias(normalizedModel);
|
|
3110
3207
|
const wasAlias = resolvedModel !== normalizedModel;
|
|
3111
|
-
const
|
|
3208
|
+
const isRoutingProfile = ROUTING_PROFILES.has(normalizedModel);
|
|
3209
|
+
if (isRoutingProfile) {
|
|
3210
|
+
const profileName = normalizedModel.replace("blockrun/", "");
|
|
3211
|
+
routingProfile = profileName;
|
|
3212
|
+
}
|
|
3112
3213
|
console.log(
|
|
3113
|
-
`[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}`
|
|
3214
|
+
`[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}${routingProfile ? `, profile: ${routingProfile}` : ""}`
|
|
3114
3215
|
);
|
|
3115
|
-
if (wasAlias && !
|
|
3216
|
+
if (wasAlias && !isRoutingProfile) {
|
|
3116
3217
|
parsed.model = resolvedModel;
|
|
3117
3218
|
modelId = resolvedModel;
|
|
3118
3219
|
bodyModified = true;
|
|
3119
3220
|
}
|
|
3120
|
-
if (
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
console.log(
|
|
3127
|
-
`[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
|
|
3128
|
-
);
|
|
3129
|
-
parsed.model = existingSession.model;
|
|
3130
|
-
modelId = existingSession.model;
|
|
3221
|
+
if (isRoutingProfile) {
|
|
3222
|
+
if (routingProfile === "free") {
|
|
3223
|
+
const freeModel = "nvidia/gpt-oss-120b";
|
|
3224
|
+
console.log(`[ClawRouter] Free profile - using ${freeModel} directly`);
|
|
3225
|
+
parsed.model = freeModel;
|
|
3226
|
+
modelId = freeModel;
|
|
3131
3227
|
bodyModified = true;
|
|
3132
|
-
|
|
3228
|
+
await logUsage({
|
|
3229
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3230
|
+
model: freeModel,
|
|
3231
|
+
tier: "SIMPLE",
|
|
3232
|
+
cost: 0,
|
|
3233
|
+
baselineCost: 0,
|
|
3234
|
+
savings: 1,
|
|
3235
|
+
// 100% savings
|
|
3236
|
+
latencyMs: 0
|
|
3237
|
+
});
|
|
3133
3238
|
} else {
|
|
3134
|
-
const
|
|
3135
|
-
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
|
|
3139
|
-
lastUserMsg = messages[i];
|
|
3140
|
-
break;
|
|
3141
|
-
}
|
|
3142
|
-
}
|
|
3143
|
-
}
|
|
3144
|
-
const systemMsg = messages?.find((m) => m.role === "system");
|
|
3145
|
-
const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
|
|
3146
|
-
const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
|
|
3147
|
-
const tools = parsed.tools;
|
|
3148
|
-
const hasTools = Array.isArray(tools) && tools.length > 0;
|
|
3149
|
-
if (hasTools) {
|
|
3150
|
-
console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
|
|
3151
|
-
}
|
|
3152
|
-
routingDecision = route(prompt, systemPrompt, maxTokens, routerOpts);
|
|
3153
|
-
parsed.model = routingDecision.model;
|
|
3154
|
-
modelId = routingDecision.model;
|
|
3155
|
-
bodyModified = true;
|
|
3156
|
-
if (sessionId) {
|
|
3157
|
-
sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
|
|
3239
|
+
const sessionId = getSessionId(
|
|
3240
|
+
req.headers
|
|
3241
|
+
);
|
|
3242
|
+
const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
|
|
3243
|
+
if (existingSession) {
|
|
3158
3244
|
console.log(
|
|
3159
|
-
`[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
|
|
3245
|
+
`[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
|
|
3160
3246
|
);
|
|
3247
|
+
parsed.model = existingSession.model;
|
|
3248
|
+
modelId = existingSession.model;
|
|
3249
|
+
bodyModified = true;
|
|
3250
|
+
sessionStore.touchSession(sessionId);
|
|
3251
|
+
} else {
|
|
3252
|
+
const messages = parsed.messages;
|
|
3253
|
+
let lastUserMsg;
|
|
3254
|
+
if (messages) {
|
|
3255
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
3256
|
+
if (messages[i].role === "user") {
|
|
3257
|
+
lastUserMsg = messages[i];
|
|
3258
|
+
break;
|
|
3259
|
+
}
|
|
3260
|
+
}
|
|
3261
|
+
}
|
|
3262
|
+
const systemMsg = messages?.find((m) => m.role === "system");
|
|
3263
|
+
const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
|
|
3264
|
+
const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
|
|
3265
|
+
const tools = parsed.tools;
|
|
3266
|
+
const hasTools = Array.isArray(tools) && tools.length > 0;
|
|
3267
|
+
if (hasTools) {
|
|
3268
|
+
console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
|
|
3269
|
+
}
|
|
3270
|
+
routingDecision = route(prompt, systemPrompt, maxTokens, {
|
|
3271
|
+
...routerOpts,
|
|
3272
|
+
routingProfile: routingProfile ?? void 0
|
|
3273
|
+
});
|
|
3274
|
+
parsed.model = routingDecision.model;
|
|
3275
|
+
modelId = routingDecision.model;
|
|
3276
|
+
bodyModified = true;
|
|
3277
|
+
if (sessionId) {
|
|
3278
|
+
sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
|
|
3279
|
+
console.log(
|
|
3280
|
+
`[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
|
|
3281
|
+
);
|
|
3282
|
+
}
|
|
3283
|
+
options.onRouted?.(routingDecision);
|
|
3161
3284
|
}
|
|
3162
|
-
options.onRouted?.(routingDecision);
|
|
3163
3285
|
}
|
|
3164
3286
|
}
|
|
3165
3287
|
if (bodyModified) {
|
|
@@ -3343,7 +3465,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3343
3465
|
actualModelUsed,
|
|
3344
3466
|
routerOpts.modelPricing,
|
|
3345
3467
|
estimatedInputTokens,
|
|
3346
|
-
maxTokens
|
|
3468
|
+
maxTokens,
|
|
3469
|
+
routingProfile ?? void 0
|
|
3347
3470
|
);
|
|
3348
3471
|
routingDecision = {
|
|
3349
3472
|
...routingDecision,
|
|
@@ -3552,7 +3675,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3552
3675
|
routingDecision.model,
|
|
3553
3676
|
routerOpts.modelPricing,
|
|
3554
3677
|
estimatedInputTokens,
|
|
3555
|
-
maxTokens
|
|
3678
|
+
maxTokens,
|
|
3679
|
+
routingProfile ?? void 0
|
|
3556
3680
|
);
|
|
3557
3681
|
const costWithBuffer = accurateCosts.costEstimate * 1.2;
|
|
3558
3682
|
const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
|