@blockrun/clawrouter 0.11.13 → 0.11.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -11,13 +11,13 @@ import { x402Client } from "@x402/fetch";
11
11
  // src/payment-preauth.ts
12
12
  import { x402HTTPClient } from "@x402/fetch";
13
13
  var DEFAULT_TTL_MS = 36e5;
14
- function createPayFetchWithPreAuth(baseFetch, client, ttlMs = DEFAULT_TTL_MS) {
14
+ function createPayFetchWithPreAuth(baseFetch, client, ttlMs = DEFAULT_TTL_MS, options) {
15
15
  const httpClient = new x402HTTPClient(client);
16
16
  const cache = /* @__PURE__ */ new Map();
17
17
  return async (input, init) => {
18
18
  const request = new Request(input, init);
19
19
  const urlPath = new URL(request.url).pathname;
20
- const cached = cache.get(urlPath);
20
+ const cached = !options?.skipPreAuth ? cache.get(urlPath) : void 0;
21
21
  if (cached && Date.now() - cached.cachedAt < ttlMs) {
22
22
  try {
23
23
  const payload2 = await client.createPaymentPayload(cached.paymentRequired);
@@ -157,20 +157,18 @@ function scoreAgenticTask(text, keywords) {
157
157
  };
158
158
  }
159
159
  function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
160
- const text = `${systemPrompt ?? ""} ${prompt}`.toLowerCase();
161
160
  const userText = prompt.toLowerCase();
162
161
  const dimensions = [
163
- // Original 8 dimensions
162
+ // Token count uses total estimated tokens (system + user) — context size matters for model selection
164
163
  scoreTokenCount(estimatedTokens, config.tokenCountThresholds),
165
164
  scoreKeywordMatch(
166
- text,
165
+ userText,
167
166
  config.codeKeywords,
168
167
  "codePresence",
169
168
  "code",
170
169
  { low: 1, high: 2 },
171
170
  { none: 0, low: 0.5, high: 1 }
172
171
  ),
173
- // Reasoning markers use USER prompt only — system prompt "step by step" shouldn't trigger reasoning
174
172
  scoreKeywordMatch(
175
173
  userText,
176
174
  config.reasoningKeywords,
@@ -180,7 +178,7 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
180
178
  { none: 0, low: 0.7, high: 1 }
181
179
  ),
182
180
  scoreKeywordMatch(
183
- text,
181
+ userText,
184
182
  config.technicalKeywords,
185
183
  "technicalTerms",
186
184
  "technical",
@@ -188,7 +186,7 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
188
186
  { none: 0, low: 0.5, high: 1 }
189
187
  ),
190
188
  scoreKeywordMatch(
191
- text,
189
+ userText,
192
190
  config.creativeKeywords,
193
191
  "creativeMarkers",
194
192
  "creative",
@@ -196,18 +194,18 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
196
194
  { none: 0, low: 0.5, high: 0.7 }
197
195
  ),
198
196
  scoreKeywordMatch(
199
- text,
197
+ userText,
200
198
  config.simpleKeywords,
201
199
  "simpleIndicators",
202
200
  "simple",
203
201
  { low: 1, high: 2 },
204
202
  { none: 0, low: -1, high: -1 }
205
203
  ),
206
- scoreMultiStep(text),
204
+ scoreMultiStep(userText),
207
205
  scoreQuestionComplexity(prompt),
208
206
  // 6 new dimensions
209
207
  scoreKeywordMatch(
210
- text,
208
+ userText,
211
209
  config.imperativeVerbs,
212
210
  "imperativeVerbs",
213
211
  "imperative",
@@ -215,7 +213,7 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
215
213
  { none: 0, low: 0.3, high: 0.5 }
216
214
  ),
217
215
  scoreKeywordMatch(
218
- text,
216
+ userText,
219
217
  config.constraintIndicators,
220
218
  "constraintCount",
221
219
  "constraints",
@@ -223,7 +221,7 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
223
221
  { none: 0, low: 0.3, high: 0.7 }
224
222
  ),
225
223
  scoreKeywordMatch(
226
- text,
224
+ userText,
227
225
  config.outputFormatKeywords,
228
226
  "outputFormat",
229
227
  "format",
@@ -231,7 +229,7 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
231
229
  { none: 0, low: 0.4, high: 0.7 }
232
230
  ),
233
231
  scoreKeywordMatch(
234
- text,
232
+ userText,
235
233
  config.referenceKeywords,
236
234
  "referenceComplexity",
237
235
  "references",
@@ -239,7 +237,7 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
239
237
  { none: 0, low: 0.3, high: 0.5 }
240
238
  ),
241
239
  scoreKeywordMatch(
242
- text,
240
+ userText,
243
241
  config.negationKeywords,
244
242
  "negationComplexity",
245
243
  "negation",
@@ -247,7 +245,7 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
247
245
  { none: 0, low: 0.3, high: 0.5 }
248
246
  ),
249
247
  scoreKeywordMatch(
250
- text,
248
+ userText,
251
249
  config.domainSpecificKeywords,
252
250
  "domainSpecificity",
253
251
  "domain-specific",
@@ -279,7 +277,8 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
279
277
  tier: "REASONING",
280
278
  confidence: Math.max(confidence2, 0.85),
281
279
  signals,
282
- agenticScore
280
+ agenticScore,
281
+ dimensions
283
282
  };
284
283
  }
285
284
  const { simpleMedium, mediumComplex, complexReasoning } = config.tierBoundaries;
@@ -303,9 +302,9 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
303
302
  }
304
303
  const confidence = calibrateConfidence(distanceFromBoundary, config.confidenceSteepness);
305
304
  if (confidence < config.confidenceThreshold) {
306
- return { score: weightedScore, tier: null, confidence, signals, agenticScore };
305
+ return { score: weightedScore, tier: null, confidence, signals, agenticScore, dimensions };
307
306
  }
308
- return { score: weightedScore, tier, confidence, signals, agenticScore };
307
+ return { score: weightedScore, tier, confidence, signals, agenticScore, dimensions };
309
308
  }
310
309
  function calibrateConfidence(distance, steepness) {
311
310
  return 1 / (1 + Math.exp(-steepness * distance));
@@ -313,7 +312,9 @@ function calibrateConfidence(distance, steepness) {
313
312
 
314
313
  // src/router/selector.ts
315
314
  var BASELINE_MODEL_ID = "anthropic/claude-opus-4.6";
316
- function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
315
+ var BASELINE_INPUT_PRICE = 5;
316
+ var BASELINE_OUTPUT_PRICE = 25;
317
+ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile, agenticScore) {
317
318
  const tierConfig = tierConfigs[tier];
318
319
  const model = tierConfig.primary;
319
320
  const pricing = modelPricing.get(model);
@@ -323,8 +324,8 @@ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPric
323
324
  const outputCost = maxOutputTokens / 1e6 * outputPrice;
324
325
  const costEstimate = inputCost + outputCost;
325
326
  const opusPricing = modelPricing.get(BASELINE_MODEL_ID);
326
- const opusInputPrice = opusPricing?.inputPrice ?? 0;
327
- const opusOutputPrice = opusPricing?.outputPrice ?? 0;
327
+ const opusInputPrice = opusPricing?.inputPrice ?? BASELINE_INPUT_PRICE;
328
+ const opusOutputPrice = opusPricing?.outputPrice ?? BASELINE_OUTPUT_PRICE;
328
329
  const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
329
330
  const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
330
331
  const baselineCost = baselineInput + baselineOutput;
@@ -337,7 +338,8 @@ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPric
337
338
  reasoning,
338
339
  costEstimate,
339
340
  baselineCost,
340
- savings
341
+ savings,
342
+ ...agenticScore !== void 0 && { agenticScore }
341
343
  };
342
344
  }
343
345
  function getFallbackChain(tier, tierConfigs) {
@@ -352,14 +354,24 @@ function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutput
352
354
  const outputCost = maxOutputTokens / 1e6 * outputPrice;
353
355
  const costEstimate = inputCost + outputCost;
354
356
  const opusPricing = modelPricing.get(BASELINE_MODEL_ID);
355
- const opusInputPrice = opusPricing?.inputPrice ?? 0;
356
- const opusOutputPrice = opusPricing?.outputPrice ?? 0;
357
+ const opusInputPrice = opusPricing?.inputPrice ?? BASELINE_INPUT_PRICE;
358
+ const opusOutputPrice = opusPricing?.outputPrice ?? BASELINE_OUTPUT_PRICE;
357
359
  const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
358
360
  const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
359
361
  const baselineCost = baselineInput + baselineOutput;
360
362
  const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
361
363
  return { costEstimate, baselineCost, savings };
362
364
  }
365
/**
 * Narrow a list of candidate model ids to those that support tool calling.
 *
 * @param models Candidate model ids, in priority order.
 * @param hasTools Whether the request carries tools; when falsy the list is
 *   returned untouched.
 * @param supportsToolCalling2 Predicate taking a model id and returning
 *   whether that model supports tool calling.
 * @returns The capable subset (order preserved), or the original list when no
 *   candidate qualifies so the caller never ends up with an empty chain.
 */
function filterByToolCalling(models, hasTools, supportsToolCalling2) {
  if (!hasTools) return models;
  // Call the predicate with the id only: Array.prototype.filter would
  // otherwise also pass (index, array), which can silently change the result
  // of a predicate that has an optional second parameter.
  const capable = models.filter((modelId) => supportsToolCalling2(modelId));
  return capable.length > 0 ? capable : models;
}
370
/**
 * Narrow a list of candidate model ids to those that accept image input.
 *
 * @param models Candidate model ids, in priority order.
 * @param hasVision Whether the request needs vision; when falsy the list is
 *   returned untouched.
 * @param supportsVision2 Predicate taking a model id and returning whether
 *   that model supports vision.
 * @returns The capable subset (order preserved), or the original list when no
 *   candidate qualifies so the caller never ends up with an empty chain.
 */
function filterByVision(models, hasVision, supportsVision2) {
  if (!hasVision) return models;
  // Call the predicate with the id only: Array.prototype.filter would
  // otherwise also pass (index, array), which can silently change the result
  // of a predicate that has an optional second parameter.
  const capable = models.filter((modelId) => supportsVision2(modelId));
  return capable.length > 0 ? capable : models;
}
363
375
  function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
364
376
  const fullChain = getFallbackChain(tier, tierConfigs);
365
377
  const filtered = fullChain.filter((modelId) => {
@@ -1415,18 +1427,18 @@ var DEFAULT_ROUTING_CONFIG = {
1415
1427
  ]
1416
1428
  },
1417
1429
  MEDIUM: {
1418
- primary: "xai/grok-code-fast-1",
1419
- // Code specialist, $0.20/$1.50
1430
+ primary: "moonshot/kimi-k2.5",
1431
+ // $0.50/$2.40 - strong tool use, proper function call format
1420
1432
  fallback: [
1433
+ "deepseek/deepseek-chat",
1421
1434
  "google/gemini-2.5-flash-lite",
1422
1435
  // 1M context, ultra cheap ($0.10/$0.40)
1423
- "deepseek/deepseek-chat",
1424
1436
  "xai/grok-4-1-fast-non-reasoning"
1425
1437
  // Upgraded Grok 4.1
1426
1438
  ]
1427
1439
  },
1428
1440
  COMPLEX: {
1429
- primary: "google/gemini-3.1-pro-preview",
1441
+ primary: "google/gemini-3.1-pro",
1430
1442
  // Newest Gemini 3.1 - upgraded from 3.0
1431
1443
  fallback: [
1432
1444
  "google/gemini-2.5-flash-lite",
@@ -1486,7 +1498,7 @@ var DEFAULT_ROUTING_CONFIG = {
1486
1498
  fallback: [
1487
1499
  "anthropic/claude-haiku-4.5",
1488
1500
  "google/gemini-2.5-flash-lite",
1489
- "xai/grok-code-fast-1"
1501
+ "deepseek/deepseek-chat"
1490
1502
  ]
1491
1503
  },
1492
1504
  MEDIUM: {
@@ -1506,7 +1518,7 @@ var DEFAULT_ROUTING_CONFIG = {
1506
1518
  "openai/gpt-5.2-codex",
1507
1519
  "anthropic/claude-opus-4.6",
1508
1520
  "anthropic/claude-sonnet-4.6",
1509
- "google/gemini-3.1-pro-preview",
1521
+ "google/gemini-3.1-pro",
1510
1522
  // Newest Gemini
1511
1523
  "google/gemini-3-pro-preview",
1512
1524
  "moonshot/kimi-k2.5"
@@ -1537,9 +1549,13 @@ var DEFAULT_ROUTING_CONFIG = {
1537
1549
  ]
1538
1550
  },
1539
1551
  MEDIUM: {
1540
- primary: "xai/grok-code-fast-1",
1541
- // Code specialist for agentic coding
1542
- fallback: ["moonshot/kimi-k2.5", "anthropic/claude-haiku-4.5", "claude-sonnet-4"]
1552
+ primary: "moonshot/kimi-k2.5",
1553
+ // $0.50/$2.40 - strong tool use, handles function calls correctly
1554
+ fallback: [
1555
+ "anthropic/claude-haiku-4.5",
1556
+ "deepseek/deepseek-chat",
1557
+ "xai/grok-4-1-fast-non-reasoning"
1558
+ ]
1543
1559
  },
1544
1560
  COMPLEX: {
1545
1561
  primary: "anthropic/claude-sonnet-4.6",
@@ -1547,7 +1563,7 @@ var DEFAULT_ROUTING_CONFIG = {
1547
1563
  "anthropic/claude-opus-4.6",
1548
1564
  // Latest Opus - best agentic
1549
1565
  "openai/gpt-5.2",
1550
- "google/gemini-3.1-pro-preview",
1566
+ "google/gemini-3.1-pro",
1551
1567
  // Newest Gemini
1552
1568
  "google/gemini-3-pro-preview",
1553
1569
  "xai/grok-4-0709"
@@ -1579,7 +1595,7 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1579
1595
  const ruleResult = classifyByRules(prompt, systemPrompt, estimatedTokens, config.scoring);
1580
1596
  const { routingProfile } = options;
1581
1597
  let tierConfigs;
1582
- let profileSuffix = "";
1598
+ let profileSuffix;
1583
1599
  if (routingProfile === "eco" && config.ecoTiers) {
1584
1600
  tierConfigs = config.ecoTiers;
1585
1601
  profileSuffix = " | eco";
@@ -1594,6 +1610,7 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1594
1610
  tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
1595
1611
  profileSuffix = useAgenticTiers ? " | agentic" : "";
1596
1612
  }
1613
+ const agenticScoreValue = ruleResult.agenticScore;
1597
1614
  if (estimatedTokens > config.overrides.maxTokensForceComplex) {
1598
1615
  return selectModel(
1599
1616
  "COMPLEX",
@@ -1604,7 +1621,8 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1604
1621
  modelPricing,
1605
1622
  estimatedTokens,
1606
1623
  maxOutputTokens,
1607
- routingProfile
1624
+ routingProfile,
1625
+ agenticScoreValue
1608
1626
  );
1609
1627
  }
1610
1628
  const hasStructuredOutput = systemPrompt ? /json|structured|schema/i.test(systemPrompt) : false;
@@ -1638,7 +1656,8 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1638
1656
  modelPricing,
1639
1657
  estimatedTokens,
1640
1658
  maxOutputTokens,
1641
- routingProfile
1659
+ routingProfile,
1660
+ agenticScoreValue
1642
1661
  );
1643
1662
  }
1644
1663
 
@@ -1686,6 +1705,8 @@ var MODEL_ALIASES = {
1686
1705
  // Google
1687
1706
  gemini: "google/gemini-2.5-pro",
1688
1707
  flash: "google/gemini-2.5-flash",
1708
+ "gemini-3.1-pro-preview": "google/gemini-3.1-pro",
1709
+ "google/gemini-3.1-pro-preview": "google/gemini-3.1-pro",
1689
1710
  // xAI
1690
1711
  grok: "xai/grok-3",
1691
1712
  "grok-fast": "xai/grok-4-fast-reasoning",
@@ -1759,7 +1780,8 @@ var BLOCKRUN_MODELS = [
1759
1780
  maxOutput: 128e3,
1760
1781
  reasoning: true,
1761
1782
  vision: true,
1762
- agentic: true
1783
+ agentic: true,
1784
+ toolCalling: true
1763
1785
  },
1764
1786
  {
1765
1787
  id: "openai/gpt-5-mini",
@@ -1768,7 +1790,8 @@ var BLOCKRUN_MODELS = [
1768
1790
  inputPrice: 0.25,
1769
1791
  outputPrice: 2,
1770
1792
  contextWindow: 2e5,
1771
- maxOutput: 65536
1793
+ maxOutput: 65536,
1794
+ toolCalling: true
1772
1795
  },
1773
1796
  {
1774
1797
  id: "openai/gpt-5-nano",
@@ -1777,7 +1800,8 @@ var BLOCKRUN_MODELS = [
1777
1800
  inputPrice: 0.05,
1778
1801
  outputPrice: 0.4,
1779
1802
  contextWindow: 128e3,
1780
- maxOutput: 32768
1803
+ maxOutput: 32768,
1804
+ toolCalling: true
1781
1805
  },
1782
1806
  {
1783
1807
  id: "openai/gpt-5.2-pro",
@@ -1787,7 +1811,8 @@ var BLOCKRUN_MODELS = [
1787
1811
  outputPrice: 168,
1788
1812
  contextWindow: 4e5,
1789
1813
  maxOutput: 128e3,
1790
- reasoning: true
1814
+ reasoning: true,
1815
+ toolCalling: true
1791
1816
  },
1792
1817
  // OpenAI Codex Family
1793
1818
  {
@@ -1798,7 +1823,8 @@ var BLOCKRUN_MODELS = [
1798
1823
  outputPrice: 14,
1799
1824
  contextWindow: 128e3,
1800
1825
  maxOutput: 32e3,
1801
- agentic: true
1826
+ agentic: true,
1827
+ toolCalling: true
1802
1828
  },
1803
1829
  // OpenAI GPT-4 Family
1804
1830
  {
@@ -1809,7 +1835,8 @@ var BLOCKRUN_MODELS = [
1809
1835
  outputPrice: 8,
1810
1836
  contextWindow: 128e3,
1811
1837
  maxOutput: 16384,
1812
- vision: true
1838
+ vision: true,
1839
+ toolCalling: true
1813
1840
  },
1814
1841
  {
1815
1842
  id: "openai/gpt-4.1-mini",
@@ -1818,7 +1845,8 @@ var BLOCKRUN_MODELS = [
1818
1845
  inputPrice: 0.4,
1819
1846
  outputPrice: 1.6,
1820
1847
  contextWindow: 128e3,
1821
- maxOutput: 16384
1848
+ maxOutput: 16384,
1849
+ toolCalling: true
1822
1850
  },
1823
1851
  {
1824
1852
  id: "openai/gpt-4.1-nano",
@@ -1827,7 +1855,8 @@ var BLOCKRUN_MODELS = [
1827
1855
  inputPrice: 0.1,
1828
1856
  outputPrice: 0.4,
1829
1857
  contextWindow: 128e3,
1830
- maxOutput: 16384
1858
+ maxOutput: 16384,
1859
+ toolCalling: true
1831
1860
  },
1832
1861
  {
1833
1862
  id: "openai/gpt-4o",
@@ -1838,7 +1867,8 @@ var BLOCKRUN_MODELS = [
1838
1867
  contextWindow: 128e3,
1839
1868
  maxOutput: 16384,
1840
1869
  vision: true,
1841
- agentic: true
1870
+ agentic: true,
1871
+ toolCalling: true
1842
1872
  },
1843
1873
  {
1844
1874
  id: "openai/gpt-4o-mini",
@@ -1847,7 +1877,8 @@ var BLOCKRUN_MODELS = [
1847
1877
  inputPrice: 0.15,
1848
1878
  outputPrice: 0.6,
1849
1879
  contextWindow: 128e3,
1850
- maxOutput: 16384
1880
+ maxOutput: 16384,
1881
+ toolCalling: true
1851
1882
  },
1852
1883
  // OpenAI O-series (Reasoning)
1853
1884
  {
@@ -1858,7 +1889,8 @@ var BLOCKRUN_MODELS = [
1858
1889
  outputPrice: 60,
1859
1890
  contextWindow: 2e5,
1860
1891
  maxOutput: 1e5,
1861
- reasoning: true
1892
+ reasoning: true,
1893
+ toolCalling: true
1862
1894
  },
1863
1895
  {
1864
1896
  id: "openai/o1-mini",
@@ -1868,7 +1900,8 @@ var BLOCKRUN_MODELS = [
1868
1900
  outputPrice: 4.4,
1869
1901
  contextWindow: 128e3,
1870
1902
  maxOutput: 65536,
1871
- reasoning: true
1903
+ reasoning: true,
1904
+ toolCalling: true
1872
1905
  },
1873
1906
  {
1874
1907
  id: "openai/o3",
@@ -1878,7 +1911,8 @@ var BLOCKRUN_MODELS = [
1878
1911
  outputPrice: 8,
1879
1912
  contextWindow: 2e5,
1880
1913
  maxOutput: 1e5,
1881
- reasoning: true
1914
+ reasoning: true,
1915
+ toolCalling: true
1882
1916
  },
1883
1917
  {
1884
1918
  id: "openai/o3-mini",
@@ -1888,7 +1922,8 @@ var BLOCKRUN_MODELS = [
1888
1922
  outputPrice: 4.4,
1889
1923
  contextWindow: 128e3,
1890
1924
  maxOutput: 65536,
1891
- reasoning: true
1925
+ reasoning: true,
1926
+ toolCalling: true
1892
1927
  },
1893
1928
  {
1894
1929
  id: "openai/o4-mini",
@@ -1898,7 +1933,8 @@ var BLOCKRUN_MODELS = [
1898
1933
  outputPrice: 4.4,
1899
1934
  contextWindow: 128e3,
1900
1935
  maxOutput: 65536,
1901
- reasoning: true
1936
+ reasoning: true,
1937
+ toolCalling: true
1902
1938
  },
1903
1939
  // Anthropic - all Claude models excel at agentic workflows
1904
1940
  // Use newest versions (4.6) with full provider prefix
@@ -1910,7 +1946,9 @@ var BLOCKRUN_MODELS = [
1910
1946
  outputPrice: 5,
1911
1947
  contextWindow: 2e5,
1912
1948
  maxOutput: 8192,
1913
- agentic: true
1949
+ vision: true,
1950
+ agentic: true,
1951
+ toolCalling: true
1914
1952
  },
1915
1953
  {
1916
1954
  id: "anthropic/claude-sonnet-4.6",
@@ -1921,7 +1959,9 @@ var BLOCKRUN_MODELS = [
1921
1959
  contextWindow: 2e5,
1922
1960
  maxOutput: 64e3,
1923
1961
  reasoning: true,
1924
- agentic: true
1962
+ vision: true,
1963
+ agentic: true,
1964
+ toolCalling: true
1925
1965
  },
1926
1966
  {
1927
1967
  id: "anthropic/claude-opus-4.6",
@@ -1932,19 +1972,22 @@ var BLOCKRUN_MODELS = [
1932
1972
  contextWindow: 2e5,
1933
1973
  maxOutput: 32e3,
1934
1974
  reasoning: true,
1935
- agentic: true
1975
+ vision: true,
1976
+ agentic: true,
1977
+ toolCalling: true
1936
1978
  },
1937
1979
  // Google
1938
1980
  {
1939
- id: "google/gemini-3.1-pro-preview",
1940
- name: "Gemini 3.1 Pro Preview",
1981
+ id: "google/gemini-3.1-pro",
1982
+ name: "Gemini 3.1 Pro",
1941
1983
  version: "3.1",
1942
1984
  inputPrice: 2,
1943
1985
  outputPrice: 12,
1944
1986
  contextWindow: 105e4,
1945
1987
  maxOutput: 65536,
1946
1988
  reasoning: true,
1947
- vision: true
1989
+ vision: true,
1990
+ toolCalling: true
1948
1991
  },
1949
1992
  {
1950
1993
  id: "google/gemini-3-pro-preview",
@@ -1955,7 +1998,8 @@ var BLOCKRUN_MODELS = [
1955
1998
  contextWindow: 105e4,
1956
1999
  maxOutput: 65536,
1957
2000
  reasoning: true,
1958
- vision: true
2001
+ vision: true,
2002
+ toolCalling: true
1959
2003
  },
1960
2004
  {
1961
2005
  id: "google/gemini-3-flash-preview",
@@ -1965,7 +2009,8 @@ var BLOCKRUN_MODELS = [
1965
2009
  outputPrice: 3,
1966
2010
  contextWindow: 1e6,
1967
2011
  maxOutput: 65536,
1968
- vision: true
2012
+ vision: true,
2013
+ toolCalling: true
1969
2014
  },
1970
2015
  {
1971
2016
  id: "google/gemini-2.5-pro",
@@ -1976,7 +2021,8 @@ var BLOCKRUN_MODELS = [
1976
2021
  contextWindow: 105e4,
1977
2022
  maxOutput: 65536,
1978
2023
  reasoning: true,
1979
- vision: true
2024
+ vision: true,
2025
+ toolCalling: true
1980
2026
  },
1981
2027
  {
1982
2028
  id: "google/gemini-2.5-flash",
@@ -1985,7 +2031,9 @@ var BLOCKRUN_MODELS = [
1985
2031
  inputPrice: 0.3,
1986
2032
  outputPrice: 2.5,
1987
2033
  contextWindow: 1e6,
1988
- maxOutput: 65536
2034
+ maxOutput: 65536,
2035
+ vision: true,
2036
+ toolCalling: true
1989
2037
  },
1990
2038
  {
1991
2039
  id: "google/gemini-2.5-flash-lite",
@@ -1994,7 +2042,8 @@ var BLOCKRUN_MODELS = [
1994
2042
  inputPrice: 0.1,
1995
2043
  outputPrice: 0.4,
1996
2044
  contextWindow: 1e6,
1997
- maxOutput: 65536
2045
+ maxOutput: 65536,
2046
+ toolCalling: true
1998
2047
  },
1999
2048
  // DeepSeek
2000
2049
  {
@@ -2004,7 +2053,8 @@ var BLOCKRUN_MODELS = [
2004
2053
  inputPrice: 0.28,
2005
2054
  outputPrice: 0.42,
2006
2055
  contextWindow: 128e3,
2007
- maxOutput: 8192
2056
+ maxOutput: 8192,
2057
+ toolCalling: true
2008
2058
  },
2009
2059
  {
2010
2060
  id: "deepseek/deepseek-reasoner",
@@ -2014,7 +2064,8 @@ var BLOCKRUN_MODELS = [
2014
2064
  outputPrice: 0.42,
2015
2065
  contextWindow: 128e3,
2016
2066
  maxOutput: 8192,
2017
- reasoning: true
2067
+ reasoning: true,
2068
+ toolCalling: true
2018
2069
  },
2019
2070
  // Moonshot / Kimi - optimized for agentic workflows
2020
2071
  {
@@ -2027,7 +2078,8 @@ var BLOCKRUN_MODELS = [
2027
2078
  maxOutput: 8192,
2028
2079
  reasoning: true,
2029
2080
  vision: true,
2030
- agentic: true
2081
+ agentic: true,
2082
+ toolCalling: true
2031
2083
  },
2032
2084
  // xAI / Grok
2033
2085
  {
@@ -2038,7 +2090,8 @@ var BLOCKRUN_MODELS = [
2038
2090
  outputPrice: 15,
2039
2091
  contextWindow: 131072,
2040
2092
  maxOutput: 16384,
2041
- reasoning: true
2093
+ reasoning: true,
2094
+ toolCalling: true
2042
2095
  },
2043
2096
  // grok-3-fast removed - too expensive ($5/$25), use grok-4-fast instead
2044
2097
  {
@@ -2048,7 +2101,8 @@ var BLOCKRUN_MODELS = [
2048
2101
  inputPrice: 0.3,
2049
2102
  outputPrice: 0.5,
2050
2103
  contextWindow: 131072,
2051
- maxOutput: 16384
2104
+ maxOutput: 16384,
2105
+ toolCalling: true
2052
2106
  },
2053
2107
  // xAI Grok 4 Family - Ultra-cheap fast models
2054
2108
  {
@@ -2059,7 +2113,8 @@ var BLOCKRUN_MODELS = [
2059
2113
  outputPrice: 0.5,
2060
2114
  contextWindow: 131072,
2061
2115
  maxOutput: 16384,
2062
- reasoning: true
2116
+ reasoning: true,
2117
+ toolCalling: true
2063
2118
  },
2064
2119
  {
2065
2120
  id: "xai/grok-4-fast-non-reasoning",
@@ -2068,7 +2123,8 @@ var BLOCKRUN_MODELS = [
2068
2123
  inputPrice: 0.2,
2069
2124
  outputPrice: 0.5,
2070
2125
  contextWindow: 131072,
2071
- maxOutput: 16384
2126
+ maxOutput: 16384,
2127
+ toolCalling: true
2072
2128
  },
2073
2129
  {
2074
2130
  id: "xai/grok-4-1-fast-reasoning",
@@ -2078,7 +2134,8 @@ var BLOCKRUN_MODELS = [
2078
2134
  outputPrice: 0.5,
2079
2135
  contextWindow: 131072,
2080
2136
  maxOutput: 16384,
2081
- reasoning: true
2137
+ reasoning: true,
2138
+ toolCalling: true
2082
2139
  },
2083
2140
  {
2084
2141
  id: "xai/grok-4-1-fast-non-reasoning",
@@ -2087,7 +2144,8 @@ var BLOCKRUN_MODELS = [
2087
2144
  inputPrice: 0.2,
2088
2145
  outputPrice: 0.5,
2089
2146
  contextWindow: 131072,
2090
- maxOutput: 16384
2147
+ maxOutput: 16384,
2148
+ toolCalling: true
2091
2149
  },
2092
2150
  {
2093
2151
  id: "xai/grok-code-fast-1",
@@ -2096,9 +2154,10 @@ var BLOCKRUN_MODELS = [
2096
2154
  inputPrice: 0.2,
2097
2155
  outputPrice: 1.5,
2098
2156
  contextWindow: 131072,
2099
- maxOutput: 16384,
2100
- agentic: true
2101
- // Good for coding tasks
2157
+ maxOutput: 16384
2158
+ // toolCalling intentionally omitted: outputs tool calls as plain text JSON,
2159
+ // not OpenAI-compatible structured function calls. Will be skipped when
2160
+ // request has tools to prevent the "talking to itself" bug.
2102
2161
  },
2103
2162
  {
2104
2163
  id: "xai/grok-4-0709",
@@ -2108,7 +2167,8 @@ var BLOCKRUN_MODELS = [
2108
2167
  outputPrice: 1.5,
2109
2168
  contextWindow: 131072,
2110
2169
  maxOutput: 16384,
2111
- reasoning: true
2170
+ reasoning: true,
2171
+ toolCalling: true
2112
2172
  },
2113
2173
  {
2114
2174
  id: "xai/grok-2-vision",
@@ -2118,7 +2178,8 @@ var BLOCKRUN_MODELS = [
2118
2178
  outputPrice: 10,
2119
2179
  contextWindow: 131072,
2120
2180
  maxOutput: 16384,
2121
- vision: true
2181
+ vision: true,
2182
+ toolCalling: true
2122
2183
  },
2123
2184
  // MiniMax
2124
2185
  {
@@ -2130,7 +2191,8 @@ var BLOCKRUN_MODELS = [
2130
2191
  contextWindow: 204800,
2131
2192
  maxOutput: 16384,
2132
2193
  reasoning: true,
2133
- agentic: true
2194
+ agentic: true,
2195
+ toolCalling: true
2134
2196
  },
2135
2197
  // NVIDIA - Free/cheap models
2136
2198
  {
@@ -2141,6 +2203,8 @@ var BLOCKRUN_MODELS = [
2141
2203
  outputPrice: 0,
2142
2204
  contextWindow: 128e3,
2143
2205
  maxOutput: 16384
2206
+ // toolCalling intentionally omitted: free model, structured function
2207
+ // calling support unverified. Excluded from tool-heavy routing paths.
2144
2208
  },
2145
2209
  {
2146
2210
  id: "nvidia/kimi-k2.5",
@@ -2149,7 +2213,8 @@ var BLOCKRUN_MODELS = [
2149
2213
  inputPrice: 0.55,
2150
2214
  outputPrice: 2.5,
2151
2215
  contextWindow: 262144,
2152
- maxOutput: 16384
2216
+ maxOutput: 16384,
2217
+ toolCalling: true
2153
2218
  }
2154
2219
  ];
2155
2220
  function toOpenClawModel(m) {
@@ -2178,6 +2243,16 @@ var OPENCLAW_MODELS = [
2178
2243
  ...BLOCKRUN_MODELS.map(toOpenClawModel),
2179
2244
  ...ALIAS_MODELS
2180
2245
  ];
2246
/**
 * Report whether a model is flagged as supporting tool calling.
 *
 * The first occurrence of "blockrun/" is removed from the id before it is
 * compared against the ids in BLOCKRUN_MODELS. Models that are not listed, or
 * that carry no `toolCalling` value, are reported as unsupported.
 *
 * @param modelId Model id, with or without the "blockrun/" segment.
 * @returns The matching entry's `toolCalling` value, or false.
 */
function supportsToolCalling(modelId) {
  const bareId = modelId.replace("blockrun/", "");
  for (const entry of BLOCKRUN_MODELS) {
    if (entry.id === bareId) {
      return entry.toolCalling ?? false;
    }
  }
  return false;
}
2251
/**
 * Report whether a model is flagged as supporting vision (image input).
 *
 * The first occurrence of "blockrun/" is removed from the id before it is
 * compared against the ids in BLOCKRUN_MODELS. Models that are not listed, or
 * that carry no `vision` value, are reported as unsupported.
 *
 * @param modelId Model id, with or without the "blockrun/" segment.
 * @returns The matching entry's `vision` value, or false.
 */
function supportsVision(modelId) {
  const bareId = modelId.replace("blockrun/", "");
  for (const entry of BLOCKRUN_MODELS) {
    if (entry.id === bareId) {
      return entry.vision ?? false;
    }
  }
  return false;
}
2181
2256
  function getModelContextWindow(modelId) {
2182
2257
  const normalized = modelId.replace("blockrun/", "");
2183
2258
  const model = BLOCKRUN_MODELS.find((m) => m.id === normalized);
@@ -2828,6 +2903,102 @@ var BalanceMonitor = class {
2828
2903
  }
2829
2904
  };
2830
2905
 
2906
+ // src/solana-balance.ts
2907
+ import { address as solAddress, createSolanaRpc } from "@solana/kit";
2908
+ var SOLANA_USDC_MINT = "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v";
2909
+ var SOLANA_DEFAULT_RPC = "https://api.mainnet-beta.solana.com";
2910
+ var BALANCE_TIMEOUT_MS = 1e4;
2911
+ var CACHE_TTL_MS2 = 3e4;
2912
/**
 * Balance monitor for a wallet's USDC holdings on Solana.
 *
 * Amounts are bigint values in micro-units (divided by 1e6 for display, see
 * formatUSDC). The last fetched balance is cached in memory for CACHE_TTL_MS2
 * so repeated checks do not query the RPC endpoint every time.
 */
var SolanaBalanceMonitor = class {
  rpc;
  walletAddress;
  // Last known balance (bigint), or null when nothing is cached.
  cachedBalance = null;
  // Date.now() timestamp associated with cachedBalance; 0 when nothing is cached.
  cachedAt = 0;
  /**
   * @param walletAddress Wallet address whose token accounts are queried.
   * @param rpcUrl Optional RPC endpoint. When empty, falls back to the
   *   CLAWROUTER_SOLANA_RPC_URL environment variable, then SOLANA_DEFAULT_RPC.
   */
  constructor(walletAddress, rpcUrl) {
    this.walletAddress = walletAddress;
    const url = rpcUrl || process["env"].CLAWROUTER_SOLANA_RPC_URL || SOLANA_DEFAULT_RPC;
    this.rpc = createSolanaRpc(url);
  }
  /**
   * Return balance info, served from the cache while it is younger than
   * CACHE_TTL_MS2 and fetched from the RPC endpoint otherwise.
   *
   * @returns The object produced by buildInfo for the current balance.
   * @throws Error when the balance has to be fetched and the fetch fails.
   */
  async checkBalance() {
    const now = Date.now();
    if (this.cachedBalance !== null && now - this.cachedAt < CACHE_TTL_MS2) {
      return this.buildInfo(this.cachedBalance);
    }
    const balance = await this.fetchBalance();
    this.cachedBalance = balance;
    this.cachedAt = now;
    return this.buildInfo(balance);
  }
  /**
   * Subtract an estimated spend from the cached balance.
   * Does nothing when no balance is cached or the cached balance is smaller
   * than the amount.
   *
   * @param amountMicros Amount to subtract, in micro-units (bigint).
   */
  deductEstimated(amountMicros) {
    if (this.cachedBalance !== null && this.cachedBalance >= amountMicros) {
      this.cachedBalance -= amountMicros;
    }
  }
  /**
   * Drop the cached balance so the next check fetches a fresh value.
   */
  invalidate() {
    this.cachedBalance = null;
    this.cachedAt = 0;
  }
  /**
   * Invalidate the cache and fetch the balance again.
   *
   * @returns Fresh balance info.
   */
  async refresh() {
    this.invalidate();
    return this.checkBalance();
  }
  /**
   * Check if balance is sufficient for an estimated cost.
   *
   * @param estimatedCostMicros Estimated cost in micro-units (bigint).
   * @returns `{ sufficient: true, info }` when the balance covers the cost,
   *   otherwise `{ sufficient: false, info, shortfall }` where `shortfall` is
   *   the missing amount formatted by formatUSDC.
   */
  async checkSufficient(estimatedCostMicros) {
    const info = await this.checkBalance();
    if (info.balance >= estimatedCostMicros) {
      return { sufficient: true, info };
    }
    const shortfall = estimatedCostMicros - info.balance;
    return {
      sufficient: false,
      info,
      shortfall: this.formatUSDC(shortfall)
    };
  }
  /**
   * Format USDC amount (in micros) as "$X.XX".
   *
   * @param amountMicros Amount in micro-units.
   * @returns The amount divided by 1e6, fixed to two decimals, prefixed with "$".
   */
  formatUSDC(amountMicros) {
    const dollars = Number(amountMicros) / 1e6;
    return `$${dollars.toFixed(2)}`;
  }
  /**
   * @returns The wallet address this monitor was constructed with.
   */
  getWalletAddress() {
    return this.walletAddress;
  }
  /**
   * Query the RPC endpoint for the wallet's token accounts of the
   * SOLANA_USDC_MINT mint and sum their raw amounts.
   * The request is aborted after BALANCE_TIMEOUT_MS.
   *
   * @returns Total raw token amount across all matching accounts (bigint);
   *   0n when the wallet has no matching account.
   * @throws Error wrapping any failure; the original error is attached as
   *   `cause` so its stack and type remain available to callers.
   */
  async fetchBalance() {
    const owner = solAddress(this.walletAddress);
    const mint = solAddress(SOLANA_USDC_MINT);
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), BALANCE_TIMEOUT_MS);
    try {
      const response = await this.rpc.getTokenAccountsByOwner(owner, { mint }, { encoding: "jsonParsed" }).send({ abortSignal: controller.signal });
      if (response.value.length === 0) return 0n;
      let total = 0n;
      for (const account of response.value) {
        const parsed = account.account.data;
        total += BigInt(parsed.parsed.info.tokenAmount.amount);
      }
      return total;
    } catch (err) {
      throw new Error(
        `Failed to fetch Solana USDC balance: ${err instanceof Error ? err.message : String(err)}`,
        { cause: err }
      );
    } finally {
      // Always clear the timeout so a finished request leaves no pending timer.
      clearTimeout(timer);
    }
  }
  /**
   * Build the balance info object returned to callers.
   *
   * @param balance Balance in micro-units (bigint).
   * @returns `balance`, its "$X.XX" rendering, `isLow` (below 1000000n),
   *   `isEmpty` (below 100n) and the wallet address.
   */
  buildInfo(balance) {
    const dollars = Number(balance) / 1e6;
    return {
      balance,
      balanceUSD: `$${dollars.toFixed(2)}`,
      isLow: balance < 1000000n,
      isEmpty: balance < 100n,
      walletAddress: this.walletAddress
    };
  }
};
3001
+
2831
3002
  // src/auth.ts
2832
3003
  import { writeFile, mkdir as mkdir2 } from "fs/promises";
2833
3004
  import { join as join4 } from "path";
@@ -2884,7 +3055,9 @@ async function loadSavedWallet() {
2884
3055
  console.error(`[ClawRouter] \u2717 CRITICAL: Wallet file exists but has invalid format!`);
2885
3056
  console.error(`[ClawRouter] File: ${WALLET_FILE}`);
2886
3057
  console.error(`[ClawRouter] Expected: 0x followed by 64 hex characters (66 chars total)`);
2887
- console.error(`[ClawRouter] To fix: restore your backup key or set BLOCKRUN_WALLET_KEY env var`);
3058
+ console.error(
3059
+ `[ClawRouter] To fix: restore your backup key or set BLOCKRUN_WALLET_KEY env var`
3060
+ );
2888
3061
  throw new Error(
2889
3062
  `Wallet file at ${WALLET_FILE} is corrupted or has wrong format. Refusing to auto-generate new wallet to protect existing funds. Restore your backup key or set BLOCKRUN_WALLET_KEY environment variable.`
2890
3063
  );
@@ -2897,7 +3070,8 @@ async function loadSavedWallet() {
2897
3070
  `[ClawRouter] \u2717 Failed to read wallet file: ${err instanceof Error ? err.message : String(err)}`
2898
3071
  );
2899
3072
  throw new Error(
2900
- `Cannot read wallet file at ${WALLET_FILE}: ${err instanceof Error ? err.message : String(err)}. Refusing to auto-generate new wallet to protect existing funds. Fix file permissions or set BLOCKRUN_WALLET_KEY environment variable.`
3073
+ `Cannot read wallet file at ${WALLET_FILE}: ${err instanceof Error ? err.message : String(err)}. Refusing to auto-generate new wallet to protect existing funds. Fix file permissions or set BLOCKRUN_WALLET_KEY environment variable.`,
3074
+ { cause: err }
2901
3075
  );
2902
3076
  }
2903
3077
  }
@@ -2938,7 +3112,8 @@ async function generateAndSaveWallet() {
2938
3112
  console.log(`[ClawRouter] Wallet saved and verified at ${WALLET_FILE}`);
2939
3113
  } catch (err) {
2940
3114
  throw new Error(
2941
- `Failed to verify wallet file after creation: ${err instanceof Error ? err.message : String(err)}`
3115
+ `Failed to verify wallet file after creation: ${err instanceof Error ? err.message : String(err)}`,
3116
+ { cause: err }
2942
3117
  );
2943
3118
  }
2944
3119
  console.log(`[ClawRouter]`);
@@ -3767,8 +3942,9 @@ function shouldCompress(messages) {
3767
3942
  }
3768
3943
 
3769
3944
  // src/session.ts
3945
+ import { createHash as createHash3 } from "crypto";
3770
3946
  var DEFAULT_SESSION_CONFIG = {
3771
- enabled: false,
3947
+ enabled: true,
3772
3948
  timeoutMs: 30 * 60 * 1e3,
3773
3949
  // 30 minutes
3774
3950
  headerName: "x-session-id"
@@ -3823,7 +3999,10 @@ var SessionStore = class {
3823
3999
  tier,
3824
4000
  createdAt: now,
3825
4001
  lastUsedAt: now,
3826
- requestCount: 1
4002
+ requestCount: 1,
4003
+ recentHashes: [],
4004
+ strikes: 0,
4005
+ escalated: false
3827
4006
  });
3828
4007
  }
3829
4008
  }
@@ -3875,6 +4054,43 @@ var SessionStore = class {
3875
4054
  }
3876
4055
  }
3877
4056
  }
4057
+ /**
4058
+ * Record a request content hash and detect repetitive patterns.
4059
+ * Returns true if escalation should be triggered (3+ consecutive similar requests).
4060
+ */
4061
+ recordRequestHash(sessionId, hash) {
4062
+ const entry = this.sessions.get(sessionId);
4063
+ if (!entry) return false;
4064
+ const prev = entry.recentHashes;
4065
+ if (prev.length > 0 && prev[prev.length - 1] === hash) {
4066
+ entry.strikes++;
4067
+ } else {
4068
+ entry.strikes = 0;
4069
+ }
4070
+ entry.recentHashes.push(hash);
4071
+ if (entry.recentHashes.length > 3) {
4072
+ entry.recentHashes.shift();
4073
+ }
4074
+ return entry.strikes >= 2 && !entry.escalated;
4075
+ }
4076
+ /**
4077
+ * Escalate session to next tier. Returns the new model/tier or null if already at max.
4078
+ */
4079
+ escalateSession(sessionId, tierConfigs) {
4080
+ const entry = this.sessions.get(sessionId);
4081
+ if (!entry) return null;
4082
+ const TIER_ORDER = ["SIMPLE", "MEDIUM", "COMPLEX", "REASONING"];
4083
+ const currentIdx = TIER_ORDER.indexOf(entry.tier);
4084
+ if (currentIdx < 0 || currentIdx >= TIER_ORDER.length - 1) return null;
4085
+ const nextTier = TIER_ORDER[currentIdx + 1];
4086
+ const nextConfig = tierConfigs[nextTier];
4087
+ if (!nextConfig) return null;
4088
+ entry.model = nextConfig.primary;
4089
+ entry.tier = nextTier;
4090
+ entry.strikes = 0;
4091
+ entry.escalated = true;
4092
+ return { model: nextConfig.primary, tier: nextTier };
4093
+ }
3878
4094
  /**
3879
4095
  * Stop the cleanup interval.
3880
4096
  */
@@ -3895,6 +4111,17 @@ function getSessionId(headers, headerName = DEFAULT_SESSION_CONFIG.headerName) {
3895
4111
  }
3896
4112
  return void 0;
3897
4113
  }
4114
/**
 * Derive a session id from the first user message of a conversation, for use
 * when the client did not send an explicit session header.
 *
 * String content is hashed as-is; any other content (e.g. an array of content
 * parts) is JSON-serialised first.
 *
 * @param messages Chat messages; each is expected to expose `role` and `content`.
 * @returns The first 8 hex characters of the SHA-256 digest of the first user
 *   message's content, or `undefined` when there is no user message.
 */
function deriveSessionId(messages) {
  const firstUser = messages.find((m) => m.role === "user");
  if (!firstUser) return void 0;
  // JSON.stringify yields undefined (not a string) when content is undefined, and
  // Hash.update() throws on a non-string/non-buffer argument — fall back to "".
  const content =
    typeof firstUser.content === "string"
      ? firstUser.content
      : JSON.stringify(firstUser.content) ?? "";
  return createHash3("sha256").update(content).digest("hex").slice(0, 8);
}
4120
/**
 * Build a short fingerprint of a request, used to detect repeated requests.
 *
 * Whitespace runs in the user content are collapsed to single spaces, the text is
 * trimmed and capped at 500 characters, and — when tool-call names are given — a
 * sorted, comma-joined `|tools:` suffix is appended before hashing.
 *
 * @param lastUserContent Text of the most recent user message.
 * @param toolCallNames Optional list of tool-call names; their order does not
 *   affect the result and the array is left untouched.
 * @returns The first 12 hex characters of the SHA-256 digest.
 */
function hashRequestContent(lastUserContent, toolCallNames) {
  const normalized = lastUserContent.replace(/\s+/g, " ").trim().slice(0, 500);
  // Sort a copy: Array.prototype.sort reorders in place and would otherwise
  // mutate the caller's array.
  const toolSuffix = toolCallNames?.length
    ? `|tools:${[...toolCallNames].sort().join(",")}`
    : "";
  return createHash3("sha256").update(normalized + toolSuffix).digest("hex").slice(0, 12);
}
3898
4125
 
3899
4126
  // src/updater.ts
3900
4127
  var NPM_REGISTRY = "https://registry.npmjs.org/@blockrun/clawrouter/latest";
@@ -4652,6 +4879,27 @@ async function proxyPartnerRequest(req, res, apiBase, payFetch) {
4652
4879
  }).catch(() => {
4653
4880
  });
4654
4881
  }
4882
+ async function uploadDataUriToHost(dataUri) {
4883
+ const match = dataUri.match(/^data:(image\/\w+);base64,(.+)$/);
4884
+ if (!match) throw new Error("Invalid data URI format");
4885
+ const [, mimeType, b64Data] = match;
4886
+ const ext = mimeType === "image/jpeg" ? "jpg" : mimeType.split("/")[1] ?? "png";
4887
+ const buffer = Buffer.from(b64Data, "base64");
4888
+ const blob = new Blob([buffer], { type: mimeType });
4889
+ const form = new FormData();
4890
+ form.append("reqtype", "fileupload");
4891
+ form.append("fileToUpload", blob, `image.${ext}`);
4892
+ const resp = await fetch("https://catbox.moe/user/api.php", {
4893
+ method: "POST",
4894
+ body: form
4895
+ });
4896
+ if (!resp.ok) throw new Error(`catbox.moe upload failed: HTTP ${resp.status}`);
4897
+ const result = await resp.text();
4898
+ if (result.startsWith("https://")) {
4899
+ return result.trim();
4900
+ }
4901
+ throw new Error(`catbox.moe upload failed: ${result}`);
4902
+ }
4655
4903
  async function startProxy(options) {
4656
4904
  const walletKey = typeof options.wallet === "string" ? options.wallet : options.wallet.key;
4657
4905
  const solanaPrivateKeyBytes = typeof options.wallet === "string" ? void 0 : options.wallet.solanaPrivateKeyBytes;
@@ -4666,7 +4914,6 @@ async function startProxy(options) {
4666
4914
  const existingProxy = await checkExistingProxy(listenPort);
4667
4915
  if (existingProxy) {
4668
4916
  const account2 = privateKeyToAccount3(walletKey);
4669
- const balanceMonitor2 = new BalanceMonitor(account2.address);
4670
4917
  const baseUrl2 = `http://127.0.0.1:${listenPort}`;
4671
4918
  if (existingProxy.wallet !== account2.address) {
4672
4919
  console.warn(
@@ -4691,6 +4938,7 @@ async function startProxy(options) {
4691
4938
  const solanaSigner = await createKeyPairSignerFromPrivateKeyBytes(solanaPrivateKeyBytes);
4692
4939
  reuseSolanaAddress = solanaSigner.address;
4693
4940
  }
4941
+ const balanceMonitor2 = paymentChain === "solana" && reuseSolanaAddress ? new SolanaBalanceMonitor(reuseSolanaAddress) : new BalanceMonitor(account2.address);
4694
4942
  options.onReady?.(listenPort);
4695
4943
  return {
4696
4944
  port: listenPort,
@@ -4721,8 +4969,10 @@ async function startProxy(options) {
4721
4969
  const chain = network.startsWith("eip155") ? "Base (EVM)" : network.startsWith("solana") ? "Solana" : network;
4722
4970
  console.log(`[ClawRouter] Payment signed on ${chain} (${network})`);
4723
4971
  });
4724
- const payFetch = createPayFetchWithPreAuth(fetch, x402);
4725
- const balanceMonitor = new BalanceMonitor(account.address);
4972
+ const payFetch = createPayFetchWithPreAuth(fetch, x402, void 0, {
4973
+ skipPreAuth: paymentChain === "solana"
4974
+ });
4975
+ const balanceMonitor = paymentChain === "solana" && solanaAddress ? new SolanaBalanceMonitor(solanaAddress) : new BalanceMonitor(account.address);
4726
4976
  const routingConfig = mergeRoutingConfig(options.routingConfig);
4727
4977
  const modelPricing = buildModelPricing();
4728
4978
  const routerOpts = {
@@ -5075,14 +5325,19 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5075
5325
  }
5076
5326
  let body = Buffer.concat(bodyChunks);
5077
5327
  const originalContextSizeKB = Math.ceil(body.length / 1024);
5328
+ const debugMode = req.headers["x-clawrouter-debug"] !== "false";
5078
5329
  let routingDecision;
5330
+ let hasTools = false;
5331
+ let hasVision = false;
5079
5332
  let isStreaming = false;
5080
5333
  let modelId = "";
5081
5334
  let maxTokens = 4096;
5082
5335
  let routingProfile = null;
5083
5336
  let accumulatedContent = "";
5337
+ let responseInputTokens;
5084
5338
  const isChatCompletion = req.url?.includes("/chat/completions");
5085
5339
  const sessionId = getSessionId(req.headers);
5340
+ let effectiveSessionId = sessionId;
5086
5341
  if (isChatCompletion && body.length > 0) {
5087
5342
  try {
5088
5343
  const parsed = JSON.parse(body.toString());
@@ -5090,10 +5345,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5090
5345
  modelId = parsed.model || "";
5091
5346
  maxTokens = parsed.max_tokens || 4096;
5092
5347
  let bodyModified = false;
5093
- if (sessionId && Array.isArray(parsed.messages)) {
5094
- const messages = parsed.messages;
5095
- const lastUserMsg = [...messages].reverse().find((m) => m.role === "user");
5096
- const lastContent = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
5348
+ const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
5349
+ const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
5350
+ const rawLastContent = lastUserMsg?.content;
5351
+ const lastContent = typeof rawLastContent === "string" ? rawLastContent : Array.isArray(rawLastContent) ? rawLastContent.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
5352
+ if (sessionId && parsedMessages.length > 0) {
5353
+ const messages = parsedMessages;
5097
5354
  if (sessionJournal.needsContext(lastContent)) {
5098
5355
  const journalText = sessionJournal.format(sessionId);
5099
5356
  if (journalText) {
@@ -5114,6 +5371,303 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5114
5371
  }
5115
5372
  }
5116
5373
  }
5374
+ if (lastContent.startsWith("/debug")) {
5375
+ const debugPrompt = lastContent.slice("/debug".length).trim() || "hello";
5376
+ const messages = parsed.messages;
5377
+ const systemMsg = messages?.find((m) => m.role === "system");
5378
+ const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
5379
+ const fullText = `${systemPrompt ?? ""} ${debugPrompt}`;
5380
+ const estimatedTokens = Math.ceil(fullText.length / 4);
5381
+ const normalizedModel2 = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
5382
+ const profileName = normalizedModel2.replace("blockrun/", "");
5383
+ const debugProfile = ["free", "eco", "auto", "premium"].includes(profileName) ? profileName : "auto";
5384
+ const scoring = classifyByRules(
5385
+ debugPrompt,
5386
+ systemPrompt,
5387
+ estimatedTokens,
5388
+ DEFAULT_ROUTING_CONFIG.scoring
5389
+ );
5390
+ const debugRouting = route(debugPrompt, systemPrompt, maxTokens, {
5391
+ ...routerOpts,
5392
+ routingProfile: debugProfile
5393
+ });
5394
+ const dimLines = (scoring.dimensions ?? []).map((d) => {
5395
+ const nameStr = (d.name + ":").padEnd(24);
5396
+ const scoreStr = d.score.toFixed(2).padStart(6);
5397
+ const sigStr = d.signal ? ` [${d.signal}]` : "";
5398
+ return ` ${nameStr}${scoreStr}${sigStr}`;
5399
+ }).join("\n");
5400
+ const sess = sessionId ? sessionStore.getSession(sessionId) : void 0;
5401
+ const sessLine = sess ? `Session: ${sessionId.slice(0, 8)}... \u2192 pinned: ${sess.model} (${sess.requestCount} requests)` : sessionId ? `Session: ${sessionId.slice(0, 8)}... \u2192 no pinned model` : "Session: none";
5402
+ const { simpleMedium, mediumComplex, complexReasoning } = DEFAULT_ROUTING_CONFIG.scoring.tierBoundaries;
5403
+ const debugText = [
5404
+ "ClawRouter Debug",
5405
+ "",
5406
+ `Profile: ${debugProfile} | Tier: ${debugRouting.tier} | Model: ${debugRouting.model}`,
5407
+ `Confidence: ${debugRouting.confidence.toFixed(2)} | Cost: $${debugRouting.costEstimate.toFixed(4)} | Savings: ${(debugRouting.savings * 100).toFixed(0)}%`,
5408
+ `Reasoning: ${debugRouting.reasoning}`,
5409
+ "",
5410
+ `Scoring (weighted: ${scoring.score.toFixed(3)})`,
5411
+ dimLines,
5412
+ "",
5413
+ `Tier Boundaries: SIMPLE <${simpleMedium.toFixed(2)} | MEDIUM <${mediumComplex.toFixed(2)} | COMPLEX <${complexReasoning.toFixed(2)} | REASONING >=${complexReasoning.toFixed(2)}`,
5414
+ "",
5415
+ sessLine
5416
+ ].join("\n");
5417
+ const completionId = `chatcmpl-debug-${Date.now()}`;
5418
+ const timestamp = Math.floor(Date.now() / 1e3);
5419
+ const syntheticResponse = {
5420
+ id: completionId,
5421
+ object: "chat.completion",
5422
+ created: timestamp,
5423
+ model: "clawrouter/debug",
5424
+ choices: [
5425
+ {
5426
+ index: 0,
5427
+ message: { role: "assistant", content: debugText },
5428
+ finish_reason: "stop"
5429
+ }
5430
+ ],
5431
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
5432
+ };
5433
+ if (isStreaming) {
5434
+ res.writeHead(200, {
5435
+ "Content-Type": "text/event-stream",
5436
+ "Cache-Control": "no-cache",
5437
+ Connection: "keep-alive"
5438
+ });
5439
+ const sseChunk = {
5440
+ id: completionId,
5441
+ object: "chat.completion.chunk",
5442
+ created: timestamp,
5443
+ model: "clawrouter/debug",
5444
+ choices: [
5445
+ {
5446
+ index: 0,
5447
+ delta: { role: "assistant", content: debugText },
5448
+ finish_reason: null
5449
+ }
5450
+ ]
5451
+ };
5452
+ const sseDone = {
5453
+ id: completionId,
5454
+ object: "chat.completion.chunk",
5455
+ created: timestamp,
5456
+ model: "clawrouter/debug",
5457
+ choices: [{ index: 0, delta: {}, finish_reason: "stop" }]
5458
+ };
5459
+ res.write(`data: ${JSON.stringify(sseChunk)}
5460
+
5461
+ `);
5462
+ res.write(`data: ${JSON.stringify(sseDone)}
5463
+
5464
+ `);
5465
+ res.write("data: [DONE]\n\n");
5466
+ res.end();
5467
+ } else {
5468
+ res.writeHead(200, { "Content-Type": "application/json" });
5469
+ res.end(JSON.stringify(syntheticResponse));
5470
+ }
5471
+ console.log(`[ClawRouter] /debug command \u2192 ${debugRouting.tier} | ${debugRouting.model}`);
5472
+ return;
5473
+ }
5474
+ if (lastContent.startsWith("/imagegen")) {
5475
+ const imageArgs = lastContent.slice("/imagegen".length).trim();
5476
+ let imageModel = "google/nano-banana";
5477
+ let imageSize = "1024x1024";
5478
+ let imagePrompt = imageArgs;
5479
+ const modelMatch = imageArgs.match(/--model\s+(\S+)/);
5480
+ if (modelMatch) {
5481
+ const raw = modelMatch[1];
5482
+ const IMAGE_MODEL_ALIASES = {
5483
+ "dall-e-3": "openai/dall-e-3",
5484
+ dalle3: "openai/dall-e-3",
5485
+ dalle: "openai/dall-e-3",
5486
+ "gpt-image": "openai/gpt-image-1",
5487
+ "gpt-image-1": "openai/gpt-image-1",
5488
+ flux: "black-forest/flux-1.1-pro",
5489
+ "flux-pro": "black-forest/flux-1.1-pro",
5490
+ banana: "google/nano-banana",
5491
+ "nano-banana": "google/nano-banana",
5492
+ "banana-pro": "google/nano-banana-pro",
5493
+ "nano-banana-pro": "google/nano-banana-pro"
5494
+ };
5495
+ imageModel = IMAGE_MODEL_ALIASES[raw] ?? raw;
5496
+ imagePrompt = imagePrompt.replace(/--model\s+\S+/, "").trim();
5497
+ }
5498
+ const sizeMatch = imageArgs.match(/--size\s+(\d+x\d+)/);
5499
+ if (sizeMatch) {
5500
+ imageSize = sizeMatch[1];
5501
+ imagePrompt = imagePrompt.replace(/--size\s+\d+x\d+/, "").trim();
5502
+ }
5503
+ if (!imagePrompt) {
5504
+ const errorText = [
5505
+ "Usage: /imagegen <prompt>",
5506
+ "",
5507
+ "Options:",
5508
+ " --model <model> Model to use (default: nano-banana)",
5509
+ " --size <WxH> Image size (default: 1024x1024)",
5510
+ "",
5511
+ "Models:",
5512
+ " nano-banana Google Gemini Flash \u2014 $0.05/image",
5513
+ " banana-pro Google Gemini Pro \u2014 $0.10/image (up to 4K)",
5514
+ " dall-e-3 OpenAI DALL-E 3 \u2014 $0.04/image",
5515
+ " gpt-image OpenAI GPT Image 1 \u2014 $0.02/image",
5516
+ " flux Black Forest Flux 1.1 Pro \u2014 $0.04/image",
5517
+ "",
5518
+ "Examples:",
5519
+ " /imagegen a cat wearing sunglasses",
5520
+ " /imagegen --model dall-e-3 a futuristic city at sunset",
5521
+ " /imagegen --model banana-pro --size 2048x2048 mountain landscape"
5522
+ ].join("\n");
5523
+ const completionId = `chatcmpl-image-${Date.now()}`;
5524
+ const timestamp = Math.floor(Date.now() / 1e3);
5525
+ if (isStreaming) {
5526
+ res.writeHead(200, {
5527
+ "Content-Type": "text/event-stream",
5528
+ "Cache-Control": "no-cache",
5529
+ Connection: "keep-alive"
5530
+ });
5531
+ res.write(
5532
+ `data: ${JSON.stringify({ id: completionId, object: "chat.completion.chunk", created: timestamp, model: "clawrouter/image", choices: [{ index: 0, delta: { role: "assistant", content: errorText }, finish_reason: null }] })}
5533
+
5534
+ `
5535
+ );
5536
+ res.write(
5537
+ `data: ${JSON.stringify({ id: completionId, object: "chat.completion.chunk", created: timestamp, model: "clawrouter/image", choices: [{ index: 0, delta: {}, finish_reason: "stop" }] })}
5538
+
5539
+ `
5540
+ );
5541
+ res.write("data: [DONE]\n\n");
5542
+ res.end();
5543
+ } else {
5544
+ res.writeHead(200, { "Content-Type": "application/json" });
5545
+ res.end(
5546
+ JSON.stringify({
5547
+ id: completionId,
5548
+ object: "chat.completion",
5549
+ created: timestamp,
5550
+ model: "clawrouter/image",
5551
+ choices: [
5552
+ {
5553
+ index: 0,
5554
+ message: { role: "assistant", content: errorText },
5555
+ finish_reason: "stop"
5556
+ }
5557
+ ],
5558
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
5559
+ })
5560
+ );
5561
+ }
5562
+ console.log(`[ClawRouter] /imagegen command \u2192 showing usage help`);
5563
+ return;
5564
+ }
5565
+ console.log(
5566
+ `[ClawRouter] /imagegen command \u2192 ${imageModel} (${imageSize}): ${imagePrompt.slice(0, 80)}...`
5567
+ );
5568
+ try {
5569
+ const imageUpstreamUrl = `${apiBase}/v1/images/generations`;
5570
+ const imageBody = JSON.stringify({
5571
+ model: imageModel,
5572
+ prompt: imagePrompt,
5573
+ size: imageSize,
5574
+ n: 1
5575
+ });
5576
+ const imageResponse = await payFetch(imageUpstreamUrl, {
5577
+ method: "POST",
5578
+ headers: { "content-type": "application/json", "user-agent": USER_AGENT },
5579
+ body: imageBody
5580
+ });
5581
+ const imageResult = await imageResponse.json();
5582
+ let responseText;
5583
+ if (!imageResponse.ok || imageResult.error) {
5584
+ const errMsg = typeof imageResult.error === "string" ? imageResult.error : imageResult.error?.message ?? `HTTP ${imageResponse.status}`;
5585
+ responseText = `Image generation failed: ${errMsg}`;
5586
+ console.log(`[ClawRouter] /imagegen error: ${errMsg}`);
5587
+ } else {
5588
+ const images = imageResult.data ?? [];
5589
+ if (images.length === 0) {
5590
+ responseText = "Image generation returned no results.";
5591
+ } else {
5592
+ const lines = [];
5593
+ for (const img of images) {
5594
+ if (img.url) {
5595
+ if (img.url.startsWith("data:")) {
5596
+ try {
5597
+ const hostedUrl = await uploadDataUriToHost(img.url);
5598
+ lines.push(hostedUrl);
5599
+ } catch (uploadErr) {
5600
+ console.error(
5601
+ `[ClawRouter] /imagegen: failed to upload data URI: ${uploadErr instanceof Error ? uploadErr.message : String(uploadErr)}`
5602
+ );
5603
+ lines.push(
5604
+ "Image generated but upload failed. Try again or use --model dall-e-3."
5605
+ );
5606
+ }
5607
+ } else {
5608
+ lines.push(img.url);
5609
+ }
5610
+ }
5611
+ if (img.revised_prompt) lines.push(`Revised prompt: ${img.revised_prompt}`);
5612
+ }
5613
+ lines.push("", `Model: ${imageModel} | Size: ${imageSize}`);
5614
+ responseText = lines.join("\n");
5615
+ }
5616
+ console.log(`[ClawRouter] /imagegen success: ${images.length} image(s) generated`);
5617
+ }
5618
+ const completionId = `chatcmpl-image-${Date.now()}`;
5619
+ const timestamp = Math.floor(Date.now() / 1e3);
5620
+ if (isStreaming) {
5621
+ res.writeHead(200, {
5622
+ "Content-Type": "text/event-stream",
5623
+ "Cache-Control": "no-cache",
5624
+ Connection: "keep-alive"
5625
+ });
5626
+ res.write(
5627
+ `data: ${JSON.stringify({ id: completionId, object: "chat.completion.chunk", created: timestamp, model: "clawrouter/image", choices: [{ index: 0, delta: { role: "assistant", content: responseText }, finish_reason: null }] })}
5628
+
5629
+ `
5630
+ );
5631
+ res.write(
5632
+ `data: ${JSON.stringify({ id: completionId, object: "chat.completion.chunk", created: timestamp, model: "clawrouter/image", choices: [{ index: 0, delta: {}, finish_reason: "stop" }] })}
5633
+
5634
+ `
5635
+ );
5636
+ res.write("data: [DONE]\n\n");
5637
+ res.end();
5638
+ } else {
5639
+ res.writeHead(200, { "Content-Type": "application/json" });
5640
+ res.end(
5641
+ JSON.stringify({
5642
+ id: completionId,
5643
+ object: "chat.completion",
5644
+ created: timestamp,
5645
+ model: "clawrouter/image",
5646
+ choices: [
5647
+ {
5648
+ index: 0,
5649
+ message: { role: "assistant", content: responseText },
5650
+ finish_reason: "stop"
5651
+ }
5652
+ ],
5653
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
5654
+ })
5655
+ );
5656
+ }
5657
+ } catch (err) {
5658
+ const errMsg = err instanceof Error ? err.message : String(err);
5659
+ console.error(`[ClawRouter] /imagegen error: ${errMsg}`);
5660
+ if (!res.headersSent) {
5661
+ res.writeHead(500, { "Content-Type": "application/json" });
5662
+ res.end(
5663
+ JSON.stringify({
5664
+ error: { message: `Image generation failed: ${errMsg}`, type: "image_error" }
5665
+ })
5666
+ );
5667
+ }
5668
+ }
5669
+ return;
5670
+ }
5117
5671
  if (parsed.stream === true) {
5118
5672
  parsed.stream = false;
5119
5673
  bodyModified = true;
@@ -5154,54 +5708,118 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5154
5708
  latencyMs: 0
5155
5709
  });
5156
5710
  } else {
5157
- const sessionId2 = getSessionId(
5158
- req.headers
5159
- );
5160
- const existingSession = sessionId2 ? sessionStore.getSession(sessionId2) : void 0;
5711
+ effectiveSessionId = getSessionId(req.headers) ?? deriveSessionId(parsedMessages);
5712
+ const existingSession = effectiveSessionId ? sessionStore.getSession(effectiveSessionId) : void 0;
5713
+ const rawPrompt = lastUserMsg?.content;
5714
+ const prompt = typeof rawPrompt === "string" ? rawPrompt : Array.isArray(rawPrompt) ? rawPrompt.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
5715
+ const systemMsg = parsedMessages.find((m) => m.role === "system");
5716
+ const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
5717
+ const tools = parsed.tools;
5718
+ hasTools = Array.isArray(tools) && tools.length > 0;
5719
+ if (hasTools && tools) {
5720
+ console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
5721
+ }
5722
+ hasVision = parsedMessages.some((m) => {
5723
+ if (Array.isArray(m.content)) {
5724
+ return m.content.some((p) => p.type === "image_url");
5725
+ }
5726
+ return false;
5727
+ });
5728
+ if (hasVision) {
5729
+ console.log(`[ClawRouter] Vision content detected, filtering to vision-capable models`);
5730
+ }
5731
+ routingDecision = route(prompt, systemPrompt, maxTokens, {
5732
+ ...routerOpts,
5733
+ routingProfile: routingProfile ?? void 0
5734
+ });
5161
5735
  if (existingSession) {
5162
- console.log(
5163
- `[ClawRouter] Session ${sessionId2?.slice(0, 8)}... using pinned model: ${existingSession.model}`
5164
- );
5165
- parsed.model = existingSession.model;
5166
- modelId = existingSession.model;
5167
- bodyModified = true;
5168
- sessionStore.touchSession(sessionId2);
5169
- } else {
5170
- const messages = parsed.messages;
5171
- let lastUserMsg;
5172
- if (messages) {
5173
- for (let i = messages.length - 1; i >= 0; i--) {
5174
- if (messages[i].role === "user") {
5175
- lastUserMsg = messages[i];
5176
- break;
5177
- }
5736
+ const tierRank = {
5737
+ SIMPLE: 0,
5738
+ MEDIUM: 1,
5739
+ COMPLEX: 2,
5740
+ REASONING: 3
5741
+ };
5742
+ const existingRank = tierRank[existingSession.tier] ?? 0;
5743
+ const newRank = tierRank[routingDecision.tier] ?? 0;
5744
+ if (newRank > existingRank) {
5745
+ console.log(
5746
+ `[ClawRouter] Session ${effectiveSessionId?.slice(0, 8)}... upgrading: ${existingSession.tier} \u2192 ${routingDecision.tier} (${routingDecision.model})`
5747
+ );
5748
+ parsed.model = routingDecision.model;
5749
+ modelId = routingDecision.model;
5750
+ bodyModified = true;
5751
+ if (effectiveSessionId) {
5752
+ sessionStore.setSession(
5753
+ effectiveSessionId,
5754
+ routingDecision.model,
5755
+ routingDecision.tier
5756
+ );
5178
5757
  }
5179
- }
5180
- const systemMsg = messages?.find((m) => m.role === "system");
5181
- const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
5182
- const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
5183
- const tools = parsed.tools;
5184
- const hasTools = Array.isArray(tools) && tools.length > 0;
5185
- if (hasTools && tools) {
5758
+ } else {
5186
5759
  console.log(
5187
- `[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`
5760
+ `[ClawRouter] Session ${effectiveSessionId?.slice(0, 8)}... keeping pinned model: ${existingSession.model} (${existingSession.tier} >= ${routingDecision.tier})`
5188
5761
  );
5762
+ parsed.model = existingSession.model;
5763
+ modelId = existingSession.model;
5764
+ bodyModified = true;
5765
+ sessionStore.touchSession(effectiveSessionId);
5766
+ routingDecision = {
5767
+ ...routingDecision,
5768
+ model: existingSession.model,
5769
+ tier: existingSession.tier
5770
+ };
5189
5771
  }
5190
- routingDecision = route(prompt, systemPrompt, maxTokens, {
5191
- ...routerOpts,
5192
- routingProfile: routingProfile ?? void 0
5193
- });
5772
+ const lastAssistantMsg = [...parsedMessages].reverse().find((m) => m.role === "assistant");
5773
+ const assistantToolCalls = lastAssistantMsg?.tool_calls;
5774
+ const toolCallNames = Array.isArray(assistantToolCalls) ? assistantToolCalls.map((tc) => tc.function?.name).filter((n) => Boolean(n)) : void 0;
5775
+ const contentHash = hashRequestContent(prompt, toolCallNames);
5776
+ const shouldEscalate = sessionStore.recordRequestHash(effectiveSessionId, contentHash);
5777
+ if (shouldEscalate) {
5778
+ const activeTierConfigs = (() => {
5779
+ if (routingDecision.reasoning?.includes("agentic") && routerOpts.config.agenticTiers) {
5780
+ return routerOpts.config.agenticTiers;
5781
+ }
5782
+ if (routingProfile === "eco" && routerOpts.config.ecoTiers) {
5783
+ return routerOpts.config.ecoTiers;
5784
+ }
5785
+ if (routingProfile === "premium" && routerOpts.config.premiumTiers) {
5786
+ return routerOpts.config.premiumTiers;
5787
+ }
5788
+ return routerOpts.config.tiers;
5789
+ })();
5790
+ const escalation = sessionStore.escalateSession(
5791
+ effectiveSessionId,
5792
+ activeTierConfigs
5793
+ );
5794
+ if (escalation) {
5795
+ console.log(
5796
+ `[ClawRouter] \u26A1 3-strike escalation: ${existingSession.model} \u2192 ${escalation.model} (${existingSession.tier} \u2192 ${escalation.tier})`
5797
+ );
5798
+ parsed.model = escalation.model;
5799
+ modelId = escalation.model;
5800
+ routingDecision = {
5801
+ ...routingDecision,
5802
+ model: escalation.model,
5803
+ tier: escalation.tier
5804
+ };
5805
+ }
5806
+ }
5807
+ } else {
5194
5808
  parsed.model = routingDecision.model;
5195
5809
  modelId = routingDecision.model;
5196
5810
  bodyModified = true;
5197
- if (sessionId2) {
5198
- sessionStore.setSession(sessionId2, routingDecision.model, routingDecision.tier);
5811
+ if (effectiveSessionId) {
5812
+ sessionStore.setSession(
5813
+ effectiveSessionId,
5814
+ routingDecision.model,
5815
+ routingDecision.tier
5816
+ );
5199
5817
  console.log(
5200
- `[ClawRouter] Session ${sessionId2.slice(0, 8)}... pinned to model: ${routingDecision.model}`
5818
+ `[ClawRouter] Session ${effectiveSessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
5201
5819
  );
5202
5820
  }
5203
- options.onRouted?.(routingDecision);
5204
5821
  }
5822
+ options.onRouted?.(routingDecision);
5205
5823
  }
5206
5824
  }
5207
5825
  if (bodyModified) {
@@ -5294,6 +5912,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5294
5912
  }
5295
5913
  deduplicator.markInflight(dedupKey);
5296
5914
  let estimatedCostMicros;
5915
+ let balanceFallbackNotice;
5297
5916
  const isFreeModel = modelId === FREE_MODEL;
5298
5917
  if (modelId && !options.skipBalanceCheck && !isFreeModel) {
5299
5918
  const estimated = estimateAmount(modelId, body.length, maxTokens);
@@ -5304,12 +5923,17 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5304
5923
  if (sufficiency.info.isEmpty || !sufficiency.sufficient) {
5305
5924
  const originalModel = modelId;
5306
5925
  console.log(
5307
- `[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} ($${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
5926
+ `[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
5308
5927
  );
5309
5928
  modelId = FREE_MODEL;
5310
5929
  const parsed = JSON.parse(body.toString());
5311
5930
  parsed.model = FREE_MODEL;
5312
5931
  body = Buffer.from(JSON.stringify(parsed));
5932
+ balanceFallbackNotice = sufficiency.info.isEmpty ? `> **\u26A0\uFE0F Wallet empty** \u2014 using free model. Fund your wallet to use ${originalModel}.
5933
+
5934
+ ` : `> **\u26A0\uFE0F Insufficient balance** (${sufficiency.info.balanceUSD}) \u2014 using free model instead of ${originalModel}.
5935
+
5936
+ `;
5313
5937
  options.onLowBalance?.({
5314
5938
  balanceUSD: sufficiency.info.balanceUSD,
5315
5939
  walletAddress: sufficiency.info.walletAddress
@@ -5373,8 +5997,18 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5373
5997
  if (routingDecision) {
5374
5998
  const estimatedInputTokens = Math.ceil(body.length / 4);
5375
5999
  const estimatedTotalTokens = estimatedInputTokens + maxTokens;
5376
- const useAgenticTiers = routingDecision.reasoning?.includes("agentic") && routerOpts.config.agenticTiers;
5377
- const tierConfigs = useAgenticTiers ? routerOpts.config.agenticTiers : routerOpts.config.tiers;
6000
+ const tierConfigs = (() => {
6001
+ if (routingDecision.reasoning?.includes("agentic") && routerOpts.config.agenticTiers) {
6002
+ return routerOpts.config.agenticTiers;
6003
+ }
6004
+ if (routingProfile === "eco" && routerOpts.config.ecoTiers) {
6005
+ return routerOpts.config.ecoTiers;
6006
+ }
6007
+ if (routingProfile === "premium" && routerOpts.config.premiumTiers) {
6008
+ return routerOpts.config.premiumTiers;
6009
+ }
6010
+ return routerOpts.config.tiers;
6011
+ })();
5378
6012
  const fullChain = getFallbackChain(routingDecision.tier, tierConfigs);
5379
6013
  const contextFiltered = getFallbackChainFiltered(
5380
6014
  routingDecision.tier,
@@ -5388,14 +6022,27 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5388
6022
  `[ClawRouter] Context filter (~${estimatedTotalTokens} tokens): excluded ${contextExcluded.join(", ")}`
5389
6023
  );
5390
6024
  }
5391
- modelsToTry = contextFiltered.slice(0, MAX_FALLBACK_ATTEMPTS);
6025
+ const toolFiltered = filterByToolCalling(contextFiltered, hasTools, supportsToolCalling);
6026
+ const toolExcluded = contextFiltered.filter((m) => !toolFiltered.includes(m));
6027
+ if (toolExcluded.length > 0) {
6028
+ console.log(
6029
+ `[ClawRouter] Tool-calling filter: excluded ${toolExcluded.join(", ")} (no structured function call support)`
6030
+ );
6031
+ }
6032
+ const visionFiltered = filterByVision(toolFiltered, hasVision, supportsVision);
6033
+ const visionExcluded = toolFiltered.filter((m) => !visionFiltered.includes(m));
6034
+ if (visionExcluded.length > 0) {
6035
+ console.log(
6036
+ `[ClawRouter] Vision filter: excluded ${visionExcluded.join(", ")} (no vision support)`
6037
+ );
6038
+ }
6039
+ modelsToTry = visionFiltered.slice(0, MAX_FALLBACK_ATTEMPTS);
5392
6040
  modelsToTry = prioritizeNonRateLimited(modelsToTry);
5393
6041
  } else {
5394
- if (modelId && modelId !== FREE_MODEL) {
5395
- modelsToTry = [modelId, FREE_MODEL];
5396
- } else {
5397
- modelsToTry = modelId ? [modelId] : [];
5398
- }
6042
+ modelsToTry = modelId ? [modelId] : [];
6043
+ }
6044
+ if (!modelsToTry.includes(FREE_MODEL)) {
6045
+ modelsToTry.push(FREE_MODEL);
5399
6046
  }
5400
6047
  let upstream;
5401
6048
  let lastError;
@@ -5429,6 +6076,17 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5429
6076
  if (result.errorStatus === 429) {
5430
6077
  markRateLimited(tryModel);
5431
6078
  }
6079
+ const isPaymentErr = /payment.*verification.*failed|insufficient.*funds/i.test(
6080
+ result.errorBody || ""
6081
+ );
6082
+ if (isPaymentErr && tryModel !== FREE_MODEL) {
6083
+ const freeIdx = modelsToTry.indexOf(FREE_MODEL);
6084
+ if (freeIdx > i + 1) {
6085
+ console.log(`[ClawRouter] Payment error \u2014 skipping to free model: ${FREE_MODEL}`);
6086
+ i = freeIdx - 1;
6087
+ continue;
6088
+ }
6089
+ }
5432
6090
  console.log(
5433
6091
  `[ClawRouter] Provider error from ${tryModel}, trying fallback: ${result.errorBody?.slice(0, 100)}`
5434
6092
  );
@@ -5446,6 +6104,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5446
6104
  clearInterval(heartbeatInterval);
5447
6105
  heartbeatInterval = void 0;
5448
6106
  }
6107
+ if (debugMode && headersSentEarly && routingDecision) {
6108
+ const debugComment = `: x-clawrouter-debug profile=${routingProfile ?? "auto"} tier=${routingDecision.tier} model=${actualModelUsed} agentic=${routingDecision.agenticScore?.toFixed(2) ?? "n/a"} confidence=${routingDecision.confidence.toFixed(2)} reasoning=${routingDecision.reasoning}
6109
+
6110
+ `;
6111
+ safeWrite(res, debugComment);
6112
+ }
5449
6113
  if (routingDecision && actualModelUsed !== routingDecision.model) {
5450
6114
  const estimatedInputTokens = Math.ceil(body.length / 4);
5451
6115
  const newCosts = calculateModelCost(
@@ -5464,6 +6128,12 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5464
6128
  savings: newCosts.savings
5465
6129
  };
5466
6130
  options.onRouted?.(routingDecision);
6131
+ if (effectiveSessionId) {
6132
+ sessionStore.setSession(effectiveSessionId, actualModelUsed, routingDecision.tier);
6133
+ console.log(
6134
+ `[ClawRouter] Session ${effectiveSessionId.slice(0, 8)}... updated pin to fallback: ${actualModelUsed}`
6135
+ );
6136
+ }
5467
6137
  }
5468
6138
  if (!upstream) {
5469
6139
  const rawErrBody = lastError?.body || "All models in fallback chain failed";
@@ -5526,6 +6196,10 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5526
6196
  const jsonStr = jsonBody.toString();
5527
6197
  try {
5528
6198
  const rsp = JSON.parse(jsonStr);
6199
+ if (rsp.usage && typeof rsp.usage === "object") {
6200
+ const u = rsp.usage;
6201
+ if (typeof u.prompt_tokens === "number") responseInputTokens = u.prompt_tokens;
6202
+ }
5529
6203
  const baseChunk = {
5530
6204
  id: rsp.id ?? `chatcmpl-${Date.now()}`,
5531
6205
  object: "chat.completion.chunk",
@@ -5551,6 +6225,18 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5551
6225
  `;
5552
6226
  safeWrite(res, roleData);
5553
6227
  responseChunks.push(Buffer.from(roleData));
6228
+ if (balanceFallbackNotice) {
6229
+ const noticeChunk = {
6230
+ ...baseChunk,
6231
+ choices: [{ index, delta: { content: balanceFallbackNotice }, logprobs: null, finish_reason: null }]
6232
+ };
6233
+ const noticeData = `data: ${JSON.stringify(noticeChunk)}
6234
+
6235
+ `;
6236
+ safeWrite(res, noticeData);
6237
+ responseChunks.push(Buffer.from(noticeData));
6238
+ balanceFallbackNotice = void 0;
6239
+ }
5554
6240
  if (content) {
5555
6241
  const contentChunk = {
5556
6242
  ...baseChunk,
@@ -5625,23 +6311,46 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5625
6311
  });
5626
6312
  responseHeaders["x-context-used-kb"] = String(originalContextSizeKB);
5627
6313
  responseHeaders["x-context-limit-kb"] = String(CONTEXT_LIMIT_KB);
5628
- res.writeHead(upstream.status, responseHeaders);
6314
+ if (debugMode && routingDecision) {
6315
+ responseHeaders["x-clawrouter-profile"] = routingProfile ?? "auto";
6316
+ responseHeaders["x-clawrouter-tier"] = routingDecision.tier;
6317
+ responseHeaders["x-clawrouter-model"] = actualModelUsed;
6318
+ responseHeaders["x-clawrouter-confidence"] = routingDecision.confidence.toFixed(2);
6319
+ responseHeaders["x-clawrouter-reasoning"] = routingDecision.reasoning;
6320
+ if (routingDecision.agenticScore !== void 0) {
6321
+ responseHeaders["x-clawrouter-agentic-score"] = routingDecision.agenticScore.toFixed(2);
6322
+ }
6323
+ }
6324
+ const bodyParts = [];
5629
6325
  if (upstream.body) {
5630
6326
  const reader = upstream.body.getReader();
5631
6327
  try {
5632
6328
  while (true) {
5633
6329
  const { done, value } = await reader.read();
5634
6330
  if (done) break;
5635
- const chunk = Buffer.from(value);
5636
- safeWrite(res, chunk);
5637
- responseChunks.push(chunk);
6331
+ bodyParts.push(Buffer.from(value));
5638
6332
  }
5639
6333
  } finally {
5640
6334
  reader.releaseLock();
5641
6335
  }
5642
6336
  }
6337
+ let responseBody = Buffer.concat(bodyParts);
6338
+ if (balanceFallbackNotice && responseBody.length > 0) {
6339
+ try {
6340
+ const parsed = JSON.parse(responseBody.toString());
6341
+ if (parsed.choices?.[0]?.message?.content !== void 0) {
6342
+ parsed.choices[0].message.content = balanceFallbackNotice + parsed.choices[0].message.content;
6343
+ responseBody = Buffer.from(JSON.stringify(parsed));
6344
+ }
6345
+ } catch {
6346
+ }
6347
+ balanceFallbackNotice = void 0;
6348
+ }
6349
+ responseHeaders["content-length"] = String(responseBody.length);
6350
+ res.writeHead(upstream.status, responseHeaders);
6351
+ safeWrite(res, responseBody);
6352
+ responseChunks.push(responseBody);
5643
6353
  res.end();
5644
- const responseBody = Buffer.concat(responseChunks);
5645
6354
  deduplicator.complete(dedupKey, {
5646
6355
  status: upstream.status,
5647
6356
  headers: responseHeaders,
@@ -5664,6 +6373,10 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5664
6373
  if (rspJson.choices?.[0]?.message?.content) {
5665
6374
  accumulatedContent = rspJson.choices[0].message.content;
5666
6375
  }
6376
+ if (rspJson.usage && typeof rspJson.usage === "object") {
6377
+ if (typeof rspJson.usage.prompt_tokens === "number")
6378
+ responseInputTokens = rspJson.usage.prompt_tokens;
6379
+ }
5667
6380
  } catch {
5668
6381
  }
5669
6382
  }
@@ -5689,7 +6402,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5689
6402
  deduplicator.removeInflight(dedupKey);
5690
6403
  balanceMonitor.invalidate();
5691
6404
  if (err instanceof Error && err.name === "AbortError") {
5692
- throw new Error(`Request timed out after ${timeoutMs}ms`);
6405
+ throw new Error(`Request timed out after ${timeoutMs}ms`, { cause: err });
5693
6406
  }
5694
6407
  throw err;
5695
6408
  }
@@ -5712,13 +6425,53 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5712
6425
  cost: costWithBuffer,
5713
6426
  baselineCost: baselineWithBuffer,
5714
6427
  savings: accurateCosts.savings,
5715
- latencyMs: Date.now() - startTime
6428
+ latencyMs: Date.now() - startTime,
6429
+ ...responseInputTokens !== void 0 && { inputTokens: responseInputTokens }
5716
6430
  };
5717
6431
  logUsage(entry).catch(() => {
5718
6432
  });
5719
6433
  }
5720
6434
  }
5721
6435
 
6436
+ // src/report.ts
6437
/**
 * Produce a usage report covering the requested period.
 *
 * The period is mapped to a day window ("daily" -> 1, "weekly" -> 7, any
 * other value -> 30), stats for that window are loaded through `getStats`,
 * and the result is rendered either as JSON or as Markdown.
 *
 * @param {string} period - Report period label; also used in the Markdown title.
 * @param {boolean} [json=false] - When truthy, return the stats serialized as
 *   2-space-indented JSON instead of a Markdown document.
 * @returns {Promise<string>} The rendered report text.
 */
async function generateReport(period, json = false) {
  let days;
  switch (period) {
    case "daily":
      days = 1;
      break;
    case "weekly":
      days = 7;
      break;
    default:
      // Anything that is not daily/weekly falls back to the 30-day window.
      days = 30;
  }
  const stats = await getStats(days);
  return json ? JSON.stringify(stats, null, 2) : formatMarkdownReport(period, days, stats);
}
6445
/**
 * Render aggregated usage stats as a Markdown document.
 *
 * The document has three parts: a title/header (period and generation
 * timestamp), a "Usage Summary" table, and a "Model Distribution" list
 * limited to the ten models with the highest request count.
 *
 * @param {string} period - Period label shown (capitalized) in the title.
 * @param {number} days - Size of the reporting window in days.
 * @param {object} stats - Aggregated stats; the fields read here are
 *   totalRequests, totalCost, totalBaselineCost, totalSavings,
 *   savingsPercentage, avgLatencyMs and byModel (entries with count and cost).
 * @returns {string} Markdown text with lines joined by "\n".
 */
function formatMarkdownReport(period, days, stats) {
  const pluralSuffix = days > 1 ? "s" : "";

  const header = [
    `# ClawRouter ${capitalize(period)} Report`,
    `**Period:** Last ${days} day${pluralSuffix}`,
    `**Generated:** ${new Date().toISOString()}`,
    ""
  ];

  const summary = [
    "## \u{1F4CA} Usage Summary",
    "",
    `| Metric | Value |`,
    `|--------|-------|`,
    `| Total Requests | ${stats.totalRequests} |`,
    `| Total Cost | $${stats.totalCost.toFixed(4)} |`,
    `| Baseline Cost | $${stats.totalBaselineCost.toFixed(4)} |`,
    `| **Savings** | **$${stats.totalSavings.toFixed(4)}** |`,
    `| Savings % | ${stats.savingsPercentage.toFixed(1)}% |`,
    `| Avg Latency | ${stats.avgLatencyMs.toFixed(0)}ms |`,
    ""
  ];

  // Busiest models first; only the top ten are listed.
  const topModels = Object.entries(stats.byModel)
    .sort(([, left], [, right]) => right.count - left.count)
    .slice(0, 10);

  const distribution = [
    "## \u{1F916} Model Distribution",
    "",
    ...topModels.map(
      ([model, data]) => `- ${model}: ${data.count} reqs, $${data.cost.toFixed(4)}`
    ),
    ""
  ];

  return [...header, ...summary, ...distribution].join("\n");
}
6471
/**
 * Upper-case the first character of a string and leave the remainder as-is.
 * An empty string is returned unchanged.
 *
 * @param {string} str - Text to capitalize.
 * @returns {string} `str` with its first character upper-cased.
 */
function capitalize(str) {
  const head = str.charAt(0);
  const tail = str.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
6474
+
5722
6475
  // src/doctor.ts
5723
6476
  import { platform, arch, freemem, totalmem } from "os";
5724
6477
  import { createPublicClient as createPublicClient3, http as http3 } from "viem";
@@ -5811,7 +6564,6 @@ async function collectNetworkInfo() {
5811
6564
  blockrunLatency = Date.now() - start;
5812
6565
  blockrunReachable = response.ok || response.status === 402;
5813
6566
  } catch {
5814
- blockrunReachable = false;
5815
6567
  }
5816
6568
  let proxyRunning = false;
5817
6569
  try {
@@ -5821,7 +6573,6 @@ async function collectNetworkInfo() {
5821
6573
  });
5822
6574
  proxyRunning = response.ok;
5823
6575
  } catch {
5824
- proxyRunning = false;
5825
6576
  }
5826
6577
  return {
5827
6578
  blockrunApi: { reachable: blockrunReachable, latencyMs: blockrunLatency },
@@ -6065,6 +6816,7 @@ Usage:
6065
6816
  clawrouter [options]
6066
6817
  clawrouter doctor [opus] [question]
6067
6818
  clawrouter partners [test]
6819
+ clawrouter report [daily|weekly|monthly] [--json]
6068
6820
 
6069
6821
  Options:
6070
6822
  --version, -v Show version number
@@ -6107,6 +6859,9 @@ function parseArgs(args) {
6107
6859
  doctor: false,
6108
6860
  partners: false,
6109
6861
  partnersTest: false,
6862
+ report: false,
6863
+ reportPeriod: "daily",
6864
+ reportJson: false,
6110
6865
  port: void 0
6111
6866
  };
6112
6867
  for (let i = 0; i < args.length; i++) {
@@ -6123,6 +6878,20 @@ function parseArgs(args) {
6123
6878
  result.partnersTest = true;
6124
6879
  i++;
6125
6880
  }
6881
+ } else if (arg === "report") {
6882
+ result.report = true;
6883
+ const next = args[i + 1];
6884
+ if (next && ["daily", "weekly", "monthly"].includes(next)) {
6885
+ result.reportPeriod = next;
6886
+ i++;
6887
+ if (args[i + 1] === "--json") {
6888
+ result.reportJson = true;
6889
+ i++;
6890
+ }
6891
+ } else if (next === "--json") {
6892
+ result.reportJson = true;
6893
+ i++;
6894
+ }
6126
6895
  } else if (arg === "--port" && args[i + 1]) {
6127
6896
  result.port = parseInt(args[i + 1], 10);
6128
6897
  i++;
@@ -6170,7 +6939,9 @@ ClawRouter Partner APIs (v${VERSION})
6170
6939
  console.log(` ${svc.description}`);
6171
6940
  console.log(` Tool: blockrun_${svc.id}`);
6172
6941
  console.log(` Method: ${svc.method} /v1${svc.proxyPath}`);
6173
- console.log(` Pricing: ${svc.pricing.perUnit} per ${svc.pricing.unit} (min ${svc.pricing.minimum}, max ${svc.pricing.maximum})`);
6942
+ console.log(
6943
+ ` Pricing: ${svc.pricing.perUnit} per ${svc.pricing.unit} (min ${svc.pricing.minimum}, max ${svc.pricing.maximum})`
6944
+ );
6174
6945
  console.log();
6175
6946
  }
6176
6947
  if (args.partnersTest) {
@@ -6191,6 +6962,11 @@ ClawRouter Partner APIs (v${VERSION})
6191
6962
  }
6192
6963
  process.exit(0);
6193
6964
  }
6965
+ if (args.report) {
6966
+ const report = await generateReport(args.reportPeriod, args.reportJson);
6967
+ console.log(report);
6968
+ process.exit(0);
6969
+ }
6194
6970
  const wallet = await resolveOrGenerateWalletKey();
6195
6971
  if (wallet.source === "generated") {
6196
6972
  console.log(`[ClawRouter] Generated new wallet: ${wallet.address}`);