@blockrun/clawrouter 0.10.13 → 0.10.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -476,7 +476,8 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
476
476
  tier: "REASONING",
477
477
  confidence: Math.max(confidence2, 0.85),
478
478
  signals,
479
- agenticScore
479
+ agenticScore,
480
+ dimensions
480
481
  };
481
482
  }
482
483
  const { simpleMedium, mediumComplex, complexReasoning } = config.tierBoundaries;
@@ -500,9 +501,9 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
500
501
  }
501
502
  const confidence = calibrateConfidence(distanceFromBoundary, config.confidenceSteepness);
502
503
  if (confidence < config.confidenceThreshold) {
503
- return { score: weightedScore, tier: null, confidence, signals, agenticScore };
504
+ return { score: weightedScore, tier: null, confidence, signals, agenticScore, dimensions };
504
505
  }
505
- return { score: weightedScore, tier, confidence, signals, agenticScore };
506
+ return { score: weightedScore, tier, confidence, signals, agenticScore, dimensions };
506
507
  }
507
508
  function calibrateConfidence(distance, steepness) {
508
509
  return 1 / (1 + Math.exp(-steepness * distance));
@@ -558,6 +559,11 @@ function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutput
558
559
  const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
559
560
  return { costEstimate, baselineCost, savings };
560
561
  }
562
+ function filterByToolCalling(models, hasTools, supportsToolCalling2) {
563
+ if (!hasTools) return models;
564
+ const filtered = models.filter(supportsToolCalling2);
565
+ return filtered.length > 0 ? filtered : models;
566
+ }
561
567
  function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
562
568
  const fullChain = getFallbackChain(tier, tierConfigs);
563
569
  const filtered = fullChain.filter((modelId) => {
@@ -1613,12 +1619,12 @@ var DEFAULT_ROUTING_CONFIG = {
1613
1619
  ]
1614
1620
  },
1615
1621
  MEDIUM: {
1616
- primary: "xai/grok-code-fast-1",
1617
- // Code specialist, $0.20/$1.50
1622
+ primary: "moonshot/kimi-k2.5",
1623
+ // $0.50/$2.40 - strong tool use, proper function call format
1618
1624
  fallback: [
1625
+ "deepseek/deepseek-chat",
1619
1626
  "google/gemini-2.5-flash-lite",
1620
1627
  // 1M context, ultra cheap ($0.10/$0.40)
1621
- "deepseek/deepseek-chat",
1622
1628
  "xai/grok-4-1-fast-non-reasoning"
1623
1629
  // Upgraded Grok 4.1
1624
1630
  ]
@@ -1684,7 +1690,7 @@ var DEFAULT_ROUTING_CONFIG = {
1684
1690
  fallback: [
1685
1691
  "anthropic/claude-haiku-4.5",
1686
1692
  "google/gemini-2.5-flash-lite",
1687
- "xai/grok-code-fast-1"
1693
+ "deepseek/deepseek-chat"
1688
1694
  ]
1689
1695
  },
1690
1696
  MEDIUM: {
@@ -1735,9 +1741,9 @@ var DEFAULT_ROUTING_CONFIG = {
1735
1741
  ]
1736
1742
  },
1737
1743
  MEDIUM: {
1738
- primary: "xai/grok-code-fast-1",
1739
- // Code specialist for agentic coding
1740
- fallback: ["moonshot/kimi-k2.5", "anthropic/claude-haiku-4.5", "claude-sonnet-4"]
1744
+ primary: "moonshot/kimi-k2.5",
1745
+ // $0.50/$2.40 - strong tool use, handles function calls correctly
1746
+ fallback: ["anthropic/claude-haiku-4.5", "deepseek/deepseek-chat", "xai/grok-4-1-fast-non-reasoning"]
1741
1747
  },
1742
1748
  COMPLEX: {
1743
1749
  primary: "anthropic/claude-sonnet-4.6",
@@ -1960,7 +1966,8 @@ var BLOCKRUN_MODELS = [
1960
1966
  maxOutput: 128e3,
1961
1967
  reasoning: true,
1962
1968
  vision: true,
1963
- agentic: true
1969
+ agentic: true,
1970
+ toolCalling: true
1964
1971
  },
1965
1972
  {
1966
1973
  id: "openai/gpt-5-mini",
@@ -1969,7 +1976,8 @@ var BLOCKRUN_MODELS = [
1969
1976
  inputPrice: 0.25,
1970
1977
  outputPrice: 2,
1971
1978
  contextWindow: 2e5,
1972
- maxOutput: 65536
1979
+ maxOutput: 65536,
1980
+ toolCalling: true
1973
1981
  },
1974
1982
  {
1975
1983
  id: "openai/gpt-5-nano",
@@ -1978,7 +1986,8 @@ var BLOCKRUN_MODELS = [
1978
1986
  inputPrice: 0.05,
1979
1987
  outputPrice: 0.4,
1980
1988
  contextWindow: 128e3,
1981
- maxOutput: 32768
1989
+ maxOutput: 32768,
1990
+ toolCalling: true
1982
1991
  },
1983
1992
  {
1984
1993
  id: "openai/gpt-5.2-pro",
@@ -1988,7 +1997,8 @@ var BLOCKRUN_MODELS = [
1988
1997
  outputPrice: 168,
1989
1998
  contextWindow: 4e5,
1990
1999
  maxOutput: 128e3,
1991
- reasoning: true
2000
+ reasoning: true,
2001
+ toolCalling: true
1992
2002
  },
1993
2003
  // OpenAI Codex Family
1994
2004
  {
@@ -1999,7 +2009,8 @@ var BLOCKRUN_MODELS = [
1999
2009
  outputPrice: 14,
2000
2010
  contextWindow: 128e3,
2001
2011
  maxOutput: 32e3,
2002
- agentic: true
2012
+ agentic: true,
2013
+ toolCalling: true
2003
2014
  },
2004
2015
  // OpenAI GPT-4 Family
2005
2016
  {
@@ -2010,7 +2021,8 @@ var BLOCKRUN_MODELS = [
2010
2021
  outputPrice: 8,
2011
2022
  contextWindow: 128e3,
2012
2023
  maxOutput: 16384,
2013
- vision: true
2024
+ vision: true,
2025
+ toolCalling: true
2014
2026
  },
2015
2027
  {
2016
2028
  id: "openai/gpt-4.1-mini",
@@ -2019,7 +2031,8 @@ var BLOCKRUN_MODELS = [
2019
2031
  inputPrice: 0.4,
2020
2032
  outputPrice: 1.6,
2021
2033
  contextWindow: 128e3,
2022
- maxOutput: 16384
2034
+ maxOutput: 16384,
2035
+ toolCalling: true
2023
2036
  },
2024
2037
  {
2025
2038
  id: "openai/gpt-4.1-nano",
@@ -2028,7 +2041,8 @@ var BLOCKRUN_MODELS = [
2028
2041
  inputPrice: 0.1,
2029
2042
  outputPrice: 0.4,
2030
2043
  contextWindow: 128e3,
2031
- maxOutput: 16384
2044
+ maxOutput: 16384,
2045
+ toolCalling: true
2032
2046
  },
2033
2047
  {
2034
2048
  id: "openai/gpt-4o",
@@ -2039,7 +2053,8 @@ var BLOCKRUN_MODELS = [
2039
2053
  contextWindow: 128e3,
2040
2054
  maxOutput: 16384,
2041
2055
  vision: true,
2042
- agentic: true
2056
+ agentic: true,
2057
+ toolCalling: true
2043
2058
  },
2044
2059
  {
2045
2060
  id: "openai/gpt-4o-mini",
@@ -2048,7 +2063,8 @@ var BLOCKRUN_MODELS = [
2048
2063
  inputPrice: 0.15,
2049
2064
  outputPrice: 0.6,
2050
2065
  contextWindow: 128e3,
2051
- maxOutput: 16384
2066
+ maxOutput: 16384,
2067
+ toolCalling: true
2052
2068
  },
2053
2069
  // OpenAI O-series (Reasoning)
2054
2070
  {
@@ -2059,7 +2075,8 @@ var BLOCKRUN_MODELS = [
2059
2075
  outputPrice: 60,
2060
2076
  contextWindow: 2e5,
2061
2077
  maxOutput: 1e5,
2062
- reasoning: true
2078
+ reasoning: true,
2079
+ toolCalling: true
2063
2080
  },
2064
2081
  {
2065
2082
  id: "openai/o1-mini",
@@ -2069,7 +2086,8 @@ var BLOCKRUN_MODELS = [
2069
2086
  outputPrice: 4.4,
2070
2087
  contextWindow: 128e3,
2071
2088
  maxOutput: 65536,
2072
- reasoning: true
2089
+ reasoning: true,
2090
+ toolCalling: true
2073
2091
  },
2074
2092
  {
2075
2093
  id: "openai/o3",
@@ -2079,7 +2097,8 @@ var BLOCKRUN_MODELS = [
2079
2097
  outputPrice: 8,
2080
2098
  contextWindow: 2e5,
2081
2099
  maxOutput: 1e5,
2082
- reasoning: true
2100
+ reasoning: true,
2101
+ toolCalling: true
2083
2102
  },
2084
2103
  {
2085
2104
  id: "openai/o3-mini",
@@ -2089,7 +2108,8 @@ var BLOCKRUN_MODELS = [
2089
2108
  outputPrice: 4.4,
2090
2109
  contextWindow: 128e3,
2091
2110
  maxOutput: 65536,
2092
- reasoning: true
2111
+ reasoning: true,
2112
+ toolCalling: true
2093
2113
  },
2094
2114
  {
2095
2115
  id: "openai/o4-mini",
@@ -2099,7 +2119,8 @@ var BLOCKRUN_MODELS = [
2099
2119
  outputPrice: 4.4,
2100
2120
  contextWindow: 128e3,
2101
2121
  maxOutput: 65536,
2102
- reasoning: true
2122
+ reasoning: true,
2123
+ toolCalling: true
2103
2124
  },
2104
2125
  // Anthropic - all Claude models excel at agentic workflows
2105
2126
  // Use newest versions (4.6) with full provider prefix
@@ -2111,7 +2132,8 @@ var BLOCKRUN_MODELS = [
2111
2132
  outputPrice: 5,
2112
2133
  contextWindow: 2e5,
2113
2134
  maxOutput: 8192,
2114
- agentic: true
2135
+ agentic: true,
2136
+ toolCalling: true
2115
2137
  },
2116
2138
  {
2117
2139
  id: "anthropic/claude-sonnet-4.6",
@@ -2122,7 +2144,8 @@ var BLOCKRUN_MODELS = [
2122
2144
  contextWindow: 2e5,
2123
2145
  maxOutput: 64e3,
2124
2146
  reasoning: true,
2125
- agentic: true
2147
+ agentic: true,
2148
+ toolCalling: true
2126
2149
  },
2127
2150
  {
2128
2151
  id: "anthropic/claude-opus-4.6",
@@ -2133,7 +2156,8 @@ var BLOCKRUN_MODELS = [
2133
2156
  contextWindow: 2e5,
2134
2157
  maxOutput: 32e3,
2135
2158
  reasoning: true,
2136
- agentic: true
2159
+ agentic: true,
2160
+ toolCalling: true
2137
2161
  },
2138
2162
  // Google
2139
2163
  {
@@ -2145,7 +2169,8 @@ var BLOCKRUN_MODELS = [
2145
2169
  contextWindow: 105e4,
2146
2170
  maxOutput: 65536,
2147
2171
  reasoning: true,
2148
- vision: true
2172
+ vision: true,
2173
+ toolCalling: true
2149
2174
  },
2150
2175
  {
2151
2176
  id: "google/gemini-3-pro-preview",
@@ -2156,7 +2181,8 @@ var BLOCKRUN_MODELS = [
2156
2181
  contextWindow: 105e4,
2157
2182
  maxOutput: 65536,
2158
2183
  reasoning: true,
2159
- vision: true
2184
+ vision: true,
2185
+ toolCalling: true
2160
2186
  },
2161
2187
  {
2162
2188
  id: "google/gemini-3-flash-preview",
@@ -2166,7 +2192,8 @@ var BLOCKRUN_MODELS = [
2166
2192
  outputPrice: 3,
2167
2193
  contextWindow: 1e6,
2168
2194
  maxOutput: 65536,
2169
- vision: true
2195
+ vision: true,
2196
+ toolCalling: true
2170
2197
  },
2171
2198
  {
2172
2199
  id: "google/gemini-2.5-pro",
@@ -2177,7 +2204,8 @@ var BLOCKRUN_MODELS = [
2177
2204
  contextWindow: 105e4,
2178
2205
  maxOutput: 65536,
2179
2206
  reasoning: true,
2180
- vision: true
2207
+ vision: true,
2208
+ toolCalling: true
2181
2209
  },
2182
2210
  {
2183
2211
  id: "google/gemini-2.5-flash",
@@ -2186,7 +2214,8 @@ var BLOCKRUN_MODELS = [
2186
2214
  inputPrice: 0.3,
2187
2215
  outputPrice: 2.5,
2188
2216
  contextWindow: 1e6,
2189
- maxOutput: 65536
2217
+ maxOutput: 65536,
2218
+ toolCalling: true
2190
2219
  },
2191
2220
  {
2192
2221
  id: "google/gemini-2.5-flash-lite",
@@ -2195,7 +2224,8 @@ var BLOCKRUN_MODELS = [
2195
2224
  inputPrice: 0.1,
2196
2225
  outputPrice: 0.4,
2197
2226
  contextWindow: 1e6,
2198
- maxOutput: 65536
2227
+ maxOutput: 65536,
2228
+ toolCalling: true
2199
2229
  },
2200
2230
  // DeepSeek
2201
2231
  {
@@ -2205,7 +2235,8 @@ var BLOCKRUN_MODELS = [
2205
2235
  inputPrice: 0.28,
2206
2236
  outputPrice: 0.42,
2207
2237
  contextWindow: 128e3,
2208
- maxOutput: 8192
2238
+ maxOutput: 8192,
2239
+ toolCalling: true
2209
2240
  },
2210
2241
  {
2211
2242
  id: "deepseek/deepseek-reasoner",
@@ -2215,7 +2246,8 @@ var BLOCKRUN_MODELS = [
2215
2246
  outputPrice: 0.42,
2216
2247
  contextWindow: 128e3,
2217
2248
  maxOutput: 8192,
2218
- reasoning: true
2249
+ reasoning: true,
2250
+ toolCalling: true
2219
2251
  },
2220
2252
  // Moonshot / Kimi - optimized for agentic workflows
2221
2253
  {
@@ -2228,7 +2260,8 @@ var BLOCKRUN_MODELS = [
2228
2260
  maxOutput: 8192,
2229
2261
  reasoning: true,
2230
2262
  vision: true,
2231
- agentic: true
2263
+ agentic: true,
2264
+ toolCalling: true
2232
2265
  },
2233
2266
  // xAI / Grok
2234
2267
  {
@@ -2239,7 +2272,8 @@ var BLOCKRUN_MODELS = [
2239
2272
  outputPrice: 15,
2240
2273
  contextWindow: 131072,
2241
2274
  maxOutput: 16384,
2242
- reasoning: true
2275
+ reasoning: true,
2276
+ toolCalling: true
2243
2277
  },
2244
2278
  // grok-3-fast removed - too expensive ($5/$25), use grok-4-fast instead
2245
2279
  {
@@ -2249,7 +2283,8 @@ var BLOCKRUN_MODELS = [
2249
2283
  inputPrice: 0.3,
2250
2284
  outputPrice: 0.5,
2251
2285
  contextWindow: 131072,
2252
- maxOutput: 16384
2286
+ maxOutput: 16384,
2287
+ toolCalling: true
2253
2288
  },
2254
2289
  // xAI Grok 4 Family - Ultra-cheap fast models
2255
2290
  {
@@ -2260,7 +2295,8 @@ var BLOCKRUN_MODELS = [
2260
2295
  outputPrice: 0.5,
2261
2296
  contextWindow: 131072,
2262
2297
  maxOutput: 16384,
2263
- reasoning: true
2298
+ reasoning: true,
2299
+ toolCalling: true
2264
2300
  },
2265
2301
  {
2266
2302
  id: "xai/grok-4-fast-non-reasoning",
@@ -2269,7 +2305,8 @@ var BLOCKRUN_MODELS = [
2269
2305
  inputPrice: 0.2,
2270
2306
  outputPrice: 0.5,
2271
2307
  contextWindow: 131072,
2272
- maxOutput: 16384
2308
+ maxOutput: 16384,
2309
+ toolCalling: true
2273
2310
  },
2274
2311
  {
2275
2312
  id: "xai/grok-4-1-fast-reasoning",
@@ -2279,7 +2316,8 @@ var BLOCKRUN_MODELS = [
2279
2316
  outputPrice: 0.5,
2280
2317
  contextWindow: 131072,
2281
2318
  maxOutput: 16384,
2282
- reasoning: true
2319
+ reasoning: true,
2320
+ toolCalling: true
2283
2321
  },
2284
2322
  {
2285
2323
  id: "xai/grok-4-1-fast-non-reasoning",
@@ -2288,7 +2326,8 @@ var BLOCKRUN_MODELS = [
2288
2326
  inputPrice: 0.2,
2289
2327
  outputPrice: 0.5,
2290
2328
  contextWindow: 131072,
2291
- maxOutput: 16384
2329
+ maxOutput: 16384,
2330
+ toolCalling: true
2292
2331
  },
2293
2332
  {
2294
2333
  id: "xai/grok-code-fast-1",
@@ -2297,9 +2336,10 @@ var BLOCKRUN_MODELS = [
2297
2336
  inputPrice: 0.2,
2298
2337
  outputPrice: 1.5,
2299
2338
  contextWindow: 131072,
2300
- maxOutput: 16384,
2301
- agentic: true
2302
- // Good for coding tasks
2339
+ maxOutput: 16384
2340
+ // toolCalling intentionally omitted: outputs tool calls as plain text JSON,
2341
+ // not OpenAI-compatible structured function calls. Will be skipped when
2342
+ // request has tools to prevent the "talking to itself" bug.
2303
2343
  },
2304
2344
  {
2305
2345
  id: "xai/grok-4-0709",
@@ -2309,7 +2349,8 @@ var BLOCKRUN_MODELS = [
2309
2349
  outputPrice: 1.5,
2310
2350
  contextWindow: 131072,
2311
2351
  maxOutput: 16384,
2312
- reasoning: true
2352
+ reasoning: true,
2353
+ toolCalling: true
2313
2354
  },
2314
2355
  {
2315
2356
  id: "xai/grok-2-vision",
@@ -2319,7 +2360,8 @@ var BLOCKRUN_MODELS = [
2319
2360
  outputPrice: 10,
2320
2361
  contextWindow: 131072,
2321
2362
  maxOutput: 16384,
2322
- vision: true
2363
+ vision: true,
2364
+ toolCalling: true
2323
2365
  },
2324
2366
  // MiniMax
2325
2367
  {
@@ -2331,7 +2373,8 @@ var BLOCKRUN_MODELS = [
2331
2373
  contextWindow: 204800,
2332
2374
  maxOutput: 16384,
2333
2375
  reasoning: true,
2334
- agentic: true
2376
+ agentic: true,
2377
+ toolCalling: true
2335
2378
  },
2336
2379
  // NVIDIA - Free/cheap models
2337
2380
  {
@@ -2342,6 +2385,8 @@ var BLOCKRUN_MODELS = [
2342
2385
  outputPrice: 0,
2343
2386
  contextWindow: 128e3,
2344
2387
  maxOutput: 16384
2388
+ // toolCalling intentionally omitted: free model, structured function
2389
+ // calling support unverified. Excluded from tool-heavy routing paths.
2345
2390
  },
2346
2391
  {
2347
2392
  id: "nvidia/kimi-k2.5",
@@ -2350,7 +2395,8 @@ var BLOCKRUN_MODELS = [
2350
2395
  inputPrice: 0.55,
2351
2396
  outputPrice: 2.5,
2352
2397
  contextWindow: 262144,
2353
- maxOutput: 16384
2398
+ maxOutput: 16384,
2399
+ toolCalling: true
2354
2400
  }
2355
2401
  ];
2356
2402
  function toOpenClawModel(m) {
@@ -2379,6 +2425,11 @@ var OPENCLAW_MODELS = [
2379
2425
  ...BLOCKRUN_MODELS.map(toOpenClawModel),
2380
2426
  ...ALIAS_MODELS
2381
2427
  ];
2428
+ function supportsToolCalling(modelId) {
2429
+ const normalized = modelId.replace("blockrun/", "");
2430
+ const model = BLOCKRUN_MODELS.find((m) => m.id === normalized);
2431
+ return model?.toolCalling ?? false;
2432
+ }
2382
2433
  function getModelContextWindow(modelId) {
2383
2434
  const normalized = modelId.replace("blockrun/", "");
2384
2435
  const model = BLOCKRUN_MODELS.find((m) => m.id === normalized);
@@ -5032,6 +5083,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5032
5083
  const originalContextSizeKB = Math.ceil(body.length / 1024);
5033
5084
  const debugMode = req.headers["x-clawrouter-debug"] !== "false";
5034
5085
  let routingDecision;
5086
+ let hasTools = false;
5035
5087
  let isStreaming = false;
5036
5088
  let modelId = "";
5037
5089
  let maxTokens = 4096;
@@ -5046,10 +5098,11 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5046
5098
  modelId = parsed.model || "";
5047
5099
  maxTokens = parsed.max_tokens || 4096;
5048
5100
  let bodyModified = false;
5049
- if (sessionId && Array.isArray(parsed.messages)) {
5050
- const messages = parsed.messages;
5051
- const lastUserMsg = [...messages].reverse().find((m) => m.role === "user");
5052
- const lastContent = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
5101
+ const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
5102
+ const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
5103
+ const lastContent = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
5104
+ if (sessionId && parsedMessages.length > 0) {
5105
+ const messages = parsedMessages;
5053
5106
  if (sessionJournal.needsContext(lastContent)) {
5054
5107
  const journalText = sessionJournal.format(sessionId);
5055
5108
  if (journalText) {
@@ -5070,6 +5123,106 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5070
5123
  }
5071
5124
  }
5072
5125
  }
5126
+ if (lastContent.startsWith("/debug")) {
5127
+ const debugPrompt = lastContent.slice("/debug".length).trim() || "hello";
5128
+ const messages = parsed.messages;
5129
+ const systemMsg = messages?.find((m) => m.role === "system");
5130
+ const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
5131
+ const fullText = `${systemPrompt ?? ""} ${debugPrompt}`;
5132
+ const estimatedTokens = Math.ceil(fullText.length / 4);
5133
+ const normalizedModel2 = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
5134
+ const profileName = normalizedModel2.replace("blockrun/", "");
5135
+ const debugProfile = ["free", "eco", "auto", "premium"].includes(profileName) ? profileName : "auto";
5136
+ const scoring = classifyByRules(
5137
+ debugPrompt,
5138
+ systemPrompt,
5139
+ estimatedTokens,
5140
+ DEFAULT_ROUTING_CONFIG.scoring
5141
+ );
5142
+ const debugRouting = route(debugPrompt, systemPrompt, maxTokens, {
5143
+ ...routerOpts,
5144
+ routingProfile: debugProfile
5145
+ });
5146
+ const dimLines = (scoring.dimensions ?? []).map((d) => {
5147
+ const nameStr = (d.name + ":").padEnd(24);
5148
+ const scoreStr = d.score.toFixed(2).padStart(6);
5149
+ const sigStr = d.signal ? ` [${d.signal}]` : "";
5150
+ return ` ${nameStr}${scoreStr}${sigStr}`;
5151
+ }).join("\n");
5152
+ const sess = sessionId ? sessionStore.getSession(sessionId) : void 0;
5153
+ const sessLine = sess ? `Session: ${sessionId.slice(0, 8)}... \u2192 pinned: ${sess.model} (${sess.requestCount} requests)` : sessionId ? `Session: ${sessionId.slice(0, 8)}... \u2192 no pinned model` : "Session: none";
5154
+ const { simpleMedium, mediumComplex, complexReasoning } = DEFAULT_ROUTING_CONFIG.scoring.tierBoundaries;
5155
+ const debugText = [
5156
+ "ClawRouter Debug",
5157
+ "",
5158
+ `Profile: ${debugProfile} | Tier: ${debugRouting.tier} | Model: ${debugRouting.model}`,
5159
+ `Confidence: ${debugRouting.confidence.toFixed(2)} | Cost: $${debugRouting.costEstimate.toFixed(4)} | Savings: ${(debugRouting.savings * 100).toFixed(0)}%`,
5160
+ `Reasoning: ${debugRouting.reasoning}`,
5161
+ "",
5162
+ `Scoring (weighted: ${scoring.score.toFixed(3)})`,
5163
+ dimLines,
5164
+ "",
5165
+ `Tier Boundaries: SIMPLE <${simpleMedium.toFixed(2)} | MEDIUM <${mediumComplex.toFixed(2)} | COMPLEX <${complexReasoning.toFixed(2)} | REASONING >=${complexReasoning.toFixed(2)}`,
5166
+ "",
5167
+ sessLine
5168
+ ].join("\n");
5169
+ const completionId = `chatcmpl-debug-${Date.now()}`;
5170
+ const timestamp = Math.floor(Date.now() / 1e3);
5171
+ const syntheticResponse = {
5172
+ id: completionId,
5173
+ object: "chat.completion",
5174
+ created: timestamp,
5175
+ model: "clawrouter/debug",
5176
+ choices: [
5177
+ {
5178
+ index: 0,
5179
+ message: { role: "assistant", content: debugText },
5180
+ finish_reason: "stop"
5181
+ }
5182
+ ],
5183
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
5184
+ };
5185
+ if (isStreaming) {
5186
+ res.writeHead(200, {
5187
+ "Content-Type": "text/event-stream",
5188
+ "Cache-Control": "no-cache",
5189
+ Connection: "keep-alive"
5190
+ });
5191
+ const sseChunk = {
5192
+ id: completionId,
5193
+ object: "chat.completion.chunk",
5194
+ created: timestamp,
5195
+ model: "clawrouter/debug",
5196
+ choices: [
5197
+ {
5198
+ index: 0,
5199
+ delta: { role: "assistant", content: debugText },
5200
+ finish_reason: null
5201
+ }
5202
+ ]
5203
+ };
5204
+ const sseDone = {
5205
+ id: completionId,
5206
+ object: "chat.completion.chunk",
5207
+ created: timestamp,
5208
+ model: "clawrouter/debug",
5209
+ choices: [{ index: 0, delta: {}, finish_reason: "stop" }]
5210
+ };
5211
+ res.write(`data: ${JSON.stringify(sseChunk)}
5212
+
5213
+ `);
5214
+ res.write(`data: ${JSON.stringify(sseDone)}
5215
+
5216
+ `);
5217
+ res.write("data: [DONE]\n\n");
5218
+ res.end();
5219
+ } else {
5220
+ res.writeHead(200, { "Content-Type": "application/json" });
5221
+ res.end(JSON.stringify(syntheticResponse));
5222
+ }
5223
+ console.log(`[ClawRouter] /debug command \u2192 ${debugRouting.tier} | ${debugRouting.model}`);
5224
+ return;
5225
+ }
5073
5226
  if (parsed.stream === true) {
5074
5227
  parsed.stream = false;
5075
5228
  bodyModified = true;
@@ -5124,20 +5277,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5124
5277
  sessionStore.touchSession(sessionId2);
5125
5278
  } else {
5126
5279
  const messages = parsed.messages;
5127
- let lastUserMsg;
5280
+ let lastUserMsg2;
5128
5281
  if (messages) {
5129
5282
  for (let i = messages.length - 1; i >= 0; i--) {
5130
5283
  if (messages[i].role === "user") {
5131
- lastUserMsg = messages[i];
5284
+ lastUserMsg2 = messages[i];
5132
5285
  break;
5133
5286
  }
5134
5287
  }
5135
5288
  }
5136
5289
  const systemMsg = messages?.find((m) => m.role === "system");
5137
- const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
5290
+ const prompt = typeof lastUserMsg2?.content === "string" ? lastUserMsg2.content : "";
5138
5291
  const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
5139
5292
  const tools = parsed.tools;
5140
- const hasTools = Array.isArray(tools) && tools.length > 0;
5293
+ hasTools = Array.isArray(tools) && tools.length > 0;
5141
5294
  if (hasTools && tools) {
5142
5295
  console.log(
5143
5296
  `[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`
@@ -5354,7 +5507,14 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5354
5507
  `[ClawRouter] Context filter (~${estimatedTotalTokens} tokens): excluded ${contextExcluded.join(", ")}`
5355
5508
  );
5356
5509
  }
5357
- modelsToTry = contextFiltered.slice(0, MAX_FALLBACK_ATTEMPTS);
5510
+ const toolFiltered = filterByToolCalling(contextFiltered, hasTools, supportsToolCalling);
5511
+ const toolExcluded = contextFiltered.filter((m) => !toolFiltered.includes(m));
5512
+ if (toolExcluded.length > 0) {
5513
+ console.log(
5514
+ `[ClawRouter] Tool-calling filter: excluded ${toolExcluded.join(", ")} (no structured function call support)`
5515
+ );
5516
+ }
5517
+ modelsToTry = toolFiltered.slice(0, MAX_FALLBACK_ATTEMPTS);
5358
5518
  modelsToTry = prioritizeNonRateLimited(modelsToTry);
5359
5519
  } else {
5360
5520
  if (modelId && modelId !== FREE_MODEL) {