@blockrun/cc 0.8.2 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,24 @@
1
1
  import http from 'node:http';
2
- import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
3
2
  import fs from 'node:fs';
4
3
  import path from 'node:path';
5
4
  import os from 'node:os';
5
+ import { fileURLToPath } from 'node:url';
6
+ import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
7
+ import { recordUsage } from '../stats/tracker.js';
8
+ import { fetchWithFallback, buildFallbackChain, DEFAULT_FALLBACK_CONFIG, } from './fallback.js';
9
+ import { routeRequest, parseRoutingProfile, } from '../router/index.js';
10
// Resolve the package version from package.json (two directories above this
// module). When the manifest cannot be read or parsed, or does not carry a
// usable version string, the hard-coded release number below is kept.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
let VERSION = '0.9.1';
try {
    const pkgPath = path.resolve(__dirname, '../../package.json');
    const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
    // Only a non-empty string is trusted; the value ends up in HTTP headers.
    if (typeof pkg?.version === 'string' && pkg.version !== '') {
        VERSION = pkg.version;
    }
}
catch {
    /* manifest unavailable: keep the fallback version */
}
// User-Agent for backend requests
const USER_AGENT = `brcc/${VERSION}`;
// Value sent in the X-Brcc-Version request header.
const X_BRCC_VERSION = VERSION;
6
22
  const LOG_FILE = path.join(os.homedir(), '.blockrun', 'brcc-debug.log');
7
23
  function debug(options, ...args) {
8
24
  if (!options.debug)
@@ -12,27 +28,64 @@ function debug(options, ...args) {
12
28
  fs.mkdirSync(path.dirname(LOG_FILE), { recursive: true });
13
29
  fs.appendFileSync(LOG_FILE, msg);
14
30
  }
15
- catch { /* ignore */ }
31
+ catch {
32
+ /* ignore */
33
+ }
34
+ }
35
/**
 * Write one line to stdout prefixed with `[brcc]`.
 * Each argument is converted with String() and the pieces are joined by a
 * single space.
 */
function log(...args) {
    const parts = [];
    for (const arg of args) {
        parts.push(String(arg));
    }
    console.log(`[brcc] ${parts.join(' ')}`);
}
17
39
  const DEFAULT_MAX_TOKENS = 4096;
18
40
  let lastOutputTokens = 0;
19
41
  // Model shortcuts for quick switching
20
42
  const MODEL_SHORTCUTS = {
21
- 'gpt': 'openai/gpt-5.4',
22
- 'gpt5': 'openai/gpt-5.4',
43
+ auto: 'blockrun/auto',
44
+ smart: 'blockrun/auto',
45
+ eco: 'blockrun/eco',
46
+ premium: 'blockrun/premium',
47
+ gpt: 'openai/gpt-5.4',
48
+ gpt5: 'openai/gpt-5.4',
23
49
  'gpt-5': 'openai/gpt-5.4',
24
50
  'gpt-5.4': 'openai/gpt-5.4',
25
- 'sonnet': 'anthropic/claude-sonnet-4.6',
26
- 'claude': 'anthropic/claude-sonnet-4.6',
27
- 'opus': 'anthropic/claude-opus-4.6',
28
- 'haiku': 'anthropic/claude-haiku-4.5',
29
- 'deepseek': 'deepseek/deepseek-chat',
30
- 'gemini': 'google/gemini-2.5-pro',
31
- 'grok': 'xai/grok-3',
32
- 'free': 'nvidia/gpt-oss-120b',
33
- 'mini': 'openai/gpt-5-mini',
34
- 'glm': 'zai/glm-5',
51
+ sonnet: 'anthropic/claude-sonnet-4.6',
52
+ claude: 'anthropic/claude-sonnet-4.6',
53
+ opus: 'anthropic/claude-opus-4.6',
54
+ haiku: 'anthropic/claude-haiku-4.5',
55
+ deepseek: 'deepseek/deepseek-chat',
56
+ gemini: 'google/gemini-2.5-pro',
57
+ grok: 'xai/grok-3',
58
+ free: 'nvidia/gpt-oss-120b',
59
+ mini: 'openai/gpt-5-mini',
60
+ glm: 'zai/glm-5',
35
61
  };
62
// Model pricing (per 1M tokens) - used for stats
const MODEL_PRICING = {
    // Routing profiles (blended averages)
    'blockrun/auto': { input: 0.8, output: 4.0 },
    'blockrun/eco': { input: 0.2, output: 1.0 },
    'blockrun/premium': { input: 3.0, output: 15.0 },
    'blockrun/free': { input: 0, output: 0 },
    // Individual models
    'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
    'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
    'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
    'openai/gpt-5.4': { input: 2.5, output: 15.0 },
    'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
    'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
    'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
    'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
    'xai/grok-3': { input: 3.0, output: 15.0 },
    'xai/grok-4-fast': { input: 0.2, output: 0.5 },
    'nvidia/gpt-oss-120b': { input: 0, output: 0 },
    'zai/glm-5': { input: 1.0, output: 3.2 },
    'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
};
// Rates (per 1M tokens) applied to models that have no MODEL_PRICING entry.
const DEFAULT_PRICING = { input: 2.0, output: 10.0 };
/**
 * Estimate the cost of a request from its token counts.
 *
 * @param {string} model - Model identifier looked up in MODEL_PRICING.
 * @param {number} inputTokens - Number of input tokens.
 * @param {number} outputTokens - Number of output tokens.
 * @returns {number} Token counts scaled by the per-1M-token rates of the
 *   model, or by DEFAULT_PRICING when the model is not listed.
 */
function estimateCost(model, inputTokens, outputTokens) {
    // Own-property check: a model name such as "constructor" or "__proto__"
    // must not resolve to an inherited Object.prototype member, which has no
    // input/output rates and would turn the result into NaN.
    const pricing = Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)
        ? MODEL_PRICING[model]
        : DEFAULT_PRICING;
    return ((inputTokens / 1_000_000) * pricing.input +
        (outputTokens / 1_000_000) * pricing.output);
}
36
89
  function detectModelSwitch(parsed) {
37
90
  if (!parsed.messages || parsed.messages.length === 0)
38
91
  return null;
@@ -44,7 +97,7 @@ function detectModelSwitch(parsed) {
44
97
  content = last.content;
45
98
  }
46
99
  else if (Array.isArray(last.content)) {
47
- const textBlock = last.content.find(b => b.type === 'text' && b.text);
100
+ const textBlock = last.content.find((b) => b.type === 'text' && b.text);
48
101
  if (textBlock && textBlock.text)
49
102
  content = textBlock.text;
50
103
  }
@@ -63,9 +116,12 @@ function detectModelSwitch(parsed) {
63
116
  return modelInput;
64
117
  return null;
65
118
  }
119
+ // Default model - smart routing built-in
120
+ const DEFAULT_MODEL = 'blockrun/auto';
66
121
  export function createProxy(options) {
67
122
  const chain = options.chain || 'base';
68
- let currentModel = options.modelOverride || null;
123
+ let currentModel = options.modelOverride || DEFAULT_MODEL;
124
+ const fallbackEnabled = options.fallbackEnabled !== false; // Default true
69
125
  let baseWallet = null;
70
126
  let solanaWallet = null;
71
127
  if (chain === 'base') {
@@ -85,13 +141,16 @@ export function createProxy(options) {
85
141
  return;
86
142
  }
87
143
  await initSolana();
88
- const path = req.url?.replace(/^\/api/, '') || '';
89
- const targetUrl = `${options.apiUrl}${path}`;
144
+ const requestPath = req.url?.replace(/^\/api/, '') || '';
145
+ const targetUrl = `${options.apiUrl}${requestPath}`;
90
146
  let body = '';
147
+ const requestStartTime = Date.now();
91
148
  req.on('data', (chunk) => {
92
149
  body += chunk;
93
150
  });
94
151
  req.on('end', async () => {
152
+ let requestModel = currentModel || options.modelOverride || 'unknown';
153
+ let usedFallback = false;
95
154
  try {
96
155
  debug(options, `request: ${req.method} ${req.url} currentModel=${currentModel || 'none'}`);
97
156
  if (body) {
@@ -111,7 +170,12 @@ export function createProxy(options) {
111
170
  type: 'message',
112
171
  role: 'assistant',
113
172
  model: currentModel,
114
- content: [{ type: 'text', text: `Switched to **${currentModel}**. All subsequent requests will use this model.` }],
173
+ content: [
174
+ {
175
+ type: 'text',
176
+ text: `Switched to **${currentModel}**. All subsequent requests will use this model.`,
177
+ },
178
+ ],
115
179
  stop_reason: 'end_turn',
116
180
  stop_sequence: null,
117
181
  usage: { input_tokens: 0, output_tokens: 10 },
@@ -120,14 +184,53 @@ export function createProxy(options) {
120
184
  res.end(JSON.stringify(fakeResponse));
121
185
  return;
122
186
  }
123
- // Apply model override
124
- if ((currentModel || options.modelOverride) && parsed.model) {
125
- parsed.model = currentModel || options.modelOverride;
187
+ // Apply model override only if:
188
+ // 1. User specified --model on CLI (options.modelOverride)
189
+ // 2. User switched model in-session (currentModel set by "use X" command)
190
+ // 3. Request has no model specified
191
+ if (options.modelOverride && currentModel) {
192
+ // CLI --model flag: always use this
193
+ parsed.model = currentModel;
194
+ }
195
+ else if (!parsed.model) {
196
+ // No model in request: use default
197
+ parsed.model = currentModel || DEFAULT_MODEL;
198
+ }
199
+ // Otherwise: use the model from the request as-is
200
+ requestModel = parsed.model || DEFAULT_MODEL;
201
+ // Smart routing: if model is a routing profile, classify and route
202
+ const routingProfile = parseRoutingProfile(requestModel);
203
+ if (routingProfile) {
204
+ // Extract user prompt for classification
205
+ const userMessages = parsed.messages?.filter((m) => m.role === 'user') || [];
206
+ const lastUserMsg = userMessages[userMessages.length - 1];
207
+ let promptText = '';
208
+ if (lastUserMsg) {
209
+ if (typeof lastUserMsg.content === 'string') {
210
+ promptText = lastUserMsg.content;
211
+ }
212
+ else if (Array.isArray(lastUserMsg.content)) {
213
+ promptText = lastUserMsg.content
214
+ .filter((b) => b.type === 'text')
215
+ .map((b) => b.text)
216
+ .join('\n');
217
+ }
218
+ }
219
+ // Route the request
220
+ const routing = routeRequest(promptText, routingProfile);
221
+ parsed.model = routing.model;
222
+ requestModel = routing.model;
223
+ log(`🧠 Smart routing: ${routingProfile} → ${routing.tier} → ${routing.model} ` +
224
+ `(${(routing.savings * 100).toFixed(0)}% savings) [${routing.signals.join(', ')}]`);
126
225
  }
127
- if (parsed.max_tokens) {
226
+ {
128
227
  const original = parsed.max_tokens;
129
228
  const model = (parsed.model || '').toLowerCase();
130
- const modelCap = (model.includes('deepseek') || model.includes('haiku') || model.includes('gpt-oss')) ? 8192 : 16384;
229
+ const modelCap = model.includes('deepseek') ||
230
+ model.includes('haiku') ||
231
+ model.includes('gpt-oss')
232
+ ? 8192
233
+ : 16384;
131
234
  // Use max of (last output × 2, default 4096) capped by model limit
132
235
  // This ensures short replies don't starve the next request
133
236
  const adaptive = lastOutputTokens > 0
@@ -135,28 +238,57 @@ export function createProxy(options) {
135
238
  : DEFAULT_MAX_TOKENS;
136
239
  parsed.max_tokens = Math.min(adaptive, modelCap);
137
240
  if (original !== parsed.max_tokens) {
138
- debug(options, `max_tokens: ${original} → ${parsed.max_tokens} (last output: ${lastOutputTokens || 'none'})`);
241
+ debug(options, `max_tokens: ${original || 'unset'} → ${parsed.max_tokens} (last output: ${lastOutputTokens || 'none'})`);
139
242
  }
140
243
  }
141
244
  body = JSON.stringify(parsed);
142
245
  }
143
- catch { /* not JSON, pass through */ }
246
+ catch {
247
+ /* not JSON, pass through */
248
+ }
144
249
  }
145
250
  const headers = {
146
251
  'Content-Type': 'application/json',
252
+ 'User-Agent': USER_AGENT,
253
+ 'X-Brcc-Version': X_BRCC_VERSION,
147
254
  };
148
255
  for (const [key, value] of Object.entries(req.headers)) {
149
256
  if (key.toLowerCase() !== 'host' &&
150
257
  key.toLowerCase() !== 'content-length' &&
258
+ key.toLowerCase() !== 'user-agent' && // Don't forward client's user-agent
151
259
  value) {
152
260
  headers[key] = Array.isArray(value) ? value[0] : value;
153
261
  }
154
262
  }
155
- let response = await fetch(targetUrl, {
263
+ // Build request init
264
+ const requestInit = {
156
265
  method: req.method || 'POST',
157
266
  headers,
158
267
  body: body || undefined,
159
- });
268
+ };
269
+ let response;
270
+ let finalModel = requestModel;
271
+ // Use fallback chain if enabled
272
+ if (fallbackEnabled && body && requestPath.includes('messages')) {
273
+ const fallbackConfig = {
274
+ ...DEFAULT_FALLBACK_CONFIG,
275
+ chain: buildFallbackChain(requestModel),
276
+ };
277
+ const result = await fetchWithFallback(targetUrl, requestInit, body, fallbackConfig, (failedModel, status, nextModel) => {
278
+ log(`⚠️ ${failedModel} returned ${status}, falling back to ${nextModel}`);
279
+ });
280
+ response = result.response;
281
+ finalModel = result.modelUsed;
282
+ usedFallback = result.fallbackUsed;
283
+ if (usedFallback) {
284
+ log(`↺ Fallback successful: using ${finalModel}`);
285
+ }
286
+ }
287
+ else {
288
+ // Direct fetch without fallback
289
+ response = await fetch(targetUrl, requestInit);
290
+ }
291
+ // Handle 402 payment
160
292
  if (response.status === 402) {
161
293
  if (chain === 'solana' && solanaWallet) {
162
294
  response = await handleSolanaPayment(response, targetUrl, req.method || 'POST', headers, body, solanaWallet.privateKey, solanaWallet.address);
@@ -175,22 +307,33 @@ export function createProxy(options) {
175
307
  const reader = response.body.getReader();
176
308
  const decoder = new TextDecoder();
177
309
  let lastChunkText = '';
310
+ let fullResponse = '';
178
311
  const pump = async () => {
179
312
  while (true) {
180
313
  const { done, value } = await reader.read();
181
314
  if (done) {
315
+ // Record stats from streaming response
182
316
  if (isStreaming && lastChunkText) {
183
- const match = lastChunkText.match(/"output_tokens"\s*:\s*(\d+)/);
184
- if (match) {
185
- lastOutputTokens = parseInt(match[1], 10);
186
- debug(options, `recorded output_tokens: ${lastOutputTokens} (stream)`);
317
+ const outputMatch = lastChunkText.match(/"output_tokens"\s*:\s*(\d+)/);
318
+ const inputMatch = fullResponse.match(/"input_tokens"\s*:\s*(\d+)/);
319
+ if (outputMatch) {
320
+ lastOutputTokens = parseInt(outputMatch[1], 10);
321
+ const inputTokens = inputMatch
322
+ ? parseInt(inputMatch[1], 10)
323
+ : 0;
324
+ const latencyMs = Date.now() - requestStartTime;
325
+ const cost = estimateCost(finalModel, inputTokens, lastOutputTokens);
326
+ recordUsage(finalModel, inputTokens, lastOutputTokens, cost, latencyMs, usedFallback);
327
+ debug(options, `recorded: model=${finalModel} in=${inputTokens} out=${lastOutputTokens} cost=$${cost.toFixed(4)} fallback=${usedFallback}`);
187
328
  }
188
329
  }
189
330
  res.end();
190
331
  break;
191
332
  }
192
333
  if (isStreaming) {
193
- lastChunkText = decoder.decode(value, { stream: true });
334
+ const chunk = decoder.decode(value, { stream: true });
335
+ lastChunkText = chunk;
336
+ fullResponse += chunk;
194
337
  }
195
338
  res.write(value);
196
339
  }
@@ -203,15 +346,22 @@ export function createProxy(options) {
203
346
  const parsed = JSON.parse(text);
204
347
  if (parsed.usage?.output_tokens) {
205
348
  lastOutputTokens = parsed.usage.output_tokens;
206
- debug(options, `recorded output_tokens: ${lastOutputTokens}`);
349
+ const inputTokens = parsed.usage?.input_tokens || 0;
350
+ const latencyMs = Date.now() - requestStartTime;
351
+ const cost = estimateCost(finalModel, inputTokens, lastOutputTokens);
352
+ recordUsage(finalModel, inputTokens, lastOutputTokens, cost, latencyMs, usedFallback);
353
+ debug(options, `recorded: model=${finalModel} in=${inputTokens} out=${lastOutputTokens} cost=$${cost.toFixed(4)} fallback=${usedFallback}`);
207
354
  }
208
355
  }
209
- catch { /* not JSON */ }
356
+ catch {
357
+ /* not JSON */
358
+ }
210
359
  res.end(text);
211
360
  }
212
361
  }
213
362
  catch (error) {
214
363
  const msg = error instanceof Error ? error.message : 'Proxy error';
364
+ log(`❌ Error: ${msg}`);
215
365
  res.writeHead(502, { 'Content-Type': 'application/json' });
216
366
  res.end(JSON.stringify({
217
367
  type: 'error',
@@ -292,9 +442,12 @@ export function classifyRequest(body) {
292
442
  .map((b) => b.text)
293
443
  .join('\n');
294
444
  }
295
- if (content.includes('```') || content.includes('function ') ||
296
- content.includes('class ') || content.includes('import ') ||
297
- content.includes('def ') || content.includes('const ')) {
445
+ if (content.includes('```') ||
446
+ content.includes('function ') ||
447
+ content.includes('class ') ||
448
+ content.includes('import ') ||
449
+ content.includes('def ') ||
450
+ content.includes('const ')) {
298
451
  return { category: 'code' };
299
452
  }
300
453
  if (content.length < 100) {
@@ -0,0 +1,22 @@
1
/**
 * Smart Router for brcc
 * Ported from ClawRouter - 15-dimension weighted scoring for tier classification
 */
/** Complexity tier a prompt is classified into. */
export type Tier =
    | 'SIMPLE'
    | 'MEDIUM'
    | 'COMPLEX'
    | 'REASONING';
/** Routing profile that selects which tier-to-model table is consulted. */
export type RoutingProfile =
    | 'auto'
    | 'eco'
    | 'premium'
    | 'free';
/** Outcome of routing a single prompt. */
export interface RoutingResult {
    /** Identifier of the model selected for the request. */
    model: string;
    /** Tier the prompt was classified into. */
    tier: Tier;
    /** Confidence value reported by the classifier for the chosen tier. */
    confidence: number;
    /** Labels of the heuristics that fired during classification. */
    signals: string[];
    /** Estimated saving, as a fraction, relative to the router's baseline model cost. */
    savings: number;
}
/**
 * Classify a prompt and select a model for it under the given profile.
 * `profile` may be omitted.
 */
export declare function routeRequest(prompt: string, profile?: RoutingProfile): RoutingResult;
/**
 * Get fallback models for a tier
 */
export declare function getFallbackChain(tier: Tier, profile?: RoutingProfile): string[];
/**
 * Parse routing profile from model string
 */
export declare function parseRoutingProfile(model: string): RoutingProfile | null;
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Smart Router for brcc
3
+ * Ported from ClawRouter - 15-dimension weighted scoring for tier classification
4
+ */
5
// ─── Tier Model Configs ───
// Every profile maps a tier name to { primary, fallback }: the model tried
// first and the ordered list of alternatives.
const tierEntry = (primary, ...fallback) => ({ primary, fallback });
const AUTO_TIERS = {
    SIMPLE: tierEntry('google/gemini-2.5-flash', 'deepseek/deepseek-chat', 'nvidia/gpt-oss-120b'),
    MEDIUM: tierEntry('moonshot/kimi-k2.5', 'google/gemini-2.5-flash', 'deepseek/deepseek-chat'),
    COMPLEX: tierEntry('google/gemini-3.1-pro', 'anthropic/claude-sonnet-4.6', 'google/gemini-2.5-pro'),
    REASONING: tierEntry('xai/grok-4-1-fast-reasoning', 'deepseek/deepseek-reasoner', 'openai/o4-mini'),
};
const ECO_TIERS = {
    SIMPLE: tierEntry('nvidia/gpt-oss-120b', 'google/gemini-2.5-flash-lite'),
    MEDIUM: tierEntry('google/gemini-2.5-flash-lite', 'nvidia/gpt-oss-120b'),
    COMPLEX: tierEntry('google/gemini-2.5-flash-lite', 'deepseek/deepseek-chat'),
    REASONING: tierEntry('xai/grok-4-1-fast-reasoning', 'deepseek/deepseek-reasoner'),
};
const PREMIUM_TIERS = {
    SIMPLE: tierEntry('moonshot/kimi-k2.5', 'anthropic/claude-haiku-4.5'),
    MEDIUM: tierEntry('openai/gpt-5.3-codex', 'anthropic/claude-sonnet-4.6'),
    COMPLEX: tierEntry('anthropic/claude-opus-4.6', 'openai/gpt-5.4', 'anthropic/claude-sonnet-4.6'),
    REASONING: tierEntry('anthropic/claude-sonnet-4.6', 'anthropic/claude-opus-4.6', 'openai/o3'),
};
60
// ─── Keywords for Classification ───
const CODE_KEYWORDS = [
    'function', 'class', 'import', 'def', 'SELECT', 'async', 'await',
    'const', 'let', 'var', 'return', '```', '函数', '类', '导入',
];
const REASONING_KEYWORDS = [
    'prove', 'theorem', 'derive', 'step by step', 'chain of thought',
    'formally', 'mathematical', 'proof', 'logically', '证明', '定理', '推导',
];
const SIMPLE_KEYWORDS = [
    'what is', 'define', 'translate', 'hello', 'yes or no', 'capital of',
    'how old', 'who is', 'when was', '什么是', '翻译', '你好',
];
const TECHNICAL_KEYWORDS = [
    'algorithm', 'optimize', 'architecture', 'distributed', 'kubernetes',
    'microservice', 'database', 'infrastructure', '算法', '架构', '优化',
];
const AGENTIC_KEYWORDS = [
    'read file', 'edit', 'modify', 'update', 'create file', 'execute',
    'deploy', 'install', 'npm', 'pip', 'fix', 'debug', 'verify',
    '编辑', '修改', '部署', '安装', '修复', '调试',
];
// Imperative verbs (build, create, implement, etc.). Kept at module level so
// the list is not re-allocated on every classification.
const IMPERATIVE_KEYWORDS = [
    'build', 'create', 'implement', 'design', 'develop', 'write', 'make',
    'generate', 'construct', '构建', '创建', '实现', '设计', '开发',
];
/**
 * Count how many keywords occur in the text.
 *
 * Matching is a case-insensitive substring test, so a keyword also matches
 * inside longer words. Each keyword counts at most once.
 *
 * @param {string} text - Text to search.
 * @param {string[]} keywords - Keywords to look for.
 * @returns {number} Number of keywords found in the text.
 */
function countMatches(text, keywords) {
    const haystack = text.toLowerCase();
    let hits = 0;
    for (const keyword of keywords) {
        if (haystack.includes(keyword.toLowerCase())) {
            hits += 1;
        }
    }
    return hits;
}
/**
 * Score a prompt with keyword and shape heuristics and map it to a tier.
 *
 * @param {string} prompt - Prompt text to classify.
 * @param {number} tokenCount - Token count estimate supplied by the caller.
 * @returns {{ tier: string, confidence: number, signals: string[] }}
 *   The tier ('SIMPLE' | 'MEDIUM' | 'COMPLEX' | 'REASONING'), a confidence
 *   value, and the labels of the heuristics that fired, in evaluation order.
 */
function classifyRequest(prompt, tokenCount) {
    const signals = [];
    let score = 0;
    // Apply one heuristic: adjust the running score and record its label.
    const bump = (delta, signal) => {
        score += delta;
        signals.push(signal);
    };
    // Prompt length (small penalty so short prompts are not pushed down too hard)
    if (tokenCount < 30) {
        bump(-0.15, 'short');
    }
    else if (tokenCount > 500) {
        bump(0.2, 'long');
    }
    // Code detection
    const codeMatches = countMatches(prompt, CODE_KEYWORDS);
    if (codeMatches >= 2) {
        bump(0.5, 'code');
    }
    else if (codeMatches >= 1) {
        bump(0.25, 'code-light');
    }
    // Reasoning detection: two or more hits bypass scoring entirely
    const reasoningMatches = countMatches(prompt, REASONING_KEYWORDS);
    if (reasoningMatches >= 2) {
        return { tier: 'REASONING', confidence: 0.9, signals: [...signals, 'reasoning'] };
    }
    if (reasoningMatches >= 1) {
        bump(0.4, 'reasoning-light');
    }
    // Simple detection: a single hit only counts when there is no code and
    // the prompt is very short
    const simpleMatches = countMatches(prompt, SIMPLE_KEYWORDS);
    if (simpleMatches >= 2) {
        bump(-0.4, 'simple');
    }
    else if (simpleMatches >= 1 && codeMatches === 0 && tokenCount < 50) {
        bump(-0.25, 'simple');
    }
    // Technical complexity
    const techMatches = countMatches(prompt, TECHNICAL_KEYWORDS);
    if (techMatches >= 2) {
        bump(0.4, 'technical');
    }
    else if (techMatches >= 1) {
        bump(0.2, 'technical-light');
    }
    // Agentic detection (needs at least two hits to count)
    const agenticMatches = countMatches(prompt, AGENTIC_KEYWORDS);
    if (agenticMatches >= 3) {
        bump(0.35, 'agentic');
    }
    else if (agenticMatches >= 2) {
        bump(0.2, 'agentic-light');
    }
    // Multi-step patterns
    if (/first.*then|step \d|\d\.\s/i.test(prompt)) {
        bump(0.2, 'multi-step');
    }
    // Question complexity
    const questionCount = (prompt.match(/\?/g) || []).length;
    if (questionCount > 3) {
        bump(0.15, `${questionCount} questions`);
    }
    // Imperative verbs
    if (countMatches(prompt, IMPERATIVE_KEYWORDS) >= 1) {
        bump(0.15, 'imperative');
    }
    // Map score to tier
    let tier;
    if (score < -0.1) {
        tier = 'SIMPLE';
    }
    else if (score < 0.25) {
        tier = 'MEDIUM';
    }
    else if (score < 0.45) {
        tier = 'COMPLEX';
    }
    else {
        tier = 'REASONING';
    }
    // Confidence grows with the magnitude of the score and is capped at 0.95
    const confidence = Math.min(0.95, 0.7 + Math.abs(score) * 0.3);
    return { tier, confidence, signals };
}
187
// ─── Main Router ───
// Rough per-1K-token cost figures used only for the savings estimate.
// Baseline: Claude Opus at $5/$25 per 1M tokens
const OPUS_COST_PER_1K = 0.015; // rough average
const MODEL_COSTS_PER_1K = {
    'nvidia/gpt-oss-120b': 0,
    'google/gemini-2.5-flash': 0.001,
    'google/gemini-2.5-flash-lite': 0.0003,
    'deepseek/deepseek-chat': 0.0004,
    'moonshot/kimi-k2.5': 0.002,
    'google/gemini-3.1-pro': 0.007,
    'anthropic/claude-sonnet-4.6': 0.009,
    'anthropic/claude-opus-4.6': 0.015,
    'xai/grok-4-1-fast-reasoning': 0.0004,
};
// Cost assumed for a model that has no entry in MODEL_COSTS_PER_1K.
const UNLISTED_MODEL_COST_PER_1K = 0.005;
/**
 * Classify a prompt and choose the primary model of the matching tier.
 *
 * @param {string} prompt - Prompt text. A missing or non-string value is
 *   treated as an empty prompt.
 * @param {string} [profile='auto'] - 'auto', 'eco', 'premium' or 'free'; any
 *   other value uses the 'auto' tier table.
 * @returns {{ model: string, tier: string, confidence: number,
 *   signals: string[], savings: number }} The selected model, the tier and
 *   classifier output, and the estimated saving versus the Opus baseline as
 *   a fraction that is never negative.
 */
export function routeRequest(prompt, profile = 'auto') {
    // Free profile - always use free model, no classification needed
    if (profile === 'free') {
        return {
            model: 'nvidia/gpt-oss-120b',
            tier: 'SIMPLE',
            confidence: 1.0,
            signals: ['free-profile'],
            savings: 1.0,
        };
    }
    // Guard against null/undefined/non-string prompts, which would otherwise
    // throw on `.length` before any routing happens.
    const text = typeof prompt === 'string' ? prompt : '';
    // Estimate token count (rough: 4 chars per token)
    const tokenCount = Math.ceil(text.length / 4);
    const { tier, confidence, signals } = classifyRequest(text, tokenCount);
    // Select tier config based on profile
    let tierConfigs;
    switch (profile) {
        case 'eco':
            tierConfigs = ECO_TIERS;
            break;
        case 'premium':
            tierConfigs = PREMIUM_TIERS;
            break;
        default:
            tierConfigs = AUTO_TIERS;
    }
    const model = tierConfigs[tier].primary;
    // Savings relative to the Opus baseline, clamped so it never goes negative
    const modelCost = MODEL_COSTS_PER_1K[model] ?? UNLISTED_MODEL_COST_PER_1K;
    const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
    return {
        model,
        tier,
        confidence,
        signals,
        savings,
    };
}
240
/**
 * Get fallback models for a tier
 *
 * @param {string} tier - 'SIMPLE', 'MEDIUM', 'COMPLEX' or 'REASONING'.
 * @param {string} [profile='auto'] - 'auto', 'eco', 'premium' or 'free'; any
 *   other value uses the 'auto' tier table.
 * @returns {string[]} A new array with the tier's primary model first,
 *   followed by its fallbacks. The 'free' profile always yields the single
 *   free model and does not look at the tier.
 * @throws {TypeError} When the tier is not present in the selected table.
 */
export function getFallbackChain(tier, profile = 'auto') {
    let tierConfigs;
    switch (profile) {
        case 'eco':
            tierConfigs = ECO_TIERS;
            break;
        case 'premium':
            tierConfigs = PREMIUM_TIERS;
            break;
        case 'free':
            return ['nvidia/gpt-oss-120b'];
        default:
            tierConfigs = AUTO_TIERS;
    }
    // Own-property lookup so an unknown tier fails with a clear message
    // instead of an opaque "cannot read properties of undefined" error.
    const config = Object.prototype.hasOwnProperty.call(tierConfigs, tier)
        ? tierConfigs[tier]
        : undefined;
    if (!config) {
        throw new TypeError(`Unknown routing tier: ${String(tier)}`);
    }
    return [config.primary, ...config.fallback];
}
260
/**
 * Parse routing profile from model string
 *
 * Matching is case-insensitive and accepts both the bare profile name and
 * its `blockrun/`-prefixed form.
 *
 * @param {string} model - Model string from a request or configuration.
 * @returns {'auto' | 'eco' | 'premium' | 'free' | null} The profile, or null
 *   when the value is not a string or names no routing profile.
 */
export function parseRoutingProfile(model) {
    // A non-string model (for example a number in the JSON body) is simply
    // not a routing profile; it must not throw on toLowerCase().
    if (typeof model !== 'string') {
        return null;
    }
    switch (model.toLowerCase()) {
        case 'blockrun/auto':
        case 'auto':
            return 'auto';
        case 'blockrun/eco':
        case 'eco':
            return 'eco';
        case 'blockrun/premium':
        case 'premium':
            return 'premium';
        case 'blockrun/free':
        case 'free':
            return 'free';
        default:
            return null;
    }
}