@blockrun/cc 0.8.1 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,24 @@
1
1
  import http from 'node:http';
2
- import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
3
2
  import fs from 'node:fs';
4
3
  import path from 'node:path';
5
4
  import os from 'node:os';
5
+ import { fileURLToPath } from 'node:url';
6
+ import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
7
+ import { recordUsage } from '../stats/tracker.js';
8
+ import { fetchWithFallback, buildFallbackChain, DEFAULT_FALLBACK_CONFIG, } from './fallback.js';
9
+ import { routeRequest, parseRoutingProfile, } from '../router/index.js';
10
// Resolve the running version from the package manifest located two
// directories above this module. The manifest is read once at module load.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Fallback used only when the manifest is missing, unreadable, or carries no
// usable "version" string. Keep it in sync with the released package version.
let VERSION = '0.9.1';
try {
    const pkgPath = path.resolve(__dirname, '../../package.json');
    const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
    // Accept only a non-blank string: the value is sent verbatim as an HTTP
    // header, so numbers, objects, or whitespace-only values are rejected.
    if (typeof pkg?.version === 'string' && pkg.version.trim() !== '') {
        VERSION = pkg.version.trim();
    }
}
catch {
    /* manifest missing or malformed: keep the fallback version */
}
// Identification values sent with backend requests
// (used as the User-Agent and X-Brcc-Version header values).
const USER_AGENT = `brcc/${VERSION}`;
const X_BRCC_VERSION = VERSION;
6
22
  const LOG_FILE = path.join(os.homedir(), '.blockrun', 'brcc-debug.log');
7
23
  function debug(options, ...args) {
8
24
  if (!options.debug)
@@ -12,27 +28,64 @@ function debug(options, ...args) {
12
28
  fs.mkdirSync(path.dirname(LOG_FILE), { recursive: true });
13
29
  fs.appendFileSync(LOG_FILE, msg);
14
30
  }
15
- catch { /* ignore */ }
31
+ catch {
32
+ /* ignore */
33
+ }
34
+ }
35
/**
 * Print one status line to stdout, prefixed with "[brcc]".
 * Every argument is converted with String() and the pieces are joined by
 * single spaces.
 */
function log(...args) {
    const parts = args.map((arg) => String(arg));
    console.log(`[brcc] ${parts.join(' ')}`);
}
17
39
  const DEFAULT_MAX_TOKENS = 4096;
18
40
  let lastOutputTokens = 0;
19
41
  // Model shortcuts for quick switching
20
42
  const MODEL_SHORTCUTS = {
21
- 'gpt': 'openai/gpt-5.4',
22
- 'gpt5': 'openai/gpt-5.4',
43
+ auto: 'blockrun/auto',
44
+ smart: 'blockrun/auto',
45
+ eco: 'blockrun/eco',
46
+ premium: 'blockrun/premium',
47
+ gpt: 'openai/gpt-5.4',
48
+ gpt5: 'openai/gpt-5.4',
23
49
  'gpt-5': 'openai/gpt-5.4',
24
50
  'gpt-5.4': 'openai/gpt-5.4',
25
- 'sonnet': 'anthropic/claude-sonnet-4.6',
26
- 'claude': 'anthropic/claude-sonnet-4.6',
27
- 'opus': 'anthropic/claude-opus-4.6',
28
- 'haiku': 'anthropic/claude-haiku-4.5',
29
- 'deepseek': 'deepseek/deepseek-chat',
30
- 'gemini': 'google/gemini-2.5-pro',
31
- 'grok': 'xai/grok-3',
32
- 'free': 'nvidia/gpt-oss-120b',
33
- 'mini': 'openai/gpt-5-mini',
34
- 'glm': 'zai/glm-5',
51
+ sonnet: 'anthropic/claude-sonnet-4.6',
52
+ claude: 'anthropic/claude-sonnet-4.6',
53
+ opus: 'anthropic/claude-opus-4.6',
54
+ haiku: 'anthropic/claude-haiku-4.5',
55
+ deepseek: 'deepseek/deepseek-chat',
56
+ gemini: 'google/gemini-2.5-pro',
57
+ grok: 'xai/grok-3',
58
+ free: 'nvidia/gpt-oss-120b',
59
+ mini: 'openai/gpt-5-mini',
60
+ glm: 'zai/glm-5',
35
61
  };
62
// Model pricing (per 1M tokens) - used for stats.
// Each row is [model id, input price, output price]; rows are expanded into
// { input, output } records keyed by model id.
// NOTE(review): some models the smart router can select (for example
// 'google/gemini-3.1-pro' and 'xai/grok-4-1-fast-reasoning') have no row here -
// confirm whether they should be listed.
const MODEL_PRICING = Object.fromEntries([
    // Routing profiles (blended averages)
    ['blockrun/auto', 0.8, 4.0],
    ['blockrun/eco', 0.2, 1.0],
    ['blockrun/premium', 3.0, 15.0],
    ['blockrun/free', 0, 0],
    // Individual models
    ['anthropic/claude-sonnet-4.6', 3.0, 15.0],
    ['anthropic/claude-opus-4.6', 5.0, 25.0],
    ['anthropic/claude-haiku-4.5', 1.0, 5.0],
    ['openai/gpt-5.4', 2.5, 15.0],
    ['openai/gpt-5-mini', 0.25, 2.0],
    ['google/gemini-2.5-pro', 1.25, 10.0],
    ['google/gemini-2.5-flash', 0.3, 2.5],
    ['deepseek/deepseek-chat', 0.28, 0.42],
    ['xai/grok-3', 3.0, 15.0],
    ['xai/grok-4-fast', 0.2, 0.5],
    ['nvidia/gpt-oss-120b', 0, 0],
    ['zai/glm-5', 1.0, 3.2],
    ['moonshot/kimi-k2.5', 0.6, 3.0],
].map(([model, input, output]) => [model, { input, output }]));
84
/**
 * Estimate the cost of one request from its token counts.
 *
 * Prices are looked up in MODEL_PRICING (per 1M tokens); a model without an
 * entry is priced at 2.0 for input and 10.0 for output.
 *
 * @param {string} model - Model id used as the pricing key.
 * @param {number} inputTokens - Number of input tokens.
 * @param {number} outputTokens - Number of output tokens.
 * @returns {number} Input cost plus output cost.
 */
function estimateCost(model, inputTokens, outputTokens) {
    const TOKENS_PER_PRICE_UNIT = 1_000_000;
    const { input, output } = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
    const inputCost = (inputTokens / TOKENS_PER_PRICE_UNIT) * input;
    const outputCost = (outputTokens / TOKENS_PER_PRICE_UNIT) * output;
    return inputCost + outputCost;
}
36
89
  function detectModelSwitch(parsed) {
37
90
  if (!parsed.messages || parsed.messages.length === 0)
38
91
  return null;
@@ -44,7 +97,7 @@ function detectModelSwitch(parsed) {
44
97
  content = last.content;
45
98
  }
46
99
  else if (Array.isArray(last.content)) {
47
- const textBlock = last.content.find(b => b.type === 'text' && b.text);
100
+ const textBlock = last.content.find((b) => b.type === 'text' && b.text);
48
101
  if (textBlock && textBlock.text)
49
102
  content = textBlock.text;
50
103
  }
@@ -63,9 +116,12 @@ function detectModelSwitch(parsed) {
63
116
  return modelInput;
64
117
  return null;
65
118
  }
119
+ // Default model - smart routing built-in
120
+ const DEFAULT_MODEL = 'blockrun/auto';
66
121
  export function createProxy(options) {
67
122
  const chain = options.chain || 'base';
68
- let currentModel = options.modelOverride || null;
123
+ let currentModel = options.modelOverride || DEFAULT_MODEL;
124
+ const fallbackEnabled = options.fallbackEnabled !== false; // Default true
69
125
  let baseWallet = null;
70
126
  let solanaWallet = null;
71
127
  if (chain === 'base') {
@@ -85,14 +141,18 @@ export function createProxy(options) {
85
141
  return;
86
142
  }
87
143
  await initSolana();
88
- const path = req.url?.replace(/^\/api/, '') || '';
89
- const targetUrl = `${options.apiUrl}${path}`;
144
+ const requestPath = req.url?.replace(/^\/api/, '') || '';
145
+ const targetUrl = `${options.apiUrl}${requestPath}`;
90
146
  let body = '';
147
+ const requestStartTime = Date.now();
91
148
  req.on('data', (chunk) => {
92
149
  body += chunk;
93
150
  });
94
151
  req.on('end', async () => {
152
+ let requestModel = currentModel || options.modelOverride || 'unknown';
153
+ let usedFallback = false;
95
154
  try {
155
+ debug(options, `request: ${req.method} ${req.url} currentModel=${currentModel || 'none'}`);
96
156
  if (body) {
97
157
  try {
98
158
  const parsed = JSON.parse(body);
@@ -110,7 +170,12 @@ export function createProxy(options) {
110
170
  type: 'message',
111
171
  role: 'assistant',
112
172
  model: currentModel,
113
- content: [{ type: 'text', text: `Switched to **${currentModel}**. All subsequent requests will use this model.` }],
173
+ content: [
174
+ {
175
+ type: 'text',
176
+ text: `Switched to **${currentModel}**. All subsequent requests will use this model.`,
177
+ },
178
+ ],
114
179
  stop_reason: 'end_turn',
115
180
  stop_sequence: null,
116
181
  usage: { input_tokens: 0, output_tokens: 10 },
@@ -119,14 +184,53 @@ export function createProxy(options) {
119
184
  res.end(JSON.stringify(fakeResponse));
120
185
  return;
121
186
  }
122
- // Apply model override
123
- if ((currentModel || options.modelOverride) && parsed.model) {
124
- parsed.model = currentModel || options.modelOverride;
187
+ // Apply model override only if:
188
+ // 1. User specified --model on CLI (options.modelOverride)
189
+ // 2. User switched model in-session (currentModel set by "use X" command)
190
+ // 3. Request has no model specified
191
+ if (options.modelOverride && currentModel) {
192
+ // CLI --model flag: always use this
193
+ parsed.model = currentModel;
194
+ }
195
+ else if (!parsed.model) {
196
+ // No model in request: use default
197
+ parsed.model = currentModel || DEFAULT_MODEL;
198
+ }
199
+ // Otherwise: use the model from the request as-is
200
+ requestModel = parsed.model || DEFAULT_MODEL;
201
+ // Smart routing: if model is a routing profile, classify and route
202
+ const routingProfile = parseRoutingProfile(requestModel);
203
+ if (routingProfile) {
204
+ // Extract user prompt for classification
205
+ const userMessages = parsed.messages?.filter((m) => m.role === 'user') || [];
206
+ const lastUserMsg = userMessages[userMessages.length - 1];
207
+ let promptText = '';
208
+ if (lastUserMsg) {
209
+ if (typeof lastUserMsg.content === 'string') {
210
+ promptText = lastUserMsg.content;
211
+ }
212
+ else if (Array.isArray(lastUserMsg.content)) {
213
+ promptText = lastUserMsg.content
214
+ .filter((b) => b.type === 'text')
215
+ .map((b) => b.text)
216
+ .join('\n');
217
+ }
218
+ }
219
+ // Route the request
220
+ const routing = routeRequest(promptText, routingProfile);
221
+ parsed.model = routing.model;
222
+ requestModel = routing.model;
223
+ log(`🧠 Smart routing: ${routingProfile} → ${routing.tier} → ${routing.model} ` +
224
+ `(${(routing.savings * 100).toFixed(0)}% savings) [${routing.signals.join(', ')}]`);
125
225
  }
126
- if (parsed.max_tokens) {
226
+ {
127
227
  const original = parsed.max_tokens;
128
228
  const model = (parsed.model || '').toLowerCase();
129
- const modelCap = (model.includes('deepseek') || model.includes('haiku') || model.includes('gpt-oss')) ? 8192 : 16384;
229
+ const modelCap = model.includes('deepseek') ||
230
+ model.includes('haiku') ||
231
+ model.includes('gpt-oss')
232
+ ? 8192
233
+ : 16384;
130
234
  // Use max of (last output × 2, default 4096) capped by model limit
131
235
  // This ensures short replies don't starve the next request
132
236
  const adaptive = lastOutputTokens > 0
@@ -134,28 +238,57 @@ export function createProxy(options) {
134
238
  : DEFAULT_MAX_TOKENS;
135
239
  parsed.max_tokens = Math.min(adaptive, modelCap);
136
240
  if (original !== parsed.max_tokens) {
137
- debug(options, `max_tokens: ${original} → ${parsed.max_tokens} (last output: ${lastOutputTokens || 'none'})`);
241
+ debug(options, `max_tokens: ${original || 'unset'} → ${parsed.max_tokens} (last output: ${lastOutputTokens || 'none'})`);
138
242
  }
139
243
  }
140
244
  body = JSON.stringify(parsed);
141
245
  }
142
- catch { /* not JSON, pass through */ }
246
+ catch {
247
+ /* not JSON, pass through */
248
+ }
143
249
  }
144
250
  const headers = {
145
251
  'Content-Type': 'application/json',
252
+ 'User-Agent': USER_AGENT,
253
+ 'X-Brcc-Version': X_BRCC_VERSION,
146
254
  };
147
255
  for (const [key, value] of Object.entries(req.headers)) {
148
256
  if (key.toLowerCase() !== 'host' &&
149
257
  key.toLowerCase() !== 'content-length' &&
258
+ key.toLowerCase() !== 'user-agent' && // Don't forward client's user-agent
150
259
  value) {
151
260
  headers[key] = Array.isArray(value) ? value[0] : value;
152
261
  }
153
262
  }
154
- let response = await fetch(targetUrl, {
263
+ // Build request init
264
+ const requestInit = {
155
265
  method: req.method || 'POST',
156
266
  headers,
157
267
  body: body || undefined,
158
- });
268
+ };
269
+ let response;
270
+ let finalModel = requestModel;
271
+ // Use fallback chain if enabled
272
+ if (fallbackEnabled && body && requestPath.includes('messages')) {
273
+ const fallbackConfig = {
274
+ ...DEFAULT_FALLBACK_CONFIG,
275
+ chain: buildFallbackChain(requestModel),
276
+ };
277
+ const result = await fetchWithFallback(targetUrl, requestInit, body, fallbackConfig, (failedModel, status, nextModel) => {
278
+ log(`⚠️ ${failedModel} returned ${status}, falling back to ${nextModel}`);
279
+ });
280
+ response = result.response;
281
+ finalModel = result.modelUsed;
282
+ usedFallback = result.fallbackUsed;
283
+ if (usedFallback) {
284
+ log(`↺ Fallback successful: using ${finalModel}`);
285
+ }
286
+ }
287
+ else {
288
+ // Direct fetch without fallback
289
+ response = await fetch(targetUrl, requestInit);
290
+ }
291
+ // Handle 402 payment
159
292
  if (response.status === 402) {
160
293
  if (chain === 'solana' && solanaWallet) {
161
294
  response = await handleSolanaPayment(response, targetUrl, req.method || 'POST', headers, body, solanaWallet.privateKey, solanaWallet.address);
@@ -174,22 +307,33 @@ export function createProxy(options) {
174
307
  const reader = response.body.getReader();
175
308
  const decoder = new TextDecoder();
176
309
  let lastChunkText = '';
310
+ let fullResponse = '';
177
311
  const pump = async () => {
178
312
  while (true) {
179
313
  const { done, value } = await reader.read();
180
314
  if (done) {
315
+ // Record stats from streaming response
181
316
  if (isStreaming && lastChunkText) {
182
- const match = lastChunkText.match(/"output_tokens"\s*:\s*(\d+)/);
183
- if (match) {
184
- lastOutputTokens = parseInt(match[1], 10);
185
- debug(options, `recorded output_tokens: ${lastOutputTokens} (stream)`);
317
+ const outputMatch = lastChunkText.match(/"output_tokens"\s*:\s*(\d+)/);
318
+ const inputMatch = fullResponse.match(/"input_tokens"\s*:\s*(\d+)/);
319
+ if (outputMatch) {
320
+ lastOutputTokens = parseInt(outputMatch[1], 10);
321
+ const inputTokens = inputMatch
322
+ ? parseInt(inputMatch[1], 10)
323
+ : 0;
324
+ const latencyMs = Date.now() - requestStartTime;
325
+ const cost = estimateCost(finalModel, inputTokens, lastOutputTokens);
326
+ recordUsage(finalModel, inputTokens, lastOutputTokens, cost, latencyMs, usedFallback);
327
+ debug(options, `recorded: model=${finalModel} in=${inputTokens} out=${lastOutputTokens} cost=$${cost.toFixed(4)} fallback=${usedFallback}`);
186
328
  }
187
329
  }
188
330
  res.end();
189
331
  break;
190
332
  }
191
333
  if (isStreaming) {
192
- lastChunkText = decoder.decode(value, { stream: true });
334
+ const chunk = decoder.decode(value, { stream: true });
335
+ lastChunkText = chunk;
336
+ fullResponse += chunk;
193
337
  }
194
338
  res.write(value);
195
339
  }
@@ -202,15 +346,22 @@ export function createProxy(options) {
202
346
  const parsed = JSON.parse(text);
203
347
  if (parsed.usage?.output_tokens) {
204
348
  lastOutputTokens = parsed.usage.output_tokens;
205
- debug(options, `recorded output_tokens: ${lastOutputTokens}`);
349
+ const inputTokens = parsed.usage?.input_tokens || 0;
350
+ const latencyMs = Date.now() - requestStartTime;
351
+ const cost = estimateCost(finalModel, inputTokens, lastOutputTokens);
352
+ recordUsage(finalModel, inputTokens, lastOutputTokens, cost, latencyMs, usedFallback);
353
+ debug(options, `recorded: model=${finalModel} in=${inputTokens} out=${lastOutputTokens} cost=$${cost.toFixed(4)} fallback=${usedFallback}`);
206
354
  }
207
355
  }
208
- catch { /* not JSON */ }
356
+ catch {
357
+ /* not JSON */
358
+ }
209
359
  res.end(text);
210
360
  }
211
361
  }
212
362
  catch (error) {
213
363
  const msg = error instanceof Error ? error.message : 'Proxy error';
364
+ log(`❌ Error: ${msg}`);
214
365
  res.writeHead(502, { 'Content-Type': 'application/json' });
215
366
  res.end(JSON.stringify({
216
367
  type: 'error',
@@ -291,9 +442,12 @@ export function classifyRequest(body) {
291
442
  .map((b) => b.text)
292
443
  .join('\n');
293
444
  }
294
- if (content.includes('```') || content.includes('function ') ||
295
- content.includes('class ') || content.includes('import ') ||
296
- content.includes('def ') || content.includes('const ')) {
445
+ if (content.includes('```') ||
446
+ content.includes('function ') ||
447
+ content.includes('class ') ||
448
+ content.includes('import ') ||
449
+ content.includes('def ') ||
450
+ content.includes('const ')) {
297
451
  return { category: 'code' };
298
452
  }
299
453
  if (content.length < 100) {
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Smart Router for brcc
3
+ * Ported from ClawRouter - 15-dimension weighted scoring for tier classification
4
+ */
5
+ export type Tier = 'SIMPLE' | 'MEDIUM' | 'COMPLEX' | 'REASONING';
6
+ export type RoutingProfile = 'auto' | 'eco' | 'premium' | 'free';
7
/** Outcome of routing one prompt: the chosen model plus how the choice was made. */
+ export interface RoutingResult {
8
/** Model id selected for the request. */
+ model: string;
9
/** Complexity tier the prompt was classified into. */
+ tier: Tier;
10
/** Classifier confidence; the implementation reports 1.0 for the 'free' profile. */
+ confidence: number;
11
/** Labels of the heuristics that fired during classification. */
+ signals: string[];
12
/** Estimated saving as a fraction between 0 and 1, relative to the implementation's fixed baseline cost. */
+ savings: number;
13
+ }
14
/** Classify `prompt` and pick a model for it; `profile` defaults to 'auto' in the implementation. */
+ export declare function routeRequest(prompt: string, profile?: RoutingProfile): RoutingResult;
15
+ /**
16
+ * Get fallback models for a tier
17
+ */
18
+ export declare function getFallbackChain(tier: Tier, profile?: RoutingProfile): string[];
19
+ /**
20
+ * Parse routing profile from model string
21
+ */
22
+ export declare function parseRoutingProfile(model: string): RoutingProfile | null;
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Smart Router for brcc
3
+ * Ported from ClawRouter - 15-dimension weighted scoring for tier classification
4
+ */
5
+ // ─── Tier Model Configs ───
6
// Tier table for the 'auto' profile.
// Each row is [tier, primary model, fallback models in order]; rows are
// expanded into { primary, fallback } records keyed by tier.
const AUTO_TIERS = Object.fromEntries([
    ['SIMPLE', 'google/gemini-2.5-flash', ['deepseek/deepseek-chat', 'nvidia/gpt-oss-120b']],
    ['MEDIUM', 'moonshot/kimi-k2.5', ['google/gemini-2.5-flash', 'deepseek/deepseek-chat']],
    ['COMPLEX', 'google/gemini-3.1-pro', ['anthropic/claude-sonnet-4.6', 'google/gemini-2.5-pro']],
    ['REASONING', 'xai/grok-4-1-fast-reasoning', ['deepseek/deepseek-reasoner', 'openai/o4-mini']],
].map(([tier, primary, fallback]) => [tier, { primary, fallback }]));
24
// Tier table for the 'eco' profile.
// Each row is [tier, primary model, fallback models in order]; rows are
// expanded into { primary, fallback } records keyed by tier.
const ECO_TIERS = Object.fromEntries([
    ['SIMPLE', 'nvidia/gpt-oss-120b', ['google/gemini-2.5-flash-lite']],
    ['MEDIUM', 'google/gemini-2.5-flash-lite', ['nvidia/gpt-oss-120b']],
    ['COMPLEX', 'google/gemini-2.5-flash-lite', ['deepseek/deepseek-chat']],
    ['REASONING', 'xai/grok-4-1-fast-reasoning', ['deepseek/deepseek-reasoner']],
].map(([tier, primary, fallback]) => [tier, { primary, fallback }]));
42
// Tier table for the 'premium' profile.
// Each row is [tier, primary model, fallback models in order]; rows are
// expanded into { primary, fallback } records keyed by tier.
const PREMIUM_TIERS = Object.fromEntries([
    ['SIMPLE', 'moonshot/kimi-k2.5', ['anthropic/claude-haiku-4.5']],
    ['MEDIUM', 'openai/gpt-5.3-codex', ['anthropic/claude-sonnet-4.6']],
    ['COMPLEX', 'anthropic/claude-opus-4.6', ['openai/gpt-5.4', 'anthropic/claude-sonnet-4.6']],
    ['REASONING', 'anthropic/claude-sonnet-4.6', ['anthropic/claude-opus-4.6', 'openai/o3']],
].map(([tier, primary, fallback]) => [tier, { primary, fallback }]));
60
+ // ─── Keywords for Classification ───
61
/** Keywords counted as signs that the prompt contains or asks about code. */
const CODE_KEYWORDS = [
    // Language keywords and the fenced-code marker
    'function', 'class', 'import', 'def', 'SELECT',
    'async', 'await', 'const', 'let', 'var', 'return',
    '```',
    // Chinese
    '函数', '类', '导入',
];
/** Keywords counted as signs that the prompt asks for formal reasoning. */
const REASONING_KEYWORDS = [
    'prove', 'theorem', 'derive',
    'step by step', 'chain of thought',
    'formally', 'mathematical', 'proof', 'logically',
    // Chinese
    '证明', '定理', '推导',
];
/** Phrases counted as signs of a simple lookup-style question. */
const SIMPLE_KEYWORDS = [
    'what is', 'define', 'translate', 'hello',
    'yes or no', 'capital of', 'how old', 'who is', 'when was',
    // Chinese
    '什么是', '翻译', '你好',
];
/** Keywords counted as signs of a technically demanding topic. */
const TECHNICAL_KEYWORDS = [
    'algorithm', 'optimize', 'architecture', 'distributed',
    'kubernetes', 'microservice', 'database', 'infrastructure',
    // Chinese
    '算法', '架构', '优化',
];
/** Keywords counted as signs that the prompt asks for actions on files or tools. */
const AGENTIC_KEYWORDS = [
    'read file', 'edit', 'modify', 'update', 'create file',
    'execute', 'deploy', 'install', 'npm', 'pip',
    'fix', 'debug', 'verify',
    // Chinese
    '编辑', '修改', '部署', '安装', '修复', '调试',
];
82
/**
 * Count how many of the given keywords occur in the text (case-insensitive).
 *
 * Keywords that start or end with an ASCII word character are matched on word
 * boundaries, so 'def' no longer matches "define", 'import' no longer matches
 * "important", and 'prove' no longer matches "improve". A small set of
 * inflection suffixes (s, es, d, ed, ing) is still accepted so "functions" or
 * "optimized" keep matching their base keyword.
 *
 * Boundaries are deliberately ASCII-only: keywords made of other characters
 * (the Chinese entries, the ``` marker) are matched as plain substrings, and
 * an ASCII keyword directly adjacent to non-ASCII text still matches.
 *
 * @param {string} text - Text to search.
 * @param {string[]} keywords - Keywords or phrases to look for.
 * @returns {number} Number of keywords found at least once.
 */
function countMatches(text, keywords) {
    const haystack = text.toLowerCase();
    const escapeForRegExp = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return keywords.filter((keyword) => {
        const needle = keyword.toLowerCase();
        // Only guard the sides of the keyword that are themselves word characters.
        const head = /^[a-z0-9_]/.test(needle) ? '(?<![a-z0-9_])' : '';
        const tail = /[a-z0-9_]$/.test(needle) ? '(?:s|es|d|ed|ing)?(?![a-z0-9_])' : '';
        return new RegExp(head + escapeForRegExp(needle) + tail).test(haystack);
    }).length;
}
86
/**
 * Classify a prompt into a tier using additive heuristics.
 *
 * A running score starts at 0 and is adjusted by prompt length, keyword hits
 * (code, reasoning, simple, technical, agentic, imperative), a multi-step
 * pattern and the number of question marks. Two or more reasoning keywords
 * short-circuit to the REASONING tier with confidence 0.9. Otherwise the
 * score is bucketed: below -0.1 SIMPLE, below 0.25 MEDIUM, below 0.45
 * COMPLEX, anything else REASONING.
 *
 * @param {string} prompt - Prompt text to inspect.
 * @param {number} tokenCount - Token count supplied by the caller.
 * @returns {{tier: string, confidence: number, signals: string[]}}
 *   Tier, a confidence that grows with the absolute score (capped at 0.95),
 *   and the labels of the heuristics that fired, in evaluation order.
 */
function classifyRequest(prompt, tokenCount) {
    const signals = [];
    let score = 0;
    // Adjust the running score and remember which heuristic fired.
    const apply = (delta, signal) => {
        score += delta;
        signals.push(signal);
    };
    // Prompt length: short prompts lean simple, long prompts lean complex.
    if (tokenCount < 30) {
        apply(-0.15, 'short');
    }
    else if (tokenCount > 500) {
        apply(0.2, 'long');
    }
    // Code keywords.
    const codeHits = countMatches(prompt, CODE_KEYWORDS);
    if (codeHits >= 2) {
        apply(0.5, 'code');
    }
    else if (codeHits >= 1) {
        apply(0.25, 'code-light');
    }
    // Reasoning keywords: two or more decide the tier immediately.
    const reasoningHits = countMatches(prompt, REASONING_KEYWORDS);
    if (reasoningHits >= 2) {
        return { tier: 'REASONING', confidence: 0.9, signals: signals.concat('reasoning') };
    }
    if (reasoningHits >= 1) {
        apply(0.4, 'reasoning-light');
    }
    // Simple-question phrases: a single hit only counts for short, code-free prompts.
    const simpleHits = countMatches(prompt, SIMPLE_KEYWORDS);
    const isShortAndCodeFree = codeHits === 0 && tokenCount < 50;
    if (simpleHits >= 2) {
        apply(-0.4, 'simple');
    }
    else if (simpleHits >= 1 && isShortAndCodeFree) {
        apply(-0.25, 'simple');
    }
    // Technical keywords.
    const technicalHits = countMatches(prompt, TECHNICAL_KEYWORDS);
    if (technicalHits >= 2) {
        apply(0.4, 'technical');
    }
    else if (technicalHits >= 1) {
        apply(0.2, 'technical-light');
    }
    // Agentic keywords: at least two hits are needed to register.
    const agenticHits = countMatches(prompt, AGENTIC_KEYWORDS);
    if (agenticHits >= 3) {
        apply(0.35, 'agentic');
    }
    else if (agenticHits >= 2) {
        apply(0.2, 'agentic-light');
    }
    // Multi-step wording ("first ... then", "step 1", "1. ").
    const looksMultiStep = /first.*then|step \d|\d\.\s/i.test(prompt);
    if (looksMultiStep) {
        apply(0.2, 'multi-step');
    }
    // More than three question marks.
    const questionCount = prompt.split('?').length - 1;
    if (questionCount > 3) {
        apply(0.15, `${questionCount} questions`);
    }
    // Imperative verbs (build, create, implement, etc.).
    const imperativeHits = countMatches(prompt, [
        'build', 'create', 'implement', 'design', 'develop', 'write', 'make',
        'generate', 'construct', '构建', '创建', '实现', '设计', '开发'
    ]);
    if (imperativeHits >= 1) {
        apply(0.15, 'imperative');
    }
    // Bucket the score: the first exclusive upper bound it falls under wins.
    const tierUpperBounds = [
        [-0.1, 'SIMPLE'],
        [0.25, 'MEDIUM'],
        [0.45, 'COMPLEX'],
    ];
    const bucket = tierUpperBounds.find(([upperBound]) => score < upperBound);
    const tier = bucket ? bucket[1] : 'REASONING';
    const confidence = Math.min(0.95, 0.7 + Math.abs(score) * 0.3);
    return { tier, confidence, signals };
}
187
+ // ─── Main Router ───
188
/**
 * Pick a model for a prompt under the given routing profile.
 *
 * The 'free' profile always returns 'nvidia/gpt-oss-120b'. For every other
 * profile the prompt is classified into a tier and that tier's primary model
 * is taken from the profile's table ('eco', 'premium', anything else 'auto').
 *
 * @param {string} prompt - Prompt text used for classification.
 * @param {string} [profile='auto'] - Routing profile name.
 * @returns {{model: string, tier: string, confidence: number, signals: string[], savings: number}}
 *   The chosen model, the classification details and the estimated saving
 *   (a fraction from 0 to 1) against a fixed Opus baseline.
 */
export function routeRequest(prompt, profile = 'auto') {
    // Free profile - always use the free model, no classification needed.
    if (profile === 'free') {
        return {
            model: 'nvidia/gpt-oss-120b',
            tier: 'SIMPLE',
            confidence: 1.0,
            signals: ['free-profile'],
            savings: 1.0,
        };
    }
    // Rough token estimate: 4 characters per token.
    const estimatedTokens = Math.ceil(prompt.length / 4);
    const classification = classifyRequest(prompt, estimatedTokens);
    // Pick the tier table for the profile; unknown profiles use the auto table.
    let tierTable = AUTO_TIERS;
    if (profile === 'eco') {
        tierTable = ECO_TIERS;
    }
    else if (profile === 'premium') {
        tierTable = PREMIUM_TIERS;
    }
    const model = tierTable[classification.tier].primary;
    // Savings estimate against a Claude Opus baseline (rough average per 1K tokens).
    const baselineCostPer1K = 0.015;
    const costPer1KByModel = {
        'nvidia/gpt-oss-120b': 0,
        'google/gemini-2.5-flash': 0.001,
        'google/gemini-2.5-flash-lite': 0.0003,
        'deepseek/deepseek-chat': 0.0004,
        'moonshot/kimi-k2.5': 0.002,
        'google/gemini-3.1-pro': 0.007,
        'anthropic/claude-sonnet-4.6': 0.009,
        'anthropic/claude-opus-4.6': 0.015,
        'xai/grok-4-1-fast-reasoning': 0.0004,
    };
    // Models missing from the table are assumed to cost 0.005 per 1K tokens.
    const costPer1K = costPer1KByModel[model] ?? 0.005;
    const relativeSaving = (baselineCostPer1K - costPer1K) / baselineCostPer1K;
    return {
        model,
        tier: classification.tier,
        confidence: classification.confidence,
        signals: classification.signals,
        savings: Math.max(0, relativeSaving),
    };
}
240
/**
 * List the models to try for a tier, primary model first.
 *
 * The 'free' profile always yields only 'nvidia/gpt-oss-120b'. Other profiles
 * read the tier's entry from their table ('eco', 'premium', anything else
 * 'auto') and return its primary model followed by its fallbacks.
 *
 * @param {string} tier - Tier name used as the table key.
 * @param {string} [profile='auto'] - Routing profile name.
 * @returns {string[]} Model ids in the order they should be tried.
 */
export function getFallbackChain(tier, profile = 'auto') {
    if (profile === 'free') {
        return ['nvidia/gpt-oss-120b'];
    }
    let tierTable = AUTO_TIERS;
    if (profile === 'eco') {
        tierTable = ECO_TIERS;
    }
    else if (profile === 'premium') {
        tierTable = PREMIUM_TIERS;
    }
    const entry = tierTable[tier];
    return [entry.primary].concat(entry.fallback);
}
260
/**
 * Parse a routing profile from a model string.
 *
 * Recognizes 'auto', 'eco', 'premium' and 'free', either bare or with the
 * 'blockrun/' prefix, ignoring letter case.
 *
 * @param {unknown} model - Model value to inspect; anything that is not a
 *   string (for example a missing or numeric field) is treated as
 *   "not a routing profile" instead of throwing.
 * @returns {('auto'|'eco'|'premium'|'free'|null)} The profile name, or null
 *   when the value does not name a routing profile.
 */
export function parseRoutingProfile(model) {
    if (typeof model !== 'string') {
        return null;
    }
    const prefix = 'blockrun/';
    const lower = model.toLowerCase();
    // Strip the prefix at most once so 'blockrun/blockrun/auto' stays unrecognized.
    const name = lower.startsWith(prefix) ? lower.slice(prefix.length) : lower;
    return ['auto', 'eco', 'premium', 'free'].includes(name) ? name : null;
}