@blockrun/cc 0.8.1 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +112 -7
- package/dist/commands/start.d.ts +1 -0
- package/dist/commands/start.js +32 -13
- package/dist/commands/stats.d.ts +10 -0
- package/dist/commands/stats.js +94 -0
- package/dist/index.js +9 -1
- package/dist/proxy/fallback.d.ts +34 -0
- package/dist/proxy/fallback.js +115 -0
- package/dist/proxy/server.d.ts +1 -0
- package/dist/proxy/server.js +192 -38
- package/dist/router/index.d.ts +22 -0
- package/dist/router/index.js +274 -0
- package/dist/stats/tracker.d.ts +52 -0
- package/dist/stats/tracker.js +130 -0
- package/package.json +1 -1
package/dist/proxy/server.js
CHANGED
|
@@ -1,8 +1,24 @@
|
|
|
1
1
|
import http from 'node:http';
|
|
2
|
-
import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
|
|
3
2
|
import fs from 'node:fs';
|
|
4
3
|
import path from 'node:path';
|
|
5
4
|
import os from 'node:os';
|
|
5
|
+
import { fileURLToPath } from 'node:url';
|
|
6
|
+
import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
|
|
7
|
+
import { recordUsage } from '../stats/tracker.js';
|
|
8
|
+
import { fetchWithFallback, buildFallbackChain, DEFAULT_FALLBACK_CONFIG, } from './fallback.js';
|
|
9
|
+
import { routeRequest, parseRoutingProfile, } from '../router/index.js';
|
|
10
|
+
// Resolve the package version from package.json, located two directories above
// this module, so backend requests can identify the proxy build.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Fallback used when package.json cannot be read or parsed; keep it in sync
// with the published release.
let VERSION = '0.9.1';
try {
    const pkgPath = path.resolve(__dirname, '../../package.json');
    const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
    // Only a non-empty string is usable in the header values built below;
    // anything else keeps the fallback.
    if (typeof pkg?.version === 'string' && pkg.version !== '') {
        VERSION = pkg.version;
    }
}
catch {
    /* best effort: package.json missing or malformed, keep the fallback */
}
// User-Agent for backend requests
const USER_AGENT = `brcc/${VERSION}`;
// Value sent in the X-Brcc-Version request header.
const X_BRCC_VERSION = VERSION;
|
|
6
22
|
const LOG_FILE = path.join(os.homedir(), '.blockrun', 'brcc-debug.log');
|
|
7
23
|
function debug(options, ...args) {
|
|
8
24
|
if (!options.debug)
|
|
@@ -12,27 +28,64 @@ function debug(options, ...args) {
|
|
|
12
28
|
fs.mkdirSync(path.dirname(LOG_FILE), { recursive: true });
|
|
13
29
|
fs.appendFileSync(LOG_FILE, msg);
|
|
14
30
|
}
|
|
15
|
-
catch {
|
|
31
|
+
catch {
|
|
32
|
+
/* ignore */
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
/**
 * Print a status line to stdout, tagged with the `[brcc]` prefix.
 * Each argument is converted with `String()` and the pieces are joined by a
 * single space.
 *
 * @param {...unknown} args - Values to print.
 * @returns {void}
 */
function log(...args) {
    const rendered = args.map((part) => String(part)).join(' ');
    console.log(`[brcc] ${rendered}`);
}
|
|
17
39
|
// max_tokens budget used while no previous output size has been observed.
const DEFAULT_MAX_TOKENS = 4096;
// Output token count of the most recent response (0 until one is seen).
let lastOutputTokens = 0;
// Model shortcuts for quick switching.
// Each row lists the aliases that resolve to one model id; row and alias order
// define the key order of the resulting object.
const MODEL_SHORTCUTS = Object.fromEntries([
    [['auto', 'smart'], 'blockrun/auto'],
    [['eco'], 'blockrun/eco'],
    [['premium'], 'blockrun/premium'],
    [['gpt', 'gpt5', 'gpt-5', 'gpt-5.4'], 'openai/gpt-5.4'],
    [['sonnet', 'claude'], 'anthropic/claude-sonnet-4.6'],
    [['opus'], 'anthropic/claude-opus-4.6'],
    [['haiku'], 'anthropic/claude-haiku-4.5'],
    [['deepseek'], 'deepseek/deepseek-chat'],
    [['gemini'], 'google/gemini-2.5-pro'],
    [['grok'], 'xai/grok-3'],
    [['free'], 'nvidia/gpt-oss-120b'],
    [['mini'], 'openai/gpt-5-mini'],
    [['glm'], 'zai/glm-5'],
].flatMap(([aliases, target]) => aliases.map((alias) => [alias, target])));
|
|
62
|
+
// Model pricing (per 1M tokens) - used for stats.
// Rows are [model id, input price, output price]; each row becomes
// `{ input, output }` keyed by the model id.
const MODEL_PRICING = Object.fromEntries([
    // Routing profiles (blended averages)
    ['blockrun/auto', 0.8, 4.0],
    ['blockrun/eco', 0.2, 1.0],
    ['blockrun/premium', 3.0, 15.0],
    ['blockrun/free', 0, 0],
    // Individual models
    ['anthropic/claude-sonnet-4.6', 3.0, 15.0],
    ['anthropic/claude-opus-4.6', 5.0, 25.0],
    ['anthropic/claude-haiku-4.5', 1.0, 5.0],
    ['openai/gpt-5.4', 2.5, 15.0],
    ['openai/gpt-5-mini', 0.25, 2.0],
    ['google/gemini-2.5-pro', 1.25, 10.0],
    ['google/gemini-2.5-flash', 0.3, 2.5],
    ['deepseek/deepseek-chat', 0.28, 0.42],
    ['xai/grok-3', 3.0, 15.0],
    ['xai/grok-4-fast', 0.2, 0.5],
    ['nvidia/gpt-oss-120b', 0, 0],
    ['zai/glm-5', 1.0, 3.2],
    ['moonshot/kimi-k2.5', 0.6, 3.0],
].map(([id, input, output]) => [id, { input, output }]));
|
|
84
|
+
/**
 * Estimate the cost of one request from its token counts.
 *
 * Prices are looked up in MODEL_PRICING, which is expressed per one million
 * tokens. A model without an entry is charged at the default rate of 2.0
 * (input) / 10.0 (output).
 *
 * @param {string} model - Model identifier used as the MODEL_PRICING key.
 * @param {number} inputTokens - Number of prompt tokens.
 * @param {number} outputTokens - Number of completion tokens.
 * @returns {number} Estimated cost in the same unit as the pricing table.
 */
function estimateCost(model, inputTokens, outputTokens) {
    // Own-property check: a plain `MODEL_PRICING[model]` lookup would also
    // resolve inherited keys such as "toString" or "constructor", which have no
    // `input`/`output` fields and would turn the estimate into NaN.
    const pricing = Object.hasOwn(MODEL_PRICING, model)
        ? MODEL_PRICING[model]
        : { input: 2.0, output: 10.0 };
    return ((inputTokens / 1_000_000) * pricing.input +
        (outputTokens / 1_000_000) * pricing.output);
}
|
|
36
89
|
function detectModelSwitch(parsed) {
|
|
37
90
|
if (!parsed.messages || parsed.messages.length === 0)
|
|
38
91
|
return null;
|
|
@@ -44,7 +97,7 @@ function detectModelSwitch(parsed) {
|
|
|
44
97
|
content = last.content;
|
|
45
98
|
}
|
|
46
99
|
else if (Array.isArray(last.content)) {
|
|
47
|
-
const textBlock = last.content.find(b => b.type === 'text' && b.text);
|
|
100
|
+
const textBlock = last.content.find((b) => b.type === 'text' && b.text);
|
|
48
101
|
if (textBlock && textBlock.text)
|
|
49
102
|
content = textBlock.text;
|
|
50
103
|
}
|
|
@@ -63,9 +116,12 @@ function detectModelSwitch(parsed) {
|
|
|
63
116
|
return modelInput;
|
|
64
117
|
return null;
|
|
65
118
|
}
|
|
119
|
+
// Default model - smart routing built-in
|
|
120
|
+
const DEFAULT_MODEL = 'blockrun/auto';
|
|
66
121
|
export function createProxy(options) {
|
|
67
122
|
const chain = options.chain || 'base';
|
|
68
|
-
let currentModel = options.modelOverride ||
|
|
123
|
+
let currentModel = options.modelOverride || DEFAULT_MODEL;
|
|
124
|
+
const fallbackEnabled = options.fallbackEnabled !== false; // Default true
|
|
69
125
|
let baseWallet = null;
|
|
70
126
|
let solanaWallet = null;
|
|
71
127
|
if (chain === 'base') {
|
|
@@ -85,14 +141,18 @@ export function createProxy(options) {
|
|
|
85
141
|
return;
|
|
86
142
|
}
|
|
87
143
|
await initSolana();
|
|
88
|
-
const
|
|
89
|
-
const targetUrl = `${options.apiUrl}${
|
|
144
|
+
const requestPath = req.url?.replace(/^\/api/, '') || '';
|
|
145
|
+
const targetUrl = `${options.apiUrl}${requestPath}`;
|
|
90
146
|
let body = '';
|
|
147
|
+
const requestStartTime = Date.now();
|
|
91
148
|
req.on('data', (chunk) => {
|
|
92
149
|
body += chunk;
|
|
93
150
|
});
|
|
94
151
|
req.on('end', async () => {
|
|
152
|
+
let requestModel = currentModel || options.modelOverride || 'unknown';
|
|
153
|
+
let usedFallback = false;
|
|
95
154
|
try {
|
|
155
|
+
debug(options, `request: ${req.method} ${req.url} currentModel=${currentModel || 'none'}`);
|
|
96
156
|
if (body) {
|
|
97
157
|
try {
|
|
98
158
|
const parsed = JSON.parse(body);
|
|
@@ -110,7 +170,12 @@ export function createProxy(options) {
|
|
|
110
170
|
type: 'message',
|
|
111
171
|
role: 'assistant',
|
|
112
172
|
model: currentModel,
|
|
113
|
-
content: [
|
|
173
|
+
content: [
|
|
174
|
+
{
|
|
175
|
+
type: 'text',
|
|
176
|
+
text: `Switched to **${currentModel}**. All subsequent requests will use this model.`,
|
|
177
|
+
},
|
|
178
|
+
],
|
|
114
179
|
stop_reason: 'end_turn',
|
|
115
180
|
stop_sequence: null,
|
|
116
181
|
usage: { input_tokens: 0, output_tokens: 10 },
|
|
@@ -119,14 +184,53 @@ export function createProxy(options) {
|
|
|
119
184
|
res.end(JSON.stringify(fakeResponse));
|
|
120
185
|
return;
|
|
121
186
|
}
|
|
122
|
-
// Apply model override
|
|
123
|
-
|
|
124
|
-
|
|
187
|
+
// Apply model override only if:
|
|
188
|
+
// 1. User specified --model on CLI (options.modelOverride)
|
|
189
|
+
// 2. User switched model in-session (currentModel set by "use X" command)
|
|
190
|
+
// 3. Request has no model specified
|
|
191
|
+
if (options.modelOverride && currentModel) {
|
|
192
|
+
// CLI --model flag: always use this
|
|
193
|
+
parsed.model = currentModel;
|
|
194
|
+
}
|
|
195
|
+
else if (!parsed.model) {
|
|
196
|
+
// No model in request: use default
|
|
197
|
+
parsed.model = currentModel || DEFAULT_MODEL;
|
|
198
|
+
}
|
|
199
|
+
// Otherwise: use the model from the request as-is
|
|
200
|
+
requestModel = parsed.model || DEFAULT_MODEL;
|
|
201
|
+
// Smart routing: if model is a routing profile, classify and route
|
|
202
|
+
const routingProfile = parseRoutingProfile(requestModel);
|
|
203
|
+
if (routingProfile) {
|
|
204
|
+
// Extract user prompt for classification
|
|
205
|
+
const userMessages = parsed.messages?.filter((m) => m.role === 'user') || [];
|
|
206
|
+
const lastUserMsg = userMessages[userMessages.length - 1];
|
|
207
|
+
let promptText = '';
|
|
208
|
+
if (lastUserMsg) {
|
|
209
|
+
if (typeof lastUserMsg.content === 'string') {
|
|
210
|
+
promptText = lastUserMsg.content;
|
|
211
|
+
}
|
|
212
|
+
else if (Array.isArray(lastUserMsg.content)) {
|
|
213
|
+
promptText = lastUserMsg.content
|
|
214
|
+
.filter((b) => b.type === 'text')
|
|
215
|
+
.map((b) => b.text)
|
|
216
|
+
.join('\n');
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
// Route the request
|
|
220
|
+
const routing = routeRequest(promptText, routingProfile);
|
|
221
|
+
parsed.model = routing.model;
|
|
222
|
+
requestModel = routing.model;
|
|
223
|
+
log(`🧠 Smart routing: ${routingProfile} → ${routing.tier} → ${routing.model} ` +
|
|
224
|
+
`(${(routing.savings * 100).toFixed(0)}% savings) [${routing.signals.join(', ')}]`);
|
|
125
225
|
}
|
|
126
|
-
|
|
226
|
+
{
|
|
127
227
|
const original = parsed.max_tokens;
|
|
128
228
|
const model = (parsed.model || '').toLowerCase();
|
|
129
|
-
const modelCap =
|
|
229
|
+
const modelCap = model.includes('deepseek') ||
|
|
230
|
+
model.includes('haiku') ||
|
|
231
|
+
model.includes('gpt-oss')
|
|
232
|
+
? 8192
|
|
233
|
+
: 16384;
|
|
130
234
|
// Use max of (last output × 2, default 4096) capped by model limit
|
|
131
235
|
// This ensures short replies don't starve the next request
|
|
132
236
|
const adaptive = lastOutputTokens > 0
|
|
@@ -134,28 +238,57 @@ export function createProxy(options) {
|
|
|
134
238
|
: DEFAULT_MAX_TOKENS;
|
|
135
239
|
parsed.max_tokens = Math.min(adaptive, modelCap);
|
|
136
240
|
if (original !== parsed.max_tokens) {
|
|
137
|
-
debug(options, `max_tokens: ${original} → ${parsed.max_tokens} (last output: ${lastOutputTokens || 'none'})`);
|
|
241
|
+
debug(options, `max_tokens: ${original || 'unset'} → ${parsed.max_tokens} (last output: ${lastOutputTokens || 'none'})`);
|
|
138
242
|
}
|
|
139
243
|
}
|
|
140
244
|
body = JSON.stringify(parsed);
|
|
141
245
|
}
|
|
142
|
-
catch {
|
|
246
|
+
catch {
|
|
247
|
+
/* not JSON, pass through */
|
|
248
|
+
}
|
|
143
249
|
}
|
|
144
250
|
const headers = {
|
|
145
251
|
'Content-Type': 'application/json',
|
|
252
|
+
'User-Agent': USER_AGENT,
|
|
253
|
+
'X-Brcc-Version': X_BRCC_VERSION,
|
|
146
254
|
};
|
|
147
255
|
for (const [key, value] of Object.entries(req.headers)) {
|
|
148
256
|
if (key.toLowerCase() !== 'host' &&
|
|
149
257
|
key.toLowerCase() !== 'content-length' &&
|
|
258
|
+
key.toLowerCase() !== 'user-agent' && // Don't forward client's user-agent
|
|
150
259
|
value) {
|
|
151
260
|
headers[key] = Array.isArray(value) ? value[0] : value;
|
|
152
261
|
}
|
|
153
262
|
}
|
|
154
|
-
|
|
263
|
+
// Build request init
|
|
264
|
+
const requestInit = {
|
|
155
265
|
method: req.method || 'POST',
|
|
156
266
|
headers,
|
|
157
267
|
body: body || undefined,
|
|
158
|
-
}
|
|
268
|
+
};
|
|
269
|
+
let response;
|
|
270
|
+
let finalModel = requestModel;
|
|
271
|
+
// Use fallback chain if enabled
|
|
272
|
+
if (fallbackEnabled && body && requestPath.includes('messages')) {
|
|
273
|
+
const fallbackConfig = {
|
|
274
|
+
...DEFAULT_FALLBACK_CONFIG,
|
|
275
|
+
chain: buildFallbackChain(requestModel),
|
|
276
|
+
};
|
|
277
|
+
const result = await fetchWithFallback(targetUrl, requestInit, body, fallbackConfig, (failedModel, status, nextModel) => {
|
|
278
|
+
log(`⚠️ ${failedModel} returned ${status}, falling back to ${nextModel}`);
|
|
279
|
+
});
|
|
280
|
+
response = result.response;
|
|
281
|
+
finalModel = result.modelUsed;
|
|
282
|
+
usedFallback = result.fallbackUsed;
|
|
283
|
+
if (usedFallback) {
|
|
284
|
+
log(`↺ Fallback successful: using ${finalModel}`);
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
else {
|
|
288
|
+
// Direct fetch without fallback
|
|
289
|
+
response = await fetch(targetUrl, requestInit);
|
|
290
|
+
}
|
|
291
|
+
// Handle 402 payment
|
|
159
292
|
if (response.status === 402) {
|
|
160
293
|
if (chain === 'solana' && solanaWallet) {
|
|
161
294
|
response = await handleSolanaPayment(response, targetUrl, req.method || 'POST', headers, body, solanaWallet.privateKey, solanaWallet.address);
|
|
@@ -174,22 +307,33 @@ export function createProxy(options) {
|
|
|
174
307
|
const reader = response.body.getReader();
|
|
175
308
|
const decoder = new TextDecoder();
|
|
176
309
|
let lastChunkText = '';
|
|
310
|
+
let fullResponse = '';
|
|
177
311
|
const pump = async () => {
|
|
178
312
|
while (true) {
|
|
179
313
|
const { done, value } = await reader.read();
|
|
180
314
|
if (done) {
|
|
315
|
+
// Record stats from streaming response
|
|
181
316
|
if (isStreaming && lastChunkText) {
|
|
182
|
-
const
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
317
|
+
const outputMatch = lastChunkText.match(/"output_tokens"\s*:\s*(\d+)/);
|
|
318
|
+
const inputMatch = fullResponse.match(/"input_tokens"\s*:\s*(\d+)/);
|
|
319
|
+
if (outputMatch) {
|
|
320
|
+
lastOutputTokens = parseInt(outputMatch[1], 10);
|
|
321
|
+
const inputTokens = inputMatch
|
|
322
|
+
? parseInt(inputMatch[1], 10)
|
|
323
|
+
: 0;
|
|
324
|
+
const latencyMs = Date.now() - requestStartTime;
|
|
325
|
+
const cost = estimateCost(finalModel, inputTokens, lastOutputTokens);
|
|
326
|
+
recordUsage(finalModel, inputTokens, lastOutputTokens, cost, latencyMs, usedFallback);
|
|
327
|
+
debug(options, `recorded: model=${finalModel} in=${inputTokens} out=${lastOutputTokens} cost=$${cost.toFixed(4)} fallback=${usedFallback}`);
|
|
186
328
|
}
|
|
187
329
|
}
|
|
188
330
|
res.end();
|
|
189
331
|
break;
|
|
190
332
|
}
|
|
191
333
|
if (isStreaming) {
|
|
192
|
-
|
|
334
|
+
const chunk = decoder.decode(value, { stream: true });
|
|
335
|
+
lastChunkText = chunk;
|
|
336
|
+
fullResponse += chunk;
|
|
193
337
|
}
|
|
194
338
|
res.write(value);
|
|
195
339
|
}
|
|
@@ -202,15 +346,22 @@ export function createProxy(options) {
|
|
|
202
346
|
const parsed = JSON.parse(text);
|
|
203
347
|
if (parsed.usage?.output_tokens) {
|
|
204
348
|
lastOutputTokens = parsed.usage.output_tokens;
|
|
205
|
-
|
|
349
|
+
const inputTokens = parsed.usage?.input_tokens || 0;
|
|
350
|
+
const latencyMs = Date.now() - requestStartTime;
|
|
351
|
+
const cost = estimateCost(finalModel, inputTokens, lastOutputTokens);
|
|
352
|
+
recordUsage(finalModel, inputTokens, lastOutputTokens, cost, latencyMs, usedFallback);
|
|
353
|
+
debug(options, `recorded: model=${finalModel} in=${inputTokens} out=${lastOutputTokens} cost=$${cost.toFixed(4)} fallback=${usedFallback}`);
|
|
206
354
|
}
|
|
207
355
|
}
|
|
208
|
-
catch {
|
|
356
|
+
catch {
|
|
357
|
+
/* not JSON */
|
|
358
|
+
}
|
|
209
359
|
res.end(text);
|
|
210
360
|
}
|
|
211
361
|
}
|
|
212
362
|
catch (error) {
|
|
213
363
|
const msg = error instanceof Error ? error.message : 'Proxy error';
|
|
364
|
+
log(`❌ Error: ${msg}`);
|
|
214
365
|
res.writeHead(502, { 'Content-Type': 'application/json' });
|
|
215
366
|
res.end(JSON.stringify({
|
|
216
367
|
type: 'error',
|
|
@@ -291,9 +442,12 @@ export function classifyRequest(body) {
|
|
|
291
442
|
.map((b) => b.text)
|
|
292
443
|
.join('\n');
|
|
293
444
|
}
|
|
294
|
-
if (content.includes('```') ||
|
|
295
|
-
content.includes('
|
|
296
|
-
content.includes('
|
|
445
|
+
if (content.includes('```') ||
|
|
446
|
+
content.includes('function ') ||
|
|
447
|
+
content.includes('class ') ||
|
|
448
|
+
content.includes('import ') ||
|
|
449
|
+
content.includes('def ') ||
|
|
450
|
+
content.includes('const ')) {
|
|
297
451
|
return { category: 'code' };
|
|
298
452
|
}
|
|
299
453
|
if (content.length < 100) {
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * Smart Router for brcc
 * Ported from ClawRouter - 15-dimension weighted scoring for tier classification
 */

/** Complexity tier a prompt is classified into. */
export type Tier = 'SIMPLE' | 'MEDIUM' | 'COMPLEX' | 'REASONING';

/** Routing profile that selects which tier-to-model table is used. */
export type RoutingProfile = 'auto' | 'eco' | 'premium' | 'free';

/** Outcome of routing a single prompt. */
export interface RoutingResult {
    /** Identifier of the model selected for the request. */
    model: string;
    /** Tier the prompt was classified into. */
    tier: Tier;
    /** Classifier confidence for the chosen tier. */
    confidence: number;
    /** Labels of the heuristics that influenced the classification. */
    signals: string[];
    /** Estimated cost saving relative to the router's baseline model, as a fraction. */
    savings: number;
}

/**
 * Classify a prompt and pick the primary model for its tier.
 * `profile` is optional.
 */
export declare function routeRequest(prompt: string, profile?: RoutingProfile): RoutingResult;

/**
 * Get fallback models for a tier
 */
export declare function getFallbackChain(tier: Tier, profile?: RoutingProfile): string[];

/**
 * Parse routing profile from model string
 */
export declare function parseRoutingProfile(model: string): RoutingProfile | null;
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart Router for brcc
|
|
3
|
+
* Ported from ClawRouter - 15-dimension weighted scoring for tier classification
|
|
4
|
+
*/
|
|
5
|
+
// ─── Tier Model Configs ───
// Each row is [tier, primary model, ...fallback models]; a row expands to
// `{ primary, fallback: [...] }` keyed by the tier name.
const tierTable = (rows) => Object.fromEntries(rows.map(([tier, primary, ...fallback]) => [tier, { primary, fallback }]));
const AUTO_TIERS = tierTable([
    ['SIMPLE', 'google/gemini-2.5-flash', 'deepseek/deepseek-chat', 'nvidia/gpt-oss-120b'],
    ['MEDIUM', 'moonshot/kimi-k2.5', 'google/gemini-2.5-flash', 'deepseek/deepseek-chat'],
    ['COMPLEX', 'google/gemini-3.1-pro', 'anthropic/claude-sonnet-4.6', 'google/gemini-2.5-pro'],
    ['REASONING', 'xai/grok-4-1-fast-reasoning', 'deepseek/deepseek-reasoner', 'openai/o4-mini'],
]);
const ECO_TIERS = tierTable([
    ['SIMPLE', 'nvidia/gpt-oss-120b', 'google/gemini-2.5-flash-lite'],
    ['MEDIUM', 'google/gemini-2.5-flash-lite', 'nvidia/gpt-oss-120b'],
    ['COMPLEX', 'google/gemini-2.5-flash-lite', 'deepseek/deepseek-chat'],
    ['REASONING', 'xai/grok-4-1-fast-reasoning', 'deepseek/deepseek-reasoner'],
]);
const PREMIUM_TIERS = tierTable([
    ['SIMPLE', 'moonshot/kimi-k2.5', 'anthropic/claude-haiku-4.5'],
    ['MEDIUM', 'openai/gpt-5.3-codex', 'anthropic/claude-sonnet-4.6'],
    ['COMPLEX', 'anthropic/claude-opus-4.6', 'openai/gpt-5.4', 'anthropic/claude-sonnet-4.6'],
    ['REASONING', 'anthropic/claude-sonnet-4.6', 'anthropic/claude-opus-4.6', 'openai/o3'],
]);
|
|
60
|
+
// ─── Keywords for Classification ───
// Every list mixes English and Chinese terms.

// Programming-language tokens and code fences.
const CODE_KEYWORDS = [
    'function', 'class', 'import', 'def',
    'SELECT', 'async', 'await',
    'const', 'let', 'var', 'return',
    '```',
    '函数', '类', '导入',
];

// Vocabulary of proofs and explicit step-wise reasoning.
const REASONING_KEYWORDS = [
    'prove', 'theorem', 'derive',
    'step by step', 'chain of thought',
    'formally', 'mathematical', 'proof', 'logically',
    '证明', '定理', '推导',
];

// Phrases typical of short factual questions and greetings.
const SIMPLE_KEYWORDS = [
    'what is', 'define', 'translate', 'hello',
    'yes or no', 'capital of',
    'how old', 'who is', 'when was',
    '什么是', '翻译', '你好',
];

// Systems and engineering terminology.
const TECHNICAL_KEYWORDS = [
    'algorithm', 'optimize', 'architecture',
    'distributed', 'kubernetes',
    'microservice', 'database', 'infrastructure',
    '算法', '架构', '优化',
];

// Verbs and tools that indicate file or environment manipulation.
const AGENTIC_KEYWORDS = [
    'read file', 'edit', 'modify', 'update',
    'create file', 'execute',
    'deploy', 'install', 'npm', 'pip',
    'fix', 'debug', 'verify',
    '编辑', '修改', '部署', '安装', '修复', '调试',
];
|
|
82
|
+
/**
 * Count how many of the given keywords occur in the text.
 *
 * Matching is case-insensitive and substring-based, so a short keyword also
 * matches inside a longer word (for example 'def' inside 'define'). Each
 * keyword counts at most once, however often it occurs.
 *
 * @param {string} text - Text to search.
 * @param {string[]} keywords - Keywords to look for.
 * @returns {number} Number of keywords found in the text.
 */
function countMatches(text, keywords) {
    const haystack = text.toLowerCase();
    return keywords.reduce((hits, keyword) => (haystack.includes(keyword.toLowerCase()) ? hits + 1 : hits), 0);
}
|
|
86
|
+
/**
 * Classify a prompt into a complexity tier with a keyword-driven score.
 *
 * Each heuristic that fires adds to or subtracts from a running score and
 * records a signal label. Two or more reasoning keywords short-circuit straight
 * to the REASONING tier. Otherwise the final score maps to a tier:
 * below -0.1 SIMPLE, below 0.25 MEDIUM, below 0.45 COMPLEX, else REASONING.
 *
 * @param {string} prompt - Prompt text to classify.
 * @param {number} tokenCount - Estimated token count of the prompt.
 * @returns {{ tier: string, confidence: number, signals: string[] }}
 *   Chosen tier, a confidence that grows with |score| (capped at 0.95), and the
 *   signals that fired, in evaluation order.
 */
function classifyRequest(prompt, tokenCount) {
    const signals = [];
    let score = 0;
    // Apply one score adjustment and record the signal that explains it.
    const adjust = (delta, signal) => {
        score += delta;
        signals.push(signal);
    };

    // Prompt length: short prompts lean simple, long prompts lean complex.
    if (tokenCount < 30) {
        adjust(-0.15, 'short');
    }
    else if (tokenCount > 500) {
        adjust(0.2, 'long');
    }

    // Code vocabulary.
    const codeMatches = countMatches(prompt, CODE_KEYWORDS);
    if (codeMatches >= 2) {
        adjust(0.5, 'code');
    }
    else if (codeMatches >= 1) {
        adjust(0.25, 'code-light');
    }

    // Reasoning vocabulary: two or more hits decide the tier immediately.
    const reasoningMatches = countMatches(prompt, REASONING_KEYWORDS);
    if (reasoningMatches >= 2) {
        return { tier: 'REASONING', confidence: 0.9, signals: [...signals, 'reasoning'] };
    }
    if (reasoningMatches >= 1) {
        adjust(0.4, 'reasoning-light');
    }

    // Simple-question vocabulary: a single hit only counts for a very short
    // prompt that contains no code keyword.
    const simpleMatches = countMatches(prompt, SIMPLE_KEYWORDS);
    if (simpleMatches >= 2) {
        adjust(-0.4, 'simple');
    }
    else if (simpleMatches >= 1 && codeMatches === 0 && tokenCount < 50) {
        adjust(-0.25, 'simple');
    }

    // Technical vocabulary.
    const techMatches = countMatches(prompt, TECHNICAL_KEYWORDS);
    if (techMatches >= 2) {
        adjust(0.4, 'technical');
    }
    else if (techMatches >= 1) {
        adjust(0.2, 'technical-light');
    }

    // Agentic vocabulary: a single hit is ignored.
    const agenticMatches = countMatches(prompt, AGENTIC_KEYWORDS);
    if (agenticMatches >= 3) {
        adjust(0.35, 'agentic');
    }
    else if (agenticMatches >= 2) {
        adjust(0.2, 'agentic-light');
    }

    // Multi-step phrasing ("first ... then", "step 2", "1. ").
    if (/first.*then|step \d|\d\.\s/i.test(prompt)) {
        adjust(0.2, 'multi-step');
    }

    // More than three question marks.
    const questionCount = prompt.split('?').length - 1;
    if (questionCount > 3) {
        adjust(0.15, `${questionCount} questions`);
    }

    // Imperative verbs (build, create, implement, etc.)
    const imperativeVerbs = [
        'build', 'create', 'implement', 'design', 'develop', 'write', 'make',
        'generate', 'construct', '构建', '创建', '实现', '设计', '开发'
    ];
    if (countMatches(prompt, imperativeVerbs) >= 1) {
        adjust(0.15, 'imperative');
    }

    // Map the score onto a tier.
    const tier = score < -0.1 ? 'SIMPLE'
        : score < 0.25 ? 'MEDIUM'
            : score < 0.45 ? 'COMPLEX'
                : 'REASONING';
    // Confidence grows with the magnitude of the score, capped at 0.95.
    const confidence = Math.min(0.95, 0.7 + Math.abs(score) * 0.3);
    return { tier, confidence, signals };
}
|
|
187
|
+
// ─── Main Router ───
|
|
188
|
+
/**
 * Pick a model for a prompt under the given routing profile.
 *
 * The 'free' profile always resolves to the free model without classifying.
 * Otherwise the prompt is classified into a tier and the tier's primary model
 * is taken from the profile's table ('eco', 'premium', anything else uses the
 * auto table).
 *
 * @param {string} prompt - Prompt text used for classification.
 * @param {string} [profile='auto'] - Routing profile name.
 * @returns {{ model: string, tier: string, confidence: number, signals: string[], savings: number }}
 *   Selected model, the classification details, and the estimated saving
 *   relative to the Opus baseline as a fraction (never negative).
 */
export function routeRequest(prompt, profile = 'auto') {
    // Free profile - always use free model
    if (profile === 'free') {
        return {
            model: 'nvidia/gpt-oss-120b',
            tier: 'SIMPLE',
            confidence: 1.0,
            signals: ['free-profile'],
            savings: 1.0,
        };
    }

    // Rough token estimate: four characters per token.
    const approxTokens = Math.ceil(prompt.length / 4);
    const verdict = classifyRequest(prompt, approxTokens);

    // Tier table for the profile; unknown profiles use the auto table.
    const tierConfigs = profile === 'eco'
        ? ECO_TIERS
        : profile === 'premium'
            ? PREMIUM_TIERS
            : AUTO_TIERS;
    const model = tierConfigs[verdict.tier].primary;

    // Savings estimate against a Claude Opus baseline ($5/$25 per 1M tokens,
    // taken as a rough average per 1K tokens).
    const OPUS_COST_PER_1K = 0.015;
    const costPer1K = {
        'nvidia/gpt-oss-120b': 0,
        'google/gemini-2.5-flash': 0.001,
        'google/gemini-2.5-flash-lite': 0.0003,
        'deepseek/deepseek-chat': 0.0004,
        'moonshot/kimi-k2.5': 0.002,
        'google/gemini-3.1-pro': 0.007,
        'anthropic/claude-sonnet-4.6': 0.009,
        'anthropic/claude-opus-4.6': 0.015,
        'xai/grok-4-1-fast-reasoning': 0.0004,
    };
    // Models missing from the table are assumed to cost 0.005 per 1K tokens.
    const modelCost = costPer1K[model] ?? 0.005;
    const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);

    return {
        model,
        tier: verdict.tier,
        confidence: verdict.confidence,
        signals: verdict.signals,
        savings,
    };
}
|
|
240
|
+
/**
 * Get the ordered model list for a tier: the primary model followed by its
 * fallbacks.
 *
 * The 'free' profile always yields only the free model, regardless of tier.
 * 'eco' and 'premium' use their own tables; any other profile uses the auto
 * table.
 *
 * @param {string} tier - Tier name used as the key into the profile's table.
 * @param {string} [profile='auto'] - Routing profile name.
 * @returns {string[]} A new array of model identifiers, primary first.
 */
export function getFallbackChain(tier, profile = 'auto') {
    if (profile === 'free') {
        return ['nvidia/gpt-oss-120b'];
    }
    const tierConfigs = profile === 'eco'
        ? ECO_TIERS
        : profile === 'premium'
            ? PREMIUM_TIERS
            : AUTO_TIERS;
    const { primary, fallback } = tierConfigs[tier];
    return [primary, ...fallback];
}
|
|
260
|
+
/**
 * Parse routing profile from model string.
 *
 * Accepts a bare profile name ('auto', 'eco', 'premium', 'free') or the same
 * name prefixed with 'blockrun/', case-insensitively.
 *
 * @param {string} model - Model string taken from a request or configuration.
 * @returns {('auto'|'eco'|'premium'|'free'|null)} The profile name, or null
 *   when the value is not a routing profile (including any non-string value).
 */
export function parseRoutingProfile(model) {
    // The model field can come from client-supplied JSON; a non-string value is
    // simply "not a routing profile" rather than a reason to throw.
    if (typeof model !== 'string') {
        return null;
    }
    const PREFIX = 'blockrun/';
    const lower = model.toLowerCase();
    const name = lower.startsWith(PREFIX) ? lower.slice(PREFIX.length) : lower;
    return ['auto', 'eco', 'premium', 'free'].includes(name) ? name : null;
}
|