@blockrun/cc 0.8.2 → 0.9.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -1,8 +1,24 @@
1
1
  import http from 'node:http';
2
- import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
3
2
  import fs from 'node:fs';
4
3
  import path from 'node:path';
5
4
  import os from 'node:os';
5
+ import { fileURLToPath } from 'node:url';
6
+ import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
7
+ import { recordUsage } from '../stats/tracker.js';
8
+ import { fetchWithFallback, buildFallbackChain, DEFAULT_FALLBACK_CONFIG, } from './fallback.js';
9
+ import { routeRequest, parseRoutingProfile, } from '../router/index.js';
10
+ // Get version from package.json
11
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
12
+ let VERSION = '0.9.0';
13
+ try {
14
+ const pkgPath = path.resolve(__dirname, '../../package.json');
15
+ const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
16
+ VERSION = pkg.version || VERSION;
17
+ }
18
+ catch { /* use default */ }
19
+ // User-Agent for backend requests
20
+ const USER_AGENT = `brcc/${VERSION}`;
21
+ const X_BRCC_VERSION = VERSION;
6
22
  const LOG_FILE = path.join(os.homedir(), '.blockrun', 'brcc-debug.log');
7
23
  function debug(options, ...args) {
8
24
  if (!options.debug)
@@ -12,27 +28,71 @@ function debug(options, ...args) {
12
28
  fs.mkdirSync(path.dirname(LOG_FILE), { recursive: true });
13
29
  fs.appendFileSync(LOG_FILE, msg);
14
30
  }
15
- catch { /* ignore */ }
31
+ catch {
32
+ /* ignore */
33
+ }
34
+ }
35
+ function log(...args) {
36
+ const msg = `[brcc] ${args.map(String).join(' ')}`;
37
+ console.log(msg);
16
38
  }
17
39
  const DEFAULT_MAX_TOKENS = 4096;
18
40
  let lastOutputTokens = 0;
19
41
  // Model shortcuts for quick switching
20
42
  const MODEL_SHORTCUTS = {
21
- 'gpt': 'openai/gpt-5.4',
22
- 'gpt5': 'openai/gpt-5.4',
43
+ auto: 'blockrun/auto',
44
+ smart: 'blockrun/auto',
45
+ eco: 'blockrun/eco',
46
+ premium: 'blockrun/premium',
47
+ gpt: 'openai/gpt-5.4',
48
+ gpt5: 'openai/gpt-5.4',
23
49
  'gpt-5': 'openai/gpt-5.4',
24
50
  'gpt-5.4': 'openai/gpt-5.4',
25
- 'sonnet': 'anthropic/claude-sonnet-4.6',
26
- 'claude': 'anthropic/claude-sonnet-4.6',
27
- 'opus': 'anthropic/claude-opus-4.6',
28
- 'haiku': 'anthropic/claude-haiku-4.5',
29
- 'deepseek': 'deepseek/deepseek-chat',
30
- 'gemini': 'google/gemini-2.5-pro',
31
- 'grok': 'xai/grok-3',
32
- 'free': 'nvidia/gpt-oss-120b',
33
- 'mini': 'openai/gpt-5-mini',
34
- 'glm': 'zai/glm-5',
51
+ sonnet: 'anthropic/claude-sonnet-4.6',
52
+ claude: 'anthropic/claude-sonnet-4.6',
53
+ opus: 'anthropic/claude-opus-4.6',
54
+ haiku: 'anthropic/claude-haiku-4.5',
55
+ deepseek: 'deepseek/deepseek-chat',
56
+ gemini: 'google/gemini-2.5-pro',
57
+ grok: 'xai/grok-3',
58
+ free: 'nvidia/gpt-oss-120b',
59
+ mini: 'openai/gpt-5-mini',
60
+ glm: 'zai/glm-5',
61
+ };
62
+ // Model pricing (per 1M tokens) - used for stats
63
+ const MODEL_PRICING = {
64
+ // Routing profiles (blended averages)
65
+ 'blockrun/auto': { input: 0.8, output: 4.0 },
66
+ 'blockrun/eco': { input: 0.2, output: 1.0 },
67
+ 'blockrun/premium': { input: 3.0, output: 15.0 },
68
+ 'blockrun/free': { input: 0, output: 0 },
69
+ // Individual models
70
+ 'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
71
+ 'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
72
+ 'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
73
+ 'openai/gpt-5.4': { input: 2.5, output: 15.0 },
74
+ 'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
75
+ 'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
76
+ 'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
77
+ 'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
78
+ 'deepseek/deepseek-reasoner': { input: 0.55, output: 2.19 },
79
+ 'xai/grok-3': { input: 3.0, output: 15.0 },
80
+ 'xai/grok-4-fast': { input: 0.2, output: 0.5 },
81
+ 'xai/grok-4-1-fast-reasoning': { input: 0.2, output: 0.5 },
82
+ 'nvidia/gpt-oss-120b': { input: 0, output: 0 },
83
+ 'zai/glm-5': { input: 1.0, output: 3.2 },
84
+ 'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
85
+ 'openai/gpt-5.3-codex': { input: 2.5, output: 10.0 },
86
+ 'openai/o3': { input: 2.0, output: 8.0 },
87
+ 'openai/o4-mini': { input: 1.1, output: 4.4 },
88
+ 'google/gemini-2.5-flash-lite': { input: 0.08, output: 0.3 },
89
+ 'google/gemini-3.1-pro': { input: 1.25, output: 10.0 },
35
90
  };
91
+ function estimateCost(model, inputTokens, outputTokens) {
92
+ const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
93
+ return ((inputTokens / 1_000_000) * pricing.input +
94
+ (outputTokens / 1_000_000) * pricing.output);
95
+ }
36
96
  function detectModelSwitch(parsed) {
37
97
  if (!parsed.messages || parsed.messages.length === 0)
38
98
  return null;
@@ -44,7 +104,7 @@ function detectModelSwitch(parsed) {
44
104
  content = last.content;
45
105
  }
46
106
  else if (Array.isArray(last.content)) {
47
- const textBlock = last.content.find(b => b.type === 'text' && b.text);
107
+ const textBlock = last.content.find((b) => b.type === 'text' && b.text);
48
108
  if (textBlock && textBlock.text)
49
109
  content = textBlock.text;
50
110
  }
@@ -63,9 +123,12 @@ function detectModelSwitch(parsed) {
63
123
  return modelInput;
64
124
  return null;
65
125
  }
126
+ // Default model - smart routing built-in
127
+ const DEFAULT_MODEL = 'blockrun/auto';
66
128
  export function createProxy(options) {
67
129
  const chain = options.chain || 'base';
68
- let currentModel = options.modelOverride || null;
130
+ let currentModel = options.modelOverride || DEFAULT_MODEL;
131
+ const fallbackEnabled = options.fallbackEnabled !== false; // Default true
69
132
  let baseWallet = null;
70
133
  let solanaWallet = null;
71
134
  if (chain === 'base') {
@@ -85,13 +148,16 @@ export function createProxy(options) {
85
148
  return;
86
149
  }
87
150
  await initSolana();
88
- const path = req.url?.replace(/^\/api/, '') || '';
89
- const targetUrl = `${options.apiUrl}${path}`;
151
+ const requestPath = req.url?.replace(/^\/api/, '') || '';
152
+ const targetUrl = `${options.apiUrl}${requestPath}`;
90
153
  let body = '';
154
+ const requestStartTime = Date.now();
91
155
  req.on('data', (chunk) => {
92
156
  body += chunk;
93
157
  });
94
158
  req.on('end', async () => {
159
+ let requestModel = currentModel || options.modelOverride || 'unknown';
160
+ let usedFallback = false;
95
161
  try {
96
162
  debug(options, `request: ${req.method} ${req.url} currentModel=${currentModel || 'none'}`);
97
163
  if (body) {
@@ -111,7 +177,12 @@ export function createProxy(options) {
111
177
  type: 'message',
112
178
  role: 'assistant',
113
179
  model: currentModel,
114
- content: [{ type: 'text', text: `Switched to **${currentModel}**. All subsequent requests will use this model.` }],
180
+ content: [
181
+ {
182
+ type: 'text',
183
+ text: `Switched to **${currentModel}**. All subsequent requests will use this model.`,
184
+ },
185
+ ],
115
186
  stop_reason: 'end_turn',
116
187
  stop_sequence: null,
117
188
  usage: { input_tokens: 0, output_tokens: 10 },
@@ -120,14 +191,53 @@ export function createProxy(options) {
120
191
  res.end(JSON.stringify(fakeResponse));
121
192
  return;
122
193
  }
123
- // Apply model override
124
- if ((currentModel || options.modelOverride) && parsed.model) {
125
- parsed.model = currentModel || options.modelOverride;
194
+ // Apply model override only if:
195
+ // 1. User specified --model on CLI (options.modelOverride)
196
+ // 2. User switched model in-session (currentModel set by "use X" command)
197
+ // 3. Request has no model specified
198
+ if (options.modelOverride && currentModel) {
199
+ // CLI --model flag: always use this
200
+ parsed.model = currentModel;
201
+ }
202
+ else if (!parsed.model) {
203
+ // No model in request: use default
204
+ parsed.model = currentModel || DEFAULT_MODEL;
126
205
  }
127
- if (parsed.max_tokens) {
206
+ // Otherwise: use the model from the request as-is
207
+ requestModel = parsed.model || DEFAULT_MODEL;
208
+ // Smart routing: if model is a routing profile, classify and route
209
+ const routingProfile = parseRoutingProfile(requestModel);
210
+ if (routingProfile) {
211
+ // Extract user prompt for classification
212
+ const userMessages = parsed.messages?.filter((m) => m.role === 'user') || [];
213
+ const lastUserMsg = userMessages[userMessages.length - 1];
214
+ let promptText = '';
215
+ if (lastUserMsg) {
216
+ if (typeof lastUserMsg.content === 'string') {
217
+ promptText = lastUserMsg.content;
218
+ }
219
+ else if (Array.isArray(lastUserMsg.content)) {
220
+ promptText = lastUserMsg.content
221
+ .filter((b) => b.type === 'text')
222
+ .map((b) => b.text)
223
+ .join('\n');
224
+ }
225
+ }
226
+ // Route the request
227
+ const routing = routeRequest(promptText, routingProfile);
228
+ parsed.model = routing.model;
229
+ requestModel = routing.model;
230
+ log(`🧠 Smart routing: ${routingProfile} → ${routing.tier} → ${routing.model} ` +
231
+ `(${(routing.savings * 100).toFixed(0)}% savings) [${routing.signals.join(', ')}]`);
232
+ }
233
+ {
128
234
  const original = parsed.max_tokens;
129
235
  const model = (parsed.model || '').toLowerCase();
130
- const modelCap = (model.includes('deepseek') || model.includes('haiku') || model.includes('gpt-oss')) ? 8192 : 16384;
236
+ const modelCap = model.includes('deepseek') ||
237
+ model.includes('haiku') ||
238
+ model.includes('gpt-oss')
239
+ ? 8192
240
+ : 16384;
131
241
  // Use max of (last output × 2, default 4096) capped by model limit
132
242
  // This ensures short replies don't starve the next request
133
243
  const adaptive = lastOutputTokens > 0
@@ -135,28 +245,57 @@ export function createProxy(options) {
135
245
  : DEFAULT_MAX_TOKENS;
136
246
  parsed.max_tokens = Math.min(adaptive, modelCap);
137
247
  if (original !== parsed.max_tokens) {
138
- debug(options, `max_tokens: ${original} → ${parsed.max_tokens} (last output: ${lastOutputTokens || 'none'})`);
248
+ debug(options, `max_tokens: ${original || 'unset'} → ${parsed.max_tokens} (last output: ${lastOutputTokens || 'none'})`);
139
249
  }
140
250
  }
141
251
  body = JSON.stringify(parsed);
142
252
  }
143
- catch { /* not JSON, pass through */ }
253
+ catch {
254
+ /* not JSON, pass through */
255
+ }
144
256
  }
145
257
  const headers = {
146
258
  'Content-Type': 'application/json',
259
+ 'User-Agent': USER_AGENT,
260
+ 'X-Brcc-Version': X_BRCC_VERSION,
147
261
  };
148
262
  for (const [key, value] of Object.entries(req.headers)) {
149
263
  if (key.toLowerCase() !== 'host' &&
150
264
  key.toLowerCase() !== 'content-length' &&
265
+ key.toLowerCase() !== 'user-agent' && // Don't forward client's user-agent
151
266
  value) {
152
267
  headers[key] = Array.isArray(value) ? value[0] : value;
153
268
  }
154
269
  }
155
- let response = await fetch(targetUrl, {
270
+ // Build request init
271
+ const requestInit = {
156
272
  method: req.method || 'POST',
157
273
  headers,
158
274
  body: body || undefined,
159
- });
275
+ };
276
+ let response;
277
+ let finalModel = requestModel;
278
+ // Use fallback chain if enabled
279
+ if (fallbackEnabled && body && requestPath.includes('messages')) {
280
+ const fallbackConfig = {
281
+ ...DEFAULT_FALLBACK_CONFIG,
282
+ chain: buildFallbackChain(requestModel),
283
+ };
284
+ const result = await fetchWithFallback(targetUrl, requestInit, body, fallbackConfig, (failedModel, status, nextModel) => {
285
+ log(`⚠️ ${failedModel} returned ${status}, falling back to ${nextModel}`);
286
+ });
287
+ response = result.response;
288
+ finalModel = result.modelUsed;
289
+ usedFallback = result.fallbackUsed;
290
+ if (usedFallback) {
291
+ log(`↺ Fallback successful: using ${finalModel}`);
292
+ }
293
+ }
294
+ else {
295
+ // Direct fetch without fallback
296
+ response = await fetch(targetUrl, requestInit);
297
+ }
298
+ // Handle 402 payment
160
299
  if (response.status === 402) {
161
300
  if (chain === 'solana' && solanaWallet) {
162
301
  response = await handleSolanaPayment(response, targetUrl, req.method || 'POST', headers, body, solanaWallet.privateKey, solanaWallet.address);
@@ -169,28 +308,78 @@ export function createProxy(options) {
169
308
  response.headers.forEach((v, k) => {
170
309
  responseHeaders[k] = v;
171
310
  });
311
+ // Intercept error responses and ensure Anthropic-format errors
312
+ // so Claude Code doesn't fall back to showing a login page
313
+ if (response.status >= 400 && !responseHeaders['content-type']?.includes('text/event-stream')) {
314
+ let errorBody;
315
+ try {
316
+ const rawText = await response.text();
317
+ const parsed = JSON.parse(rawText);
318
+ // Already has Anthropic error shape? Pass through
319
+ if (parsed.type === 'error' && parsed.error) {
320
+ errorBody = rawText;
321
+ }
322
+ else {
323
+ // Wrap in Anthropic error format
324
+ const errorMsg = parsed.error?.message || parsed.message || rawText.slice(0, 500);
325
+ errorBody = JSON.stringify({
326
+ type: 'error',
327
+ error: {
328
+ type: response.status === 401 ? 'authentication_error'
329
+ : response.status === 402 ? 'invalid_request_error'
330
+ : response.status === 429 ? 'rate_limit_error'
331
+ : response.status === 400 ? 'invalid_request_error'
332
+ : 'api_error',
333
+ message: `[${finalModel}] ${errorMsg}`,
334
+ },
335
+ });
336
+ }
337
+ }
338
+ catch {
339
+ errorBody = JSON.stringify({
340
+ type: 'error',
341
+ error: { type: 'api_error', message: `Backend returned ${response.status}` },
342
+ });
343
+ }
344
+ res.writeHead(response.status, { 'Content-Type': 'application/json' });
345
+ res.end(errorBody);
346
+ log(`⚠️ ${response.status} from backend for ${finalModel}`);
347
+ return;
348
+ }
172
349
  res.writeHead(response.status, responseHeaders);
173
350
  const isStreaming = responseHeaders['content-type']?.includes('text/event-stream');
174
351
  if (response.body) {
175
352
  const reader = response.body.getReader();
176
353
  const decoder = new TextDecoder();
177
- let lastChunkText = '';
354
+ let fullResponse = '';
355
+ const STREAM_CAP = 5_000_000; // 5MB cap on accumulated stream
178
356
  const pump = async () => {
179
357
  while (true) {
180
358
  const { done, value } = await reader.read();
181
359
  if (done) {
182
- if (isStreaming && lastChunkText) {
183
- const match = lastChunkText.match(/"output_tokens"\s*:\s*(\d+)/);
184
- if (match) {
185
- lastOutputTokens = parseInt(match[1], 10);
186
- debug(options, `recorded output_tokens: ${lastOutputTokens} (stream)`);
360
+ // Record stats from streaming response
361
+ if (isStreaming && fullResponse) {
362
+ // Search full response for the last output_tokens value
363
+ const allOutputMatches = [...fullResponse.matchAll(/"output_tokens"\s*:\s*(\d+)/g)];
364
+ const lastOutputMatch = allOutputMatches[allOutputMatches.length - 1];
365
+ const inputMatch = fullResponse.match(/"input_tokens"\s*:\s*(\d+)/);
366
+ if (lastOutputMatch) {
367
+ lastOutputTokens = parseInt(lastOutputMatch[1], 10);
368
+ const inputTokens = inputMatch
369
+ ? parseInt(inputMatch[1], 10)
370
+ : 0;
371
+ const latencyMs = Date.now() - requestStartTime;
372
+ const cost = estimateCost(finalModel, inputTokens, lastOutputTokens);
373
+ recordUsage(finalModel, inputTokens, lastOutputTokens, cost, latencyMs, usedFallback);
374
+ debug(options, `recorded: model=${finalModel} in=${inputTokens} out=${lastOutputTokens} cost=$${cost.toFixed(4)} fallback=${usedFallback}`);
187
375
  }
188
376
  }
189
377
  res.end();
190
378
  break;
191
379
  }
192
- if (isStreaming) {
193
- lastChunkText = decoder.decode(value, { stream: true });
380
+ if (isStreaming && fullResponse.length < STREAM_CAP) {
381
+ const chunk = decoder.decode(value, { stream: true });
382
+ fullResponse += chunk;
194
383
  }
195
384
  res.write(value);
196
385
  }
@@ -203,15 +392,22 @@ export function createProxy(options) {
203
392
  const parsed = JSON.parse(text);
204
393
  if (parsed.usage?.output_tokens) {
205
394
  lastOutputTokens = parsed.usage.output_tokens;
206
- debug(options, `recorded output_tokens: ${lastOutputTokens}`);
395
+ const inputTokens = parsed.usage?.input_tokens || 0;
396
+ const latencyMs = Date.now() - requestStartTime;
397
+ const cost = estimateCost(finalModel, inputTokens, lastOutputTokens);
398
+ recordUsage(finalModel, inputTokens, lastOutputTokens, cost, latencyMs, usedFallback);
399
+ debug(options, `recorded: model=${finalModel} in=${inputTokens} out=${lastOutputTokens} cost=$${cost.toFixed(4)} fallback=${usedFallback}`);
207
400
  }
208
401
  }
209
- catch { /* not JSON */ }
402
+ catch {
403
+ /* not JSON */
404
+ }
210
405
  res.end(text);
211
406
  }
212
407
  }
213
408
  catch (error) {
214
409
  const msg = error instanceof Error ? error.message : 'Proxy error';
410
+ log(`❌ Error: ${msg}`);
215
411
  res.writeHead(502, { 'Content-Type': 'application/json' });
216
412
  res.end(JSON.stringify({
217
413
  type: 'error',
@@ -292,9 +488,12 @@ export function classifyRequest(body) {
292
488
  .map((b) => b.text)
293
489
  .join('\n');
294
490
  }
295
- if (content.includes('```') || content.includes('function ') ||
296
- content.includes('class ') || content.includes('import ') ||
297
- content.includes('def ') || content.includes('const ')) {
491
+ if (content.includes('```') ||
492
+ content.includes('function ') ||
493
+ content.includes('class ') ||
494
+ content.includes('import ') ||
495
+ content.includes('def ') ||
496
+ content.includes('const ')) {
298
497
  return { category: 'code' };
299
498
  }
300
499
  if (content.length < 100) {
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Smart Router for brcc
3
+ * Ported from ClawRouter - 15-dimension weighted scoring for tier classification
4
+ */
5
+ export type Tier = 'SIMPLE' | 'MEDIUM' | 'COMPLEX' | 'REASONING';
6
+ export type RoutingProfile = 'auto' | 'eco' | 'premium' | 'free';
7
+ export interface RoutingResult {
8
+ model: string;
9
+ tier: Tier;
10
+ confidence: number;
11
+ signals: string[];
12
+ savings: number;
13
+ }
14
+ export declare function routeRequest(prompt: string, profile?: RoutingProfile): RoutingResult;
15
+ /**
16
+ * Get fallback models for a tier
17
+ */
18
+ export declare function getFallbackChain(tier: Tier, profile?: RoutingProfile): string[];
19
+ /**
20
+ * Parse routing profile from model string
21
+ */
22
+ export declare function parseRoutingProfile(model: string): RoutingProfile | null;