@blockrun/cc 0.9.1 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/balance.js +8 -2
- package/dist/commands/models.js +8 -1
- package/dist/commands/start.js +10 -5
- package/dist/index.js +11 -1
- package/dist/proxy/fallback.js +1 -1
- package/dist/proxy/server.js +140 -21
- package/dist/proxy/sse-translator.d.ts +29 -0
- package/dist/proxy/sse-translator.js +296 -0
- package/dist/router/index.js +45 -12
- package/package.json +1 -1
package/dist/commands/balance.js
CHANGED
|
@@ -27,8 +27,14 @@ export async function balanceCommand() {
|
|
|
27
27
|
}
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
|
-
catch {
|
|
31
|
-
|
|
30
|
+
catch (err) {
|
|
31
|
+
const msg = err instanceof Error ? err.message : '';
|
|
32
|
+
if (msg.includes('ENOENT') || msg.includes('wallet') || msg.includes('key')) {
|
|
33
|
+
console.log(chalk.red('No wallet found. Run `brcc setup` first.'));
|
|
34
|
+
}
|
|
35
|
+
else {
|
|
36
|
+
console.log(chalk.red(`Error checking balance: ${msg || 'unknown error'}`));
|
|
37
|
+
}
|
|
32
38
|
process.exit(1);
|
|
33
39
|
}
|
|
34
40
|
}
|
package/dist/commands/models.js
CHANGED
|
@@ -37,6 +37,13 @@ export async function modelsCommand() {
|
|
|
37
37
|
console.log(`\n${chalk.dim(`${models.length} models available. Use:`)} ${chalk.bold('brcc start --model <model-id>')}`);
|
|
38
38
|
}
|
|
39
39
|
catch (err) {
|
|
40
|
-
|
|
40
|
+
const msg = err instanceof Error ? err.message : 'unknown error';
|
|
41
|
+
if (msg.includes('fetch') || msg.includes('ECONNREFUSED') || msg.includes('ENOTFOUND')) {
|
|
42
|
+
console.log(chalk.red(`Cannot reach BlockRun API at ${apiUrl}`));
|
|
43
|
+
console.log(chalk.dim('Check your internet connection or try again later.'));
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
console.log(chalk.red(`Error: ${msg}`));
|
|
47
|
+
}
|
|
41
48
|
}
|
|
42
49
|
}
|
package/dist/commands/start.js
CHANGED
|
@@ -8,6 +8,11 @@ export async function startCommand(options) {
|
|
|
8
8
|
const chain = loadChain();
|
|
9
9
|
const apiUrl = API_URLS[chain];
|
|
10
10
|
const fallbackEnabled = options.fallback !== false; // Default true
|
|
11
|
+
const port = parseInt(options.port || String(DEFAULT_PROXY_PORT));
|
|
12
|
+
if (isNaN(port) || port < 1 || port > 65535) {
|
|
13
|
+
console.log(chalk.red(`Invalid port: ${options.port}. Must be 1-65535.`));
|
|
14
|
+
process.exit(1);
|
|
15
|
+
}
|
|
11
16
|
if (chain === 'solana') {
|
|
12
17
|
const wallet = await getOrCreateSolanaWallet();
|
|
13
18
|
if (wallet.isNew) {
|
|
@@ -16,7 +21,6 @@ export async function startCommand(options) {
|
|
|
16
21
|
console.log(`\nSend USDC on Solana to this address, then run ${chalk.bold('brcc start')} again.\n`);
|
|
17
22
|
return;
|
|
18
23
|
}
|
|
19
|
-
const port = parseInt(options.port || String(DEFAULT_PROXY_PORT));
|
|
20
24
|
const shouldLaunch = options.launch !== false;
|
|
21
25
|
const model = options.model;
|
|
22
26
|
console.log(chalk.bold('brcc — BlockRun Claude Code\n'));
|
|
@@ -35,7 +39,7 @@ export async function startCommand(options) {
|
|
|
35
39
|
debug: options.debug,
|
|
36
40
|
fallbackEnabled,
|
|
37
41
|
});
|
|
38
|
-
launchServer(server, port, shouldLaunch, model);
|
|
42
|
+
launchServer(server, port, shouldLaunch, model, options.debug);
|
|
39
43
|
}
|
|
40
44
|
else {
|
|
41
45
|
const wallet = getOrCreateWallet();
|
|
@@ -45,7 +49,6 @@ export async function startCommand(options) {
|
|
|
45
49
|
console.log(`\nSend USDC on Base to this address, then run ${chalk.bold('brcc start')} again.\n`);
|
|
46
50
|
return;
|
|
47
51
|
}
|
|
48
|
-
const port = parseInt(options.port || String(DEFAULT_PROXY_PORT));
|
|
49
52
|
const shouldLaunch = options.launch !== false;
|
|
50
53
|
const model = options.model;
|
|
51
54
|
console.log(chalk.bold('brcc — BlockRun Claude Code\n'));
|
|
@@ -64,13 +67,15 @@ export async function startCommand(options) {
|
|
|
64
67
|
debug: options.debug,
|
|
65
68
|
fallbackEnabled,
|
|
66
69
|
});
|
|
67
|
-
launchServer(server, port, shouldLaunch, model);
|
|
70
|
+
launchServer(server, port, shouldLaunch, model, options.debug);
|
|
68
71
|
}
|
|
69
72
|
}
|
|
70
|
-
function launchServer(server, port, shouldLaunch, model) {
|
|
73
|
+
function launchServer(server, port, shouldLaunch, model, debug) {
|
|
71
74
|
server.listen(port, () => {
|
|
72
75
|
console.log(chalk.green(`✓ Proxy running on port ${port}`));
|
|
73
76
|
console.log(chalk.dim(` Usage tracking: ~/.blockrun/brcc-stats.json`));
|
|
77
|
+
if (debug)
|
|
78
|
+
console.log(chalk.dim(` Debug log: ~/.blockrun/brcc-debug.log`));
|
|
74
79
|
console.log(chalk.dim(` Run 'brcc stats' to view statistics\n`));
|
|
75
80
|
if (shouldLaunch) {
|
|
76
81
|
console.log('Starting Claude Code...\n');
|
package/dist/index.js
CHANGED
|
@@ -6,12 +6,22 @@ import { balanceCommand } from './commands/balance.js';
|
|
|
6
6
|
import { modelsCommand } from './commands/models.js';
|
|
7
7
|
import { configCommand } from './commands/config.js';
|
|
8
8
|
import { statsCommand } from './commands/stats.js';
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import { fileURLToPath } from 'node:url';
|
|
12
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
let version = '0.9.0';
|
|
14
|
+
try {
|
|
15
|
+
const pkg = JSON.parse(fs.readFileSync(path.resolve(__dirname, '../package.json'), 'utf-8'));
|
|
16
|
+
version = pkg.version || version;
|
|
17
|
+
}
|
|
18
|
+
catch { /* use default */ }
|
|
9
19
|
const program = new Command();
|
|
10
20
|
program
|
|
11
21
|
.name('brcc')
|
|
12
22
|
.description('BlockRun Claude Code — run Claude Code with any model, pay with USDC.\n\n' +
|
|
13
23
|
'Use /model inside Claude Code to switch between models on the fly.')
|
|
14
|
-
.version(
|
|
24
|
+
.version(version);
|
|
15
25
|
program
|
|
16
26
|
.command('setup [chain]')
|
|
17
27
|
.description('Create a new wallet for payments (base or solana)')
|
package/dist/proxy/fallback.js
CHANGED
|
@@ -7,7 +7,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
|
|
|
7
7
|
'blockrun/auto', // Smart routing (default)
|
|
8
8
|
'blockrun/eco', // Cheapest capable model
|
|
9
9
|
'deepseek/deepseek-chat', // Direct fallback
|
|
10
|
-
'nvidia/
|
|
10
|
+
'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
|
|
11
11
|
],
|
|
12
12
|
retryOn: [429, 500, 502, 503, 504, 529],
|
|
13
13
|
maxRetries: 5,
|
package/dist/proxy/server.js
CHANGED
|
@@ -40,24 +40,56 @@ const DEFAULT_MAX_TOKENS = 4096;
|
|
|
40
40
|
let lastOutputTokens = 0;
|
|
41
41
|
// Model shortcuts for quick switching
|
|
42
42
|
const MODEL_SHORTCUTS = {
|
|
43
|
+
// Routing profiles
|
|
43
44
|
auto: 'blockrun/auto',
|
|
44
45
|
smart: 'blockrun/auto',
|
|
45
46
|
eco: 'blockrun/eco',
|
|
46
47
|
premium: 'blockrun/premium',
|
|
47
|
-
|
|
48
|
-
gpt5: 'openai/gpt-5.4',
|
|
49
|
-
'gpt-5': 'openai/gpt-5.4',
|
|
50
|
-
'gpt-5.4': 'openai/gpt-5.4',
|
|
48
|
+
// Anthropic
|
|
51
49
|
sonnet: 'anthropic/claude-sonnet-4.6',
|
|
52
50
|
claude: 'anthropic/claude-sonnet-4.6',
|
|
53
51
|
opus: 'anthropic/claude-opus-4.6',
|
|
54
52
|
haiku: 'anthropic/claude-haiku-4.5',
|
|
55
|
-
|
|
53
|
+
// OpenAI
|
|
54
|
+
gpt: 'openai/gpt-5.4',
|
|
55
|
+
gpt5: 'openai/gpt-5.4',
|
|
56
|
+
'gpt-5': 'openai/gpt-5.4',
|
|
57
|
+
'gpt-5.4': 'openai/gpt-5.4',
|
|
58
|
+
'gpt-5.4-pro': 'openai/gpt-5.4-pro',
|
|
59
|
+
'gpt-5.3': 'openai/gpt-5.3',
|
|
60
|
+
'gpt-5.2': 'openai/gpt-5.2',
|
|
61
|
+
'gpt-5.2-pro': 'openai/gpt-5.2-pro',
|
|
62
|
+
'gpt-4.1': 'openai/gpt-4.1',
|
|
63
|
+
codex: 'openai/gpt-5.3-codex',
|
|
64
|
+
nano: 'openai/gpt-5-nano',
|
|
65
|
+
mini: 'openai/gpt-5-mini',
|
|
66
|
+
o3: 'openai/o3',
|
|
67
|
+
o4: 'openai/o4-mini',
|
|
68
|
+
'o4-mini': 'openai/o4-mini',
|
|
69
|
+
o1: 'openai/o1',
|
|
70
|
+
// Google
|
|
56
71
|
gemini: 'google/gemini-2.5-pro',
|
|
72
|
+
flash: 'google/gemini-2.5-flash',
|
|
73
|
+
'gemini-3': 'google/gemini-3.1-pro',
|
|
74
|
+
// xAI
|
|
57
75
|
grok: 'xai/grok-3',
|
|
58
|
-
|
|
59
|
-
|
|
76
|
+
'grok-4': 'xai/grok-4-0709',
|
|
77
|
+
'grok-fast': 'xai/grok-4-1-fast-reasoning',
|
|
78
|
+
// DeepSeek
|
|
79
|
+
deepseek: 'deepseek/deepseek-chat',
|
|
80
|
+
r1: 'deepseek/deepseek-reasoner',
|
|
81
|
+
// Free models
|
|
82
|
+
free: 'nvidia/nemotron-ultra-253b',
|
|
83
|
+
nemotron: 'nvidia/nemotron-ultra-253b',
|
|
84
|
+
'deepseek-free': 'nvidia/deepseek-v3.2',
|
|
85
|
+
devstral: 'nvidia/devstral-2-123b',
|
|
86
|
+
'qwen-coder': 'nvidia/qwen3-coder-480b',
|
|
87
|
+
maverick: 'nvidia/llama-4-maverick',
|
|
88
|
+
// Minimax
|
|
89
|
+
minimax: 'minimax/minimax-m2.7',
|
|
90
|
+
// Others
|
|
60
91
|
glm: 'zai/glm-5',
|
|
92
|
+
kimi: 'moonshot/kimi-k2.5',
|
|
61
93
|
};
|
|
62
94
|
// Model pricing (per 1M tokens) - used for stats
|
|
63
95
|
const MODEL_PRICING = {
|
|
@@ -66,20 +98,68 @@ const MODEL_PRICING = {
|
|
|
66
98
|
'blockrun/eco': { input: 0.2, output: 1.0 },
|
|
67
99
|
'blockrun/premium': { input: 3.0, output: 15.0 },
|
|
68
100
|
'blockrun/free': { input: 0, output: 0 },
|
|
69
|
-
//
|
|
101
|
+
// FREE - NVIDIA models
|
|
102
|
+
'nvidia/gpt-oss-120b': { input: 0, output: 0 },
|
|
103
|
+
'nvidia/gpt-oss-20b': { input: 0, output: 0 },
|
|
104
|
+
'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
|
|
105
|
+
'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
|
|
106
|
+
'nvidia/nemotron-super-49b': { input: 0, output: 0 },
|
|
107
|
+
'nvidia/deepseek-v3.2': { input: 0, output: 0 },
|
|
108
|
+
'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
|
|
109
|
+
'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
|
|
110
|
+
'nvidia/devstral-2-123b': { input: 0, output: 0 },
|
|
111
|
+
'nvidia/glm-4.7': { input: 0, output: 0 },
|
|
112
|
+
'nvidia/llama-4-maverick': { input: 0, output: 0 },
|
|
113
|
+
// Anthropic
|
|
70
114
|
'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
|
|
71
115
|
'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
|
|
72
116
|
'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
|
|
73
|
-
|
|
117
|
+
// OpenAI
|
|
118
|
+
'openai/gpt-5-nano': { input: 0.05, output: 0.4 },
|
|
119
|
+
'openai/gpt-4.1-nano': { input: 0.1, output: 0.4 },
|
|
120
|
+
'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
|
|
74
121
|
'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
|
|
75
|
-
'
|
|
122
|
+
'openai/gpt-4.1-mini': { input: 0.4, output: 1.6 },
|
|
123
|
+
'openai/gpt-5.2': { input: 1.75, output: 14.0 },
|
|
124
|
+
'openai/gpt-5.3': { input: 1.75, output: 14.0 },
|
|
125
|
+
'openai/gpt-5.3-codex': { input: 1.75, output: 14.0 },
|
|
126
|
+
'openai/gpt-4.1': { input: 2.0, output: 8.0 },
|
|
127
|
+
'openai/o3': { input: 2.0, output: 8.0 },
|
|
128
|
+
'openai/gpt-4o': { input: 2.5, output: 10.0 },
|
|
129
|
+
'openai/gpt-5.4': { input: 2.5, output: 15.0 },
|
|
130
|
+
'openai/o1-mini': { input: 1.1, output: 4.4 },
|
|
131
|
+
'openai/o3-mini': { input: 1.1, output: 4.4 },
|
|
132
|
+
'openai/o4-mini': { input: 1.1, output: 4.4 },
|
|
133
|
+
'openai/o1': { input: 15.0, output: 60.0 },
|
|
134
|
+
'openai/gpt-5.2-pro': { input: 21.0, output: 168.0 },
|
|
135
|
+
'openai/gpt-5.4-pro': { input: 30.0, output: 180.0 },
|
|
136
|
+
// Google
|
|
137
|
+
'google/gemini-2.5-flash-lite': { input: 0.1, output: 0.4 },
|
|
76
138
|
'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
|
|
77
|
-
'
|
|
78
|
-
'
|
|
139
|
+
'google/gemini-3-flash-preview': { input: 0.5, output: 3.0 },
|
|
140
|
+
'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
|
|
141
|
+
'google/gemini-3-pro-preview': { input: 2.0, output: 12.0 },
|
|
142
|
+
'google/gemini-3.1-pro': { input: 2.0, output: 12.0 },
|
|
143
|
+
// xAI
|
|
79
144
|
'xai/grok-4-fast': { input: 0.2, output: 0.5 },
|
|
80
|
-
'
|
|
81
|
-
'
|
|
145
|
+
'xai/grok-4-fast-reasoning': { input: 0.2, output: 0.5 },
|
|
146
|
+
'xai/grok-4-1-fast': { input: 0.2, output: 0.5 },
|
|
147
|
+
'xai/grok-4-1-fast-reasoning': { input: 0.2, output: 0.5 },
|
|
148
|
+
'xai/grok-4-0709': { input: 0.2, output: 1.5 },
|
|
149
|
+
'xai/grok-3-mini': { input: 0.3, output: 0.5 },
|
|
150
|
+
'xai/grok-2-vision': { input: 2.0, output: 10.0 },
|
|
151
|
+
'xai/grok-3': { input: 3.0, output: 15.0 },
|
|
152
|
+
// DeepSeek
|
|
153
|
+
'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
|
|
154
|
+
'deepseek/deepseek-reasoner': { input: 0.28, output: 0.42 },
|
|
155
|
+
// Minimax
|
|
156
|
+
'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
|
|
157
|
+
'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
|
|
158
|
+
// Others
|
|
82
159
|
'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
|
|
160
|
+
'nvidia/kimi-k2.5': { input: 0.55, output: 2.5 },
|
|
161
|
+
'zai/glm-5': { input: 1.0, output: 3.2 },
|
|
162
|
+
'zai/glm-5-turbo': { input: 1.2, output: 4.0 },
|
|
83
163
|
};
|
|
84
164
|
function estimateCost(model, inputTokens, outputTokens) {
|
|
85
165
|
const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
|
|
@@ -301,23 +381,63 @@ export function createProxy(options) {
|
|
|
301
381
|
response.headers.forEach((v, k) => {
|
|
302
382
|
responseHeaders[k] = v;
|
|
303
383
|
});
|
|
384
|
+
// Intercept error responses and ensure Anthropic-format errors
|
|
385
|
+
// so Claude Code doesn't fall back to showing a login page
|
|
386
|
+
if (response.status >= 400 && !responseHeaders['content-type']?.includes('text/event-stream')) {
|
|
387
|
+
let errorBody;
|
|
388
|
+
try {
|
|
389
|
+
const rawText = await response.text();
|
|
390
|
+
const parsed = JSON.parse(rawText);
|
|
391
|
+
// Already has Anthropic error shape? Pass through
|
|
392
|
+
if (parsed.type === 'error' && parsed.error) {
|
|
393
|
+
errorBody = rawText;
|
|
394
|
+
}
|
|
395
|
+
else {
|
|
396
|
+
// Wrap in Anthropic error format
|
|
397
|
+
const errorMsg = parsed.error?.message || parsed.message || rawText.slice(0, 500);
|
|
398
|
+
errorBody = JSON.stringify({
|
|
399
|
+
type: 'error',
|
|
400
|
+
error: {
|
|
401
|
+
type: response.status === 401 ? 'authentication_error'
|
|
402
|
+
: response.status === 402 ? 'invalid_request_error'
|
|
403
|
+
: response.status === 429 ? 'rate_limit_error'
|
|
404
|
+
: response.status === 400 ? 'invalid_request_error'
|
|
405
|
+
: 'api_error',
|
|
406
|
+
message: `[${finalModel}] ${errorMsg}`,
|
|
407
|
+
},
|
|
408
|
+
});
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
catch {
|
|
412
|
+
errorBody = JSON.stringify({
|
|
413
|
+
type: 'error',
|
|
414
|
+
error: { type: 'api_error', message: `Backend returned ${response.status}` },
|
|
415
|
+
});
|
|
416
|
+
}
|
|
417
|
+
res.writeHead(response.status, { 'Content-Type': 'application/json' });
|
|
418
|
+
res.end(errorBody);
|
|
419
|
+
log(`⚠️ ${response.status} from backend for ${finalModel}`);
|
|
420
|
+
return;
|
|
421
|
+
}
|
|
304
422
|
res.writeHead(response.status, responseHeaders);
|
|
305
423
|
const isStreaming = responseHeaders['content-type']?.includes('text/event-stream');
|
|
306
424
|
if (response.body) {
|
|
307
425
|
const reader = response.body.getReader();
|
|
308
426
|
const decoder = new TextDecoder();
|
|
309
|
-
let lastChunkText = '';
|
|
310
427
|
let fullResponse = '';
|
|
428
|
+
const STREAM_CAP = 5_000_000; // 5MB cap on accumulated stream
|
|
311
429
|
const pump = async () => {
|
|
312
430
|
while (true) {
|
|
313
431
|
const { done, value } = await reader.read();
|
|
314
432
|
if (done) {
|
|
315
433
|
// Record stats from streaming response
|
|
316
|
-
if (isStreaming &&
|
|
317
|
-
|
|
434
|
+
if (isStreaming && fullResponse) {
|
|
435
|
+
// Search full response for the last output_tokens value
|
|
436
|
+
const allOutputMatches = [...fullResponse.matchAll(/"output_tokens"\s*:\s*(\d+)/g)];
|
|
437
|
+
const lastOutputMatch = allOutputMatches[allOutputMatches.length - 1];
|
|
318
438
|
const inputMatch = fullResponse.match(/"input_tokens"\s*:\s*(\d+)/);
|
|
319
|
-
if (
|
|
320
|
-
lastOutputTokens = parseInt(
|
|
439
|
+
if (lastOutputMatch) {
|
|
440
|
+
lastOutputTokens = parseInt(lastOutputMatch[1], 10);
|
|
321
441
|
const inputTokens = inputMatch
|
|
322
442
|
? parseInt(inputMatch[1], 10)
|
|
323
443
|
: 0;
|
|
@@ -330,9 +450,8 @@ export function createProxy(options) {
|
|
|
330
450
|
res.end();
|
|
331
451
|
break;
|
|
332
452
|
}
|
|
333
|
-
if (isStreaming) {
|
|
453
|
+
if (isStreaming && fullResponse.length < STREAM_CAP) {
|
|
334
454
|
const chunk = decoder.decode(value, { stream: true });
|
|
335
|
-
lastChunkText = chunk;
|
|
336
455
|
fullResponse += chunk;
|
|
337
456
|
}
|
|
338
457
|
res.write(value);
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSE Event Translator: OpenAI → Anthropic Messages API format
|
|
3
|
+
*
|
|
4
|
+
* Handles three critical gaps in the streaming pipeline:
|
|
5
|
+
* 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
|
|
6
|
+
* 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
|
|
7
|
+
* 3. Ensures proper content_block_stop and message_stop events
|
|
8
|
+
*/
|
|
9
|
+
export declare class SSETranslator {
|
|
10
|
+
private state;
|
|
11
|
+
private buffer;
|
|
12
|
+
constructor(model?: string);
|
|
13
|
+
/**
|
|
14
|
+
* Detect whether an SSE chunk looks like OpenAI format (substring heuristic, no JSON parsing).
|
|
15
|
+
* Returns true when the text contains both `"choices"` and `"delta"` and no `"content_block_` marker.
|
|
16
|
+
*/
|
|
17
|
+
static isOpenAIFormat(chunk: string): boolean;
|
|
18
|
+
/**
|
|
19
|
+
* Append a raw SSE text chunk to the internal buffer and translate every complete buffered event into Anthropic-format SSE text.
|
|
20
|
+
* Returns null when no complete event is buffered yet, or when the parsed events yield no output (e.g. non-JSON data or payloads without `choices`).
|
|
21
|
+
*/
|
|
22
|
+
processChunk(rawChunk: string): string | null;
|
|
23
|
+
private parseSSEEvents;
|
|
24
|
+
private formatSSE;
|
|
25
|
+
private closeThinkingBlock;
|
|
26
|
+
private closeTextBlock;
|
|
27
|
+
private closeToolCalls;
|
|
28
|
+
private closeActiveBlocks;
|
|
29
|
+
}
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSE Event Translator: OpenAI → Anthropic Messages API format
|
|
3
|
+
*
|
|
4
|
+
* Handles three critical gaps in the streaming pipeline:
|
|
5
|
+
* 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
|
|
6
|
+
* 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
|
|
7
|
+
* 3. Ensures proper content_block_stop and message_stop events
|
|
8
|
+
*/
|
|
9
|
+
// ─── SSE Translator ─────────────────────────────────────────────────────────
|
|
10
|
+
/**
 * Translates an OpenAI chat-completions SSE stream into Anthropic Messages
 * API SSE events.
 *
 * Feed raw text chunks to {@link SSETranslator#processChunk}; complete events
 * are translated as soon as their terminating blank line has arrived, and any
 * unterminated tail stays buffered for the next call.
 *
 * Every content block (thinking, text, tool_use) gets its own index from a
 * monotonically increasing counter when it is opened, so several tool calls
 * streamed in parallel never share an index.
 */
export class SSETranslator {
    state;
    buffer = '';

    /**
     * @param {string} [model='unknown'] Model name reported in `message_start`
     *   until the upstream stream supplies its own `model` field.
     */
    constructor(model = 'unknown') {
        this.state = {
            messageId: `msg_brcc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
            model,
            // Next unused Anthropic content block index.
            blockIndex: 0,
            // OpenAI tool_call index -> { id, name, blockIndex } for open tool_use blocks.
            activeToolCalls: new Map(),
            thinkingBlockActive: false,
            thinkingBlockIndex: -1,
            textBlockActive: false,
            textBlockIndex: -1,
            messageStarted: false,
            // Anthropic stop reason derived from the upstream finish_reason.
            stopReason: null,
            // output_tokens value carried by the last message_delta (null = none sent).
            reportedOutputTokens: null,
            inputTokens: 0,
            outputTokens: 0,
        };
    }

    /**
     * Detect whether an SSE chunk looks like OpenAI format.
     * This is a substring heuristic; the chunk is not parsed.
     *
     * @param {string} chunk Raw SSE text.
     * @returns {boolean} True when the text mentions `"choices"` and `"delta"`
     *   and contains no Anthropic `"content_block_` marker.
     */
    static isOpenAIFormat(chunk) {
        return (chunk.includes('"choices"') &&
            chunk.includes('"delta"') &&
            !chunk.includes('"content_block_'));
    }

    /**
     * Buffer a raw SSE text chunk and translate every complete event in the
     * buffer to Anthropic-format SSE text.
     *
     * @param {string} rawChunk Raw SSE text; may end in the middle of an event.
     * @returns {string|null} Concatenated Anthropic SSE events, or null when no
     *   complete event is available yet or nothing needed to be emitted.
     */
    processChunk(rawChunk) {
        this.buffer += rawChunk;
        const events = this.parseSSEEvents();
        if (events.length === 0) {
            return null;
        }
        const translated = [];
        for (const event of events) {
            if (event.data.trim() === '[DONE]') {
                // An empty upstream stream must still yield a well-formed message.
                translated.push(...this.startMessage());
                translated.push(...this.closeActiveBlocks());
                // Re-send message_delta only when it adds information; it always
                // carries the recorded stop reason so `tool_use` is never
                // overwritten by a blanket `end_turn`.
                if (this.state.reportedOutputTokens !== this.state.outputTokens) {
                    translated.push(this.messageDelta());
                }
                translated.push(this.formatSSE('message_stop', { type: 'message_stop' }));
                continue;
            }
            let parsed;
            try {
                parsed = JSON.parse(event.data);
            }
            catch {
                // Not JSON: nothing to translate for this event.
                continue;
            }
            if (parsed === null || typeof parsed !== 'object') {
                continue;
            }
            // Upstream usage is authoritative; when a chunk carries it, the
            // per-chunk estimate below is skipped.
            const usageReported = this.applyUsage(parsed.usage);
            if (!Array.isArray(parsed.choices) || parsed.choices.length === 0) {
                // Usage-only (or non-OpenAI) event.
                continue;
            }
            translated.push(...this.startMessage(parsed.model));
            const choice = parsed.choices[0] ?? {};
            const delta = choice.delta ?? {};
            // ── Reasoning content → thinking block ──
            if (delta.reasoning_content) {
                if (!this.state.thinkingBlockActive) {
                    translated.push(...this.closeTextBlock());
                    this.state.thinkingBlockActive = true;
                    this.state.thinkingBlockIndex = this.state.blockIndex++;
                    translated.push(this.blockStart(this.state.thinkingBlockIndex, { type: 'thinking', thinking: '' }));
                }
                translated.push(this.blockDelta(this.state.thinkingBlockIndex, {
                    type: 'thinking_delta',
                    thinking: delta.reasoning_content,
                }));
                if (!usageReported) {
                    this.state.outputTokens++;
                }
            }
            // ── Text content → text block ──
            if (delta.content) {
                translated.push(...this.closeThinkingBlock());
                if (!this.state.textBlockActive) {
                    translated.push(...this.closeToolCalls());
                    this.state.textBlockActive = true;
                    this.state.textBlockIndex = this.state.blockIndex++;
                    translated.push(this.blockStart(this.state.textBlockIndex, { type: 'text', text: '' }));
                }
                translated.push(this.blockDelta(this.state.textBlockIndex, {
                    type: 'text_delta',
                    text: delta.content,
                }));
                if (!usageReported) {
                    this.state.outputTokens++;
                }
            }
            // ── Tool calls → tool_use blocks ──
            if (Array.isArray(delta.tool_calls) && delta.tool_calls.length > 0) {
                translated.push(...this.closeThinkingBlock());
                translated.push(...this.closeTextBlock());
                for (const tc of delta.tool_calls) {
                    if (!tc) {
                        continue;
                    }
                    const tcIndex = tc.index;
                    const args = tc.function?.arguments;
                    if (tc.id && tc.function?.name) {
                        // New tool call. A call already open at the same upstream
                        // index is finished first, on its own block index.
                        const previous = this.state.activeToolCalls.get(tcIndex);
                        if (previous) {
                            translated.push(this.blockStop(previous.blockIndex));
                            this.state.activeToolCalls.delete(tcIndex);
                        }
                        const blockIndex = this.state.blockIndex++;
                        this.state.activeToolCalls.set(tcIndex, {
                            id: tc.id,
                            name: tc.function.name,
                            blockIndex,
                        });
                        translated.push(this.blockStart(blockIndex, {
                            type: 'tool_use',
                            id: tc.id,
                            name: tc.function.name,
                            input: {},
                        }));
                        if (args) {
                            translated.push(this.blockDelta(blockIndex, {
                                type: 'input_json_delta',
                                partial_json: args,
                            }));
                        }
                    }
                    else if (args) {
                        // Continuation: route to the block opened for this upstream
                        // index, falling back to the most recently opened tool call
                        // for providers that omit `index` on continuations.
                        const open = this.state.activeToolCalls.get(tcIndex)
                            ?? [...this.state.activeToolCalls.values()].at(-1);
                        if (!open) {
                            continue;
                        }
                        translated.push(this.blockDelta(open.blockIndex, {
                            type: 'input_json_delta',
                            partial_json: args,
                        }));
                    }
                }
                if (!usageReported) {
                    this.state.outputTokens++;
                }
            }
            // ── Handle finish_reason ──
            if (choice.finish_reason) {
                translated.push(...this.closeActiveBlocks());
                this.state.stopReason = this.mapStopReason(choice.finish_reason);
                translated.push(this.messageDelta());
            }
        }
        return translated.length > 0 ? translated.join('') : null;
    }

    // ── Helpers ─────────────────────────────────────────────────────────────

    /**
     * Extract complete SSE events from the buffer.
     *
     * Only newline-terminated lines are interpreted, so a chunk that ends in
     * the middle of an event is never dispatched early. Accepts LF and CRLF
     * line endings and `data:` with or without the optional leading space.
     *
     * @returns {{event: (string|undefined), data: string}[]} Parsed events;
     *   the consumed text is removed from the buffer.
     */
    parseSSEEvents() {
        const events = [];
        let eventName;
        let dataLines = [];
        let consumed = 0;
        let lineStart = 0;
        let newlineAt = this.buffer.indexOf('\n', lineStart);
        while (newlineAt !== -1) {
            let line = this.buffer.slice(lineStart, newlineAt);
            if (line.endsWith('\r')) {
                line = line.slice(0, -1);
            }
            lineStart = newlineAt + 1;
            if (line === '') {
                // Blank line terminates the current event.
                if (dataLines.length > 0) {
                    events.push({ event: eventName, data: dataLines.join('\n') });
                }
                eventName = undefined;
                dataLines = [];
                consumed = lineStart;
            }
            else if (line.startsWith('event:')) {
                eventName = line.slice(6).trim();
            }
            else if (line.startsWith('data:')) {
                const value = line.slice(5);
                dataLines.push(value.startsWith(' ') ? value.slice(1) : value);
            }
            newlineAt = this.buffer.indexOf('\n', lineStart);
        }
        // Keep the unterminated tail for the next chunk.
        if (consumed > 0) {
            this.buffer = this.buffer.slice(consumed);
        }
        return events;
    }

    /** Serialize one SSE event with a JSON payload. */
    formatSSE(event, data) {
        return `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
    }

    /** Build a `content_block_start` event for the given block index. */
    blockStart(index, contentBlock) {
        return this.formatSSE('content_block_start', {
            type: 'content_block_start',
            index,
            content_block: contentBlock,
        });
    }

    /** Build a `content_block_delta` event for the given block index. */
    blockDelta(index, delta) {
        return this.formatSSE('content_block_delta', {
            type: 'content_block_delta',
            index,
            delta,
        });
    }

    /** Build a `content_block_stop` event for the given block index. */
    blockStop(index) {
        return this.formatSSE('content_block_stop', {
            type: 'content_block_stop',
            index,
        });
    }

    /**
     * Emit `message_start` and `ping` once per stream.
     *
     * @param {string} [model] Upstream model name, adopted when present.
     * @returns {string[]} The two events on first call, otherwise an empty array.
     */
    startMessage(model) {
        if (this.state.messageStarted) {
            return [];
        }
        this.state.messageStarted = true;
        if (model) {
            this.state.model = model;
        }
        return [
            this.formatSSE('message_start', {
                type: 'message_start',
                message: {
                    id: this.state.messageId,
                    type: 'message',
                    role: 'assistant',
                    model: this.state.model,
                    content: [],
                    stop_reason: null,
                    stop_sequence: null,
                    usage: { input_tokens: this.state.inputTokens, output_tokens: 0 },
                },
            }),
            this.formatSSE('ping', { type: 'ping' }),
        ];
    }

    /** Build a `message_delta` event and remember the token count it reported. */
    messageDelta() {
        this.state.reportedOutputTokens = this.state.outputTokens;
        return this.formatSSE('message_delta', {
            type: 'message_delta',
            delta: { stop_reason: this.state.stopReason ?? 'end_turn', stop_sequence: null },
            usage: { output_tokens: this.state.outputTokens },
        });
    }

    /**
     * Copy numeric token counts from an OpenAI `usage` object into the state.
     *
     * @returns {boolean} True when `completion_tokens` was supplied.
     */
    applyUsage(usage) {
        if (!usage || typeof usage !== 'object') {
            return false;
        }
        if (typeof usage.prompt_tokens === 'number') {
            this.state.inputTokens = usage.prompt_tokens;
        }
        if (typeof usage.completion_tokens === 'number') {
            this.state.outputTokens = usage.completion_tokens;
            return true;
        }
        return false;
    }

    /** Map an OpenAI `finish_reason` to an Anthropic `stop_reason`; unknown values pass through. */
    mapStopReason(finishReason) {
        switch (finishReason) {
            case 'tool_calls':
            case 'function_call':
                return 'tool_use';
            case 'stop':
                return 'end_turn';
            case 'length':
                return 'max_tokens';
            default:
                return finishReason;
        }
    }

    /** Close the open thinking block, if any. */
    closeThinkingBlock() {
        if (!this.state.thinkingBlockActive) {
            return [];
        }
        this.state.thinkingBlockActive = false;
        return [this.blockStop(this.state.thinkingBlockIndex)];
    }

    /** Close the open text block, if any. */
    closeTextBlock() {
        if (!this.state.textBlockActive) {
            return [];
        }
        this.state.textBlockActive = false;
        return [this.blockStop(this.state.textBlockIndex)];
    }

    /** Close every open tool_use block, each on the index it was opened with. */
    closeToolCalls() {
        const events = [];
        for (const { blockIndex } of this.state.activeToolCalls.values()) {
            events.push(this.blockStop(blockIndex));
        }
        this.state.activeToolCalls.clear();
        return events;
    }

    /** Close whatever thinking, text and tool_use blocks are still open. */
    closeActiveBlocks() {
        return [
            ...this.closeThinkingBlock(),
            ...this.closeTextBlock(),
            ...this.closeToolCalls(),
        ];
    }
}
|
package/dist/router/index.js
CHANGED
|
@@ -6,11 +6,11 @@
|
|
|
6
6
|
const AUTO_TIERS = {
|
|
7
7
|
SIMPLE: {
|
|
8
8
|
primary: 'google/gemini-2.5-flash',
|
|
9
|
-
fallback: ['deepseek/deepseek-chat', 'nvidia/
|
|
9
|
+
fallback: ['deepseek/deepseek-chat', 'nvidia/nemotron-ultra-253b'],
|
|
10
10
|
},
|
|
11
11
|
MEDIUM: {
|
|
12
12
|
primary: 'moonshot/kimi-k2.5',
|
|
13
|
-
fallback: ['google/gemini-2.5-flash', '
|
|
13
|
+
fallback: ['google/gemini-2.5-flash', 'minimax/minimax-m2.7'],
|
|
14
14
|
},
|
|
15
15
|
COMPLEX: {
|
|
16
16
|
primary: 'google/gemini-3.1-pro',
|
|
@@ -23,20 +23,20 @@ const AUTO_TIERS = {
|
|
|
23
23
|
};
|
|
24
24
|
const ECO_TIERS = {
|
|
25
25
|
SIMPLE: {
|
|
26
|
-
primary: 'nvidia/
|
|
27
|
-
fallback: ['
|
|
26
|
+
primary: 'nvidia/nemotron-ultra-253b',
|
|
27
|
+
fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
|
|
28
28
|
},
|
|
29
29
|
MEDIUM: {
|
|
30
30
|
primary: 'google/gemini-2.5-flash-lite',
|
|
31
|
-
fallback: ['nvidia/
|
|
31
|
+
fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
|
|
32
32
|
},
|
|
33
33
|
COMPLEX: {
|
|
34
34
|
primary: 'google/gemini-2.5-flash-lite',
|
|
35
|
-
fallback: ['deepseek/deepseek-chat'],
|
|
35
|
+
fallback: ['deepseek/deepseek-chat', 'nvidia/mistral-large-3-675b'],
|
|
36
36
|
},
|
|
37
37
|
REASONING: {
|
|
38
38
|
primary: 'xai/grok-4-1-fast-reasoning',
|
|
39
|
-
fallback: ['deepseek/deepseek-reasoner'],
|
|
39
|
+
fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
|
|
40
40
|
},
|
|
41
41
|
};
|
|
42
42
|
const PREMIUM_TIERS = {
|
|
@@ -189,7 +189,7 @@ export function routeRequest(prompt, profile = 'auto') {
|
|
|
189
189
|
// Free profile - always use free model
|
|
190
190
|
if (profile === 'free') {
|
|
191
191
|
return {
|
|
192
|
-
model: 'nvidia/
|
|
192
|
+
model: 'nvidia/nemotron-ultra-253b',
|
|
193
193
|
tier: 'SIMPLE',
|
|
194
194
|
confidence: 1.0,
|
|
195
195
|
signals: ['free-profile'],
|
|
@@ -217,15 +217,48 @@ export function routeRequest(prompt, profile = 'auto') {
|
|
|
217
217
|
// Baseline: Claude Opus at $5/$25 per 1M tokens
|
|
218
218
|
const OPUS_COST_PER_1K = 0.015; // rough average
|
|
219
219
|
const modelCosts = {
|
|
220
|
+
// FREE
|
|
220
221
|
'nvidia/gpt-oss-120b': 0,
|
|
221
|
-
'
|
|
222
|
+
'nvidia/gpt-oss-20b': 0,
|
|
223
|
+
'nvidia/nemotron-ultra-253b': 0,
|
|
224
|
+
'nvidia/nemotron-3-super-120b': 0,
|
|
225
|
+
'nvidia/nemotron-super-49b': 0,
|
|
226
|
+
'nvidia/deepseek-v3.2': 0,
|
|
227
|
+
'nvidia/mistral-large-3-675b': 0,
|
|
228
|
+
'nvidia/qwen3-coder-480b': 0,
|
|
229
|
+
'nvidia/devstral-2-123b': 0,
|
|
230
|
+
'nvidia/glm-4.7': 0,
|
|
231
|
+
'nvidia/llama-4-maverick': 0,
|
|
232
|
+
// Budget
|
|
233
|
+
'openai/gpt-5-nano': 0.0002,
|
|
234
|
+
'openai/gpt-4.1-nano': 0.0003,
|
|
222
235
|
'google/gemini-2.5-flash-lite': 0.0003,
|
|
236
|
+
'xai/grok-4-fast': 0.0004,
|
|
237
|
+
'xai/grok-4-1-fast': 0.0004,
|
|
238
|
+
'xai/grok-4-1-fast-reasoning': 0.0004,
|
|
223
239
|
'deepseek/deepseek-chat': 0.0004,
|
|
224
|
-
'
|
|
240
|
+
'deepseek/deepseek-reasoner': 0.0004,
|
|
241
|
+
'minimax/minimax-m2.7': 0.0008,
|
|
242
|
+
'minimax/minimax-m2.5': 0.0008,
|
|
243
|
+
'google/gemini-2.5-flash': 0.0014,
|
|
244
|
+
'openai/gpt-5-mini': 0.0011,
|
|
245
|
+
'moonshot/kimi-k2.5': 0.0018,
|
|
246
|
+
// Mid-range
|
|
247
|
+
'anthropic/claude-haiku-4.5': 0.003,
|
|
248
|
+
'zai/glm-5': 0.0021,
|
|
249
|
+
'openai/o4-mini': 0.0028,
|
|
250
|
+
'google/gemini-2.5-pro': 0.0056,
|
|
251
|
+
'openai/gpt-5.3-codex': 0.0079,
|
|
252
|
+
'openai/gpt-5.2': 0.0079,
|
|
253
|
+
'openai/gpt-5.3': 0.0079,
|
|
254
|
+
'openai/gpt-4.1': 0.005,
|
|
255
|
+
'openai/o3': 0.005,
|
|
225
256
|
'google/gemini-3.1-pro': 0.007,
|
|
257
|
+
'openai/gpt-5.4': 0.0088,
|
|
258
|
+
// Premium
|
|
226
259
|
'anthropic/claude-sonnet-4.6': 0.009,
|
|
260
|
+
'xai/grok-3': 0.009,
|
|
227
261
|
'anthropic/claude-opus-4.6': 0.015,
|
|
228
|
-
'xai/grok-4-1-fast-reasoning': 0.0004,
|
|
229
262
|
};
|
|
230
263
|
const modelCost = modelCosts[model] ?? 0.005;
|
|
231
264
|
const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
|
|
@@ -250,7 +283,7 @@ export function getFallbackChain(tier, profile = 'auto') {
|
|
|
250
283
|
tierConfigs = PREMIUM_TIERS;
|
|
251
284
|
break;
|
|
252
285
|
case 'free':
|
|
253
|
-
return ['nvidia/
|
|
286
|
+
return ['nvidia/nemotron-ultra-253b'];
|
|
254
287
|
default:
|
|
255
288
|
tierConfigs = AUTO_TIERS;
|
|
256
289
|
}
|