@blockrun/cc 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/proxy/fallback.js +1 -1
- package/dist/proxy/server.js +93 -20
- package/dist/proxy/sse-translator.d.ts +29 -0
- package/dist/proxy/sse-translator.js +296 -0
- package/dist/router/index.js +45 -19
- package/package.json +1 -1
package/dist/proxy/fallback.js
CHANGED
|
@@ -7,7 +7,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
|
|
|
7
7
|
'blockrun/auto', // Smart routing (default)
|
|
8
8
|
'blockrun/eco', // Cheapest capable model
|
|
9
9
|
'deepseek/deepseek-chat', // Direct fallback
|
|
10
|
-
'nvidia/
|
|
10
|
+
'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
|
|
11
11
|
],
|
|
12
12
|
retryOn: [429, 500, 502, 503, 504, 529],
|
|
13
13
|
maxRetries: 5,
|
package/dist/proxy/server.js
CHANGED
|
@@ -40,24 +40,56 @@ const DEFAULT_MAX_TOKENS = 4096;
|
|
|
40
40
|
let lastOutputTokens = 0;
|
|
41
41
|
// Model shortcuts for quick switching
|
|
42
42
|
const MODEL_SHORTCUTS = {
|
|
43
|
+
// Routing profiles
|
|
43
44
|
auto: 'blockrun/auto',
|
|
44
45
|
smart: 'blockrun/auto',
|
|
45
46
|
eco: 'blockrun/eco',
|
|
46
47
|
premium: 'blockrun/premium',
|
|
47
|
-
|
|
48
|
-
gpt5: 'openai/gpt-5.4',
|
|
49
|
-
'gpt-5': 'openai/gpt-5.4',
|
|
50
|
-
'gpt-5.4': 'openai/gpt-5.4',
|
|
48
|
+
// Anthropic
|
|
51
49
|
sonnet: 'anthropic/claude-sonnet-4.6',
|
|
52
50
|
claude: 'anthropic/claude-sonnet-4.6',
|
|
53
51
|
opus: 'anthropic/claude-opus-4.6',
|
|
54
52
|
haiku: 'anthropic/claude-haiku-4.5',
|
|
55
|
-
|
|
53
|
+
// OpenAI
|
|
54
|
+
gpt: 'openai/gpt-5.4',
|
|
55
|
+
gpt5: 'openai/gpt-5.4',
|
|
56
|
+
'gpt-5': 'openai/gpt-5.4',
|
|
57
|
+
'gpt-5.4': 'openai/gpt-5.4',
|
|
58
|
+
'gpt-5.4-pro': 'openai/gpt-5.4-pro',
|
|
59
|
+
'gpt-5.3': 'openai/gpt-5.3',
|
|
60
|
+
'gpt-5.2': 'openai/gpt-5.2',
|
|
61
|
+
'gpt-5.2-pro': 'openai/gpt-5.2-pro',
|
|
62
|
+
'gpt-4.1': 'openai/gpt-4.1',
|
|
63
|
+
codex: 'openai/gpt-5.3-codex',
|
|
64
|
+
nano: 'openai/gpt-5-nano',
|
|
65
|
+
mini: 'openai/gpt-5-mini',
|
|
66
|
+
o3: 'openai/o3',
|
|
67
|
+
o4: 'openai/o4-mini',
|
|
68
|
+
'o4-mini': 'openai/o4-mini',
|
|
69
|
+
o1: 'openai/o1',
|
|
70
|
+
// Google
|
|
56
71
|
gemini: 'google/gemini-2.5-pro',
|
|
72
|
+
flash: 'google/gemini-2.5-flash',
|
|
73
|
+
'gemini-3': 'google/gemini-3.1-pro',
|
|
74
|
+
// xAI
|
|
57
75
|
grok: 'xai/grok-3',
|
|
58
|
-
|
|
59
|
-
|
|
76
|
+
'grok-4': 'xai/grok-4-0709',
|
|
77
|
+
'grok-fast': 'xai/grok-4-1-fast-reasoning',
|
|
78
|
+
// DeepSeek
|
|
79
|
+
deepseek: 'deepseek/deepseek-chat',
|
|
80
|
+
r1: 'deepseek/deepseek-reasoner',
|
|
81
|
+
// Free models
|
|
82
|
+
free: 'nvidia/nemotron-ultra-253b',
|
|
83
|
+
nemotron: 'nvidia/nemotron-ultra-253b',
|
|
84
|
+
'deepseek-free': 'nvidia/deepseek-v3.2',
|
|
85
|
+
devstral: 'nvidia/devstral-2-123b',
|
|
86
|
+
'qwen-coder': 'nvidia/qwen3-coder-480b',
|
|
87
|
+
maverick: 'nvidia/llama-4-maverick',
|
|
88
|
+
// Minimax
|
|
89
|
+
minimax: 'minimax/minimax-m2.7',
|
|
90
|
+
// Others
|
|
60
91
|
glm: 'zai/glm-5',
|
|
92
|
+
kimi: 'moonshot/kimi-k2.5',
|
|
61
93
|
};
|
|
62
94
|
// Model pricing (per 1M tokens) - used for stats
|
|
63
95
|
const MODEL_PRICING = {
|
|
@@ -66,27 +98,68 @@ const MODEL_PRICING = {
|
|
|
66
98
|
'blockrun/eco': { input: 0.2, output: 1.0 },
|
|
67
99
|
'blockrun/premium': { input: 3.0, output: 15.0 },
|
|
68
100
|
'blockrun/free': { input: 0, output: 0 },
|
|
69
|
-
//
|
|
101
|
+
// FREE - NVIDIA models
|
|
102
|
+
'nvidia/gpt-oss-120b': { input: 0, output: 0 },
|
|
103
|
+
'nvidia/gpt-oss-20b': { input: 0, output: 0 },
|
|
104
|
+
'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
|
|
105
|
+
'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
|
|
106
|
+
'nvidia/nemotron-super-49b': { input: 0, output: 0 },
|
|
107
|
+
'nvidia/deepseek-v3.2': { input: 0, output: 0 },
|
|
108
|
+
'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
|
|
109
|
+
'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
|
|
110
|
+
'nvidia/devstral-2-123b': { input: 0, output: 0 },
|
|
111
|
+
'nvidia/glm-4.7': { input: 0, output: 0 },
|
|
112
|
+
'nvidia/llama-4-maverick': { input: 0, output: 0 },
|
|
113
|
+
// Anthropic
|
|
70
114
|
'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
|
|
71
115
|
'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
|
|
72
116
|
'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
|
|
73
|
-
|
|
117
|
+
// OpenAI
|
|
118
|
+
'openai/gpt-5-nano': { input: 0.05, output: 0.4 },
|
|
119
|
+
'openai/gpt-4.1-nano': { input: 0.1, output: 0.4 },
|
|
120
|
+
'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
|
|
74
121
|
'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
|
|
75
|
-
'
|
|
122
|
+
'openai/gpt-4.1-mini': { input: 0.4, output: 1.6 },
|
|
123
|
+
'openai/gpt-5.2': { input: 1.75, output: 14.0 },
|
|
124
|
+
'openai/gpt-5.3': { input: 1.75, output: 14.0 },
|
|
125
|
+
'openai/gpt-5.3-codex': { input: 1.75, output: 14.0 },
|
|
126
|
+
'openai/gpt-4.1': { input: 2.0, output: 8.0 },
|
|
127
|
+
'openai/o3': { input: 2.0, output: 8.0 },
|
|
128
|
+
'openai/gpt-4o': { input: 2.5, output: 10.0 },
|
|
129
|
+
'openai/gpt-5.4': { input: 2.5, output: 15.0 },
|
|
130
|
+
'openai/o1-mini': { input: 1.1, output: 4.4 },
|
|
131
|
+
'openai/o3-mini': { input: 1.1, output: 4.4 },
|
|
132
|
+
'openai/o4-mini': { input: 1.1, output: 4.4 },
|
|
133
|
+
'openai/o1': { input: 15.0, output: 60.0 },
|
|
134
|
+
'openai/gpt-5.2-pro': { input: 21.0, output: 168.0 },
|
|
135
|
+
'openai/gpt-5.4-pro': { input: 30.0, output: 180.0 },
|
|
136
|
+
// Google
|
|
137
|
+
'google/gemini-2.5-flash-lite': { input: 0.1, output: 0.4 },
|
|
76
138
|
'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
|
|
77
|
-
'
|
|
78
|
-
'
|
|
79
|
-
'
|
|
139
|
+
'google/gemini-3-flash-preview': { input: 0.5, output: 3.0 },
|
|
140
|
+
'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
|
|
141
|
+
'google/gemini-3-pro-preview': { input: 2.0, output: 12.0 },
|
|
142
|
+
'google/gemini-3.1-pro': { input: 2.0, output: 12.0 },
|
|
143
|
+
// xAI
|
|
80
144
|
'xai/grok-4-fast': { input: 0.2, output: 0.5 },
|
|
145
|
+
'xai/grok-4-fast-reasoning': { input: 0.2, output: 0.5 },
|
|
146
|
+
'xai/grok-4-1-fast': { input: 0.2, output: 0.5 },
|
|
81
147
|
'xai/grok-4-1-fast-reasoning': { input: 0.2, output: 0.5 },
|
|
82
|
-
'
|
|
83
|
-
'
|
|
148
|
+
'xai/grok-4-0709': { input: 0.2, output: 1.5 },
|
|
149
|
+
'xai/grok-3-mini': { input: 0.3, output: 0.5 },
|
|
150
|
+
'xai/grok-2-vision': { input: 2.0, output: 10.0 },
|
|
151
|
+
'xai/grok-3': { input: 3.0, output: 15.0 },
|
|
152
|
+
// DeepSeek
|
|
153
|
+
'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
|
|
154
|
+
'deepseek/deepseek-reasoner': { input: 0.28, output: 0.42 },
|
|
155
|
+
// Minimax
|
|
156
|
+
'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
|
|
157
|
+
'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
|
|
158
|
+
// Others
|
|
84
159
|
'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
|
|
85
|
-
'
|
|
86
|
-
'
|
|
87
|
-
'
|
|
88
|
-
'google/gemini-2.5-flash-lite': { input: 0.08, output: 0.3 },
|
|
89
|
-
'google/gemini-3.1-pro': { input: 1.25, output: 10.0 },
|
|
160
|
+
'nvidia/kimi-k2.5': { input: 0.55, output: 2.5 },
|
|
161
|
+
'zai/glm-5': { input: 1.0, output: 3.2 },
|
|
162
|
+
'zai/glm-5-turbo': { input: 1.2, output: 4.0 },
|
|
90
163
|
};
|
|
91
164
|
function estimateCost(model, inputTokens, outputTokens) {
|
|
92
165
|
const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
|
|
package/dist/proxy/sse-translator.d.ts
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSE Event Translator: OpenAI → Anthropic Messages API format
|
|
3
|
+
*
|
|
4
|
+
* Handles three critical gaps in the streaming pipeline:
|
|
5
|
+
* 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
|
|
6
|
+
* 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
|
|
7
|
+
* 3. Ensures proper content_block_stop and message_stop events
|
|
8
|
+
*/
|
|
9
|
+
export declare class SSETranslator {
|
|
10
|
+
private state;
|
|
11
|
+
private buffer;
|
|
12
|
+
constructor(model?: string);
|
|
13
|
+
/**
|
|
14
|
+
* Detect whether an SSE chunk is in OpenAI format.
|
|
15
|
+
* Returns true if it contains OpenAI-style `choices[].delta` structure.
|
|
16
|
+
*/
|
|
17
|
+
static isOpenAIFormat(chunk: string): boolean;
|
|
18
|
+
/**
|
|
19
|
+
* Process a raw SSE text chunk and return translated Anthropic-format SSE events.
|
|
20
|
+
* Returns null if no translation needed (already Anthropic format or not parseable).
|
|
21
|
+
*/
|
|
22
|
+
processChunk(rawChunk: string): string | null;
|
|
23
|
+
private parseSSEEvents;
|
|
24
|
+
private formatSSE;
|
|
25
|
+
private closeThinkingBlock;
|
|
26
|
+
private closeTextBlock;
|
|
27
|
+
private closeToolCalls;
|
|
28
|
+
private closeActiveBlocks;
|
|
29
|
+
}
|
|
package/dist/proxy/sse-translator.js
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSE Event Translator: OpenAI → Anthropic Messages API format
|
|
3
|
+
*
|
|
4
|
+
* Handles three critical gaps in the streaming pipeline:
|
|
5
|
+
* 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
|
|
6
|
+
* 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
|
|
7
|
+
* 3. Ensures proper content_block_stop and message_stop events
|
|
8
|
+
*/
|
|
9
|
+
// ─── SSE Translator ─────────────────────────────────────────────────────────
|
|
10
|
+
/**
 * Stateful translator that converts an OpenAI-style chat-completions SSE
 * stream (`choices[].delta`) into Anthropic Messages API streaming events
 * (`message_start`, `content_block_*`, `message_delta`, `message_stop`).
 *
 * One instance handles exactly one response stream: feed every raw network
 * chunk to `processChunk()` in arrival order and forward whatever it returns.
 */
export class SSETranslator {
    /** Mutable per-stream translation state; fields are set in the constructor. */
    state;
    /** Raw SSE text received so far that has not yet formed a complete event. */
    buffer = '';
    /**
     * @param {string} [model='unknown'] Model name reported in `message_start`
     *   until the upstream stream announces its own `model` value.
     */
    constructor(model = 'unknown') {
        this.state = {
            messageId: `msg_brcc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
            model,
            // Next free content-block index. While a text/thinking block is open
            // it is that block's index; tool blocks reserve their index up front.
            blockIndex: 0,
            // OpenAI tool_call index -> { id, name, blockIndex } for open tool blocks.
            activeToolCalls: new Map(),
            thinkingBlockActive: false,
            textBlockActive: false,
            messageStarted: false,
            inputTokens: 0,
            outputTokens: 0,
            // Anthropic stop reason derived from the upstream finish_reason, if any.
            stopReason: null,
            // output_tokens value carried by the last message_delta (null = none sent).
            reportedOutputTokens: null,
        };
    }
    /**
     * Detect whether an SSE chunk is in OpenAI format.
     *
     * @param {string} chunk Raw SSE text.
     * @returns {boolean} true if it contains OpenAI-style `choices[].delta`
     *   markers and no Anthropic `content_block_*` markers.
     */
    static isOpenAIFormat(chunk) {
        // Plain substring probes: cheap, and good enough to tell the two wire formats apart.
        return (chunk.includes('"choices"') &&
            chunk.includes('"delta"') &&
            !chunk.includes('"content_block_'));
    }
    /**
     * Process a raw SSE text chunk and return translated Anthropic-format SSE events.
     *
     * @param {string} rawChunk Raw SSE text; may contain partial or multiple events.
     * @returns {string | null} Concatenated Anthropic SSE frames, or null when the
     *   chunk produced no output (incomplete event, unparseable or non-OpenAI data).
     */
    processChunk(rawChunk) {
        this.buffer += rawChunk;
        const events = this.parseSSEEvents();
        if (events.length === 0) {
            return null;
        }
        const translated = [];
        for (const event of events) {
            if (event.data.trim() === '[DONE]') {
                translated.push(...this.closeActiveBlocks());
                // A message_delta was normally sent with finish_reason already. Send
                // another only if none went out or late usage changed the token count,
                // and always reuse the recorded stop reason instead of forcing end_turn.
                if (this.state.reportedOutputTokens !== this.state.outputTokens) {
                    translated.push(this.buildMessageDelta());
                }
                translated.push(this.formatSSE('message_stop', { type: 'message_stop' }));
                continue;
            }
            let parsed;
            try {
                parsed = JSON.parse(event.data);
            }
            catch {
                // Deliberately skipped: a non-JSON payload cannot be translated.
                continue;
            }
            if (parsed === null || typeof parsed !== 'object') {
                continue;
            }
            // Usage may arrive on its own event or alongside the final choice.
            this.recordUsage(parsed.usage);
            if (!Array.isArray(parsed.choices) || parsed.choices.length === 0) {
                continue;
            }
            // Emit message_start on first chunk
            if (!this.state.messageStarted) {
                this.state.messageStarted = true;
                if (parsed.model) {
                    this.state.model = parsed.model;
                }
                translated.push(this.formatSSE('message_start', {
                    type: 'message_start',
                    message: {
                        id: this.state.messageId,
                        type: 'message',
                        role: 'assistant',
                        model: this.state.model,
                        content: [],
                        stop_reason: null,
                        stop_sequence: null,
                        usage: { input_tokens: this.state.inputTokens, output_tokens: 0 },
                    },
                }));
                translated.push(this.formatSSE('ping', { type: 'ping' }));
            }
            const choice = parsed.choices[0];
            // Final chunks from some providers carry finish_reason without a delta.
            const delta = choice?.delta ?? {};
            // ── Reasoning content → thinking block ──
            if (delta.reasoning_content) {
                if (!this.state.thinkingBlockActive) {
                    translated.push(...this.closeTextBlock());
                    translated.push(...this.closeToolCalls());
                    this.state.thinkingBlockActive = true;
                    translated.push(this.formatSSE('content_block_start', {
                        type: 'content_block_start',
                        index: this.state.blockIndex,
                        content_block: { type: 'thinking', thinking: '' },
                    }));
                }
                translated.push(this.formatSSE('content_block_delta', {
                    type: 'content_block_delta',
                    index: this.state.blockIndex,
                    delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
                }));
                // Rough per-chunk estimate; replaced when upstream reports usage.
                this.state.outputTokens++;
            }
            // ── Text content → text block ──
            if (delta.content) {
                translated.push(...this.closeThinkingBlock());
                if (!this.state.textBlockActive) {
                    translated.push(...this.closeToolCalls());
                    this.state.textBlockActive = true;
                    translated.push(this.formatSSE('content_block_start', {
                        type: 'content_block_start',
                        index: this.state.blockIndex,
                        content_block: { type: 'text', text: '' },
                    }));
                }
                translated.push(this.formatSSE('content_block_delta', {
                    type: 'content_block_delta',
                    index: this.state.blockIndex,
                    delta: { type: 'text_delta', text: delta.content },
                }));
                this.state.outputTokens++;
            }
            // ── Tool calls → tool_use blocks ──
            if (Array.isArray(delta.tool_calls) && delta.tool_calls.length > 0) {
                translated.push(...this.closeThinkingBlock());
                translated.push(...this.closeTextBlock());
                for (const tc of delta.tool_calls) {
                    translated.push(...this.translateToolCall(tc));
                }
                this.state.outputTokens++;
            }
            // ── Handle finish_reason ──
            if (choice?.finish_reason) {
                translated.push(...this.closeActiveBlocks());
                this.state.stopReason = this.mapFinishReason(choice.finish_reason);
                translated.push(this.buildMessageDelta());
            }
        }
        return translated.length > 0 ? translated.join('') : null;
    }
    // ── Helpers ─────────────────────────────────────────────────────────────
    /**
     * Translate one OpenAI `tool_calls[]` delta entry into Anthropic events.
     * Each tool call reserves its own content-block index when it starts, so
     * parallel calls never share an index and argument fragments are routed to
     * the block that belongs to their call.
     *
     * @param {object} tc One element of `choice.delta.tool_calls`.
     * @returns {string[]} SSE frames to emit (possibly empty).
     */
    translateToolCall(tc) {
        const frames = [];
        const tcIndex = tc.index;
        const fn = tc.function;
        const existing = this.state.activeToolCalls.get(tcIndex);
        // Same id on the same index is a continuation, not a new call.
        const startsNewCall = Boolean(tc.id && fn?.name) && existing?.id !== tc.id;
        if (startsNewCall) {
            if (existing) {
                // The upstream reused this index for a different call: finish the old block.
                frames.push(this.formatSSE('content_block_stop', {
                    type: 'content_block_stop',
                    index: existing.blockIndex,
                }));
                // Delete first so the re-insert below keeps Map order == block order.
                this.state.activeToolCalls.delete(tcIndex);
            }
            const blockIndex = this.state.blockIndex++;
            this.state.activeToolCalls.set(tcIndex, { id: tc.id, name: fn.name, blockIndex });
            frames.push(this.formatSSE('content_block_start', {
                type: 'content_block_start',
                index: blockIndex,
                content_block: {
                    type: 'tool_use',
                    id: tc.id,
                    name: fn.name,
                    input: {},
                },
            }));
            if (fn.arguments) {
                frames.push(this.formatSSE('content_block_delta', {
                    type: 'content_block_delta',
                    index: blockIndex,
                    delta: { type: 'input_json_delta', partial_json: fn.arguments },
                }));
            }
            return frames;
        }
        if (fn?.arguments) {
            // Continuation: prefer the call registered for this index, else the most
            // recently opened one (covers continuation deltas that omit `index`).
            const target = existing ?? Array.from(this.state.activeToolCalls.values()).pop();
            if (target) {
                frames.push(this.formatSSE('content_block_delta', {
                    type: 'content_block_delta',
                    index: target.blockIndex,
                    delta: { type: 'input_json_delta', partial_json: fn.arguments },
                }));
            }
        }
        return frames;
    }
    /**
     * Copy token counts from an OpenAI `usage` object into the state. Fields
     * that are absent or non-numeric leave the current value untouched.
     *
     * @param {object | null | undefined} usage
     */
    recordUsage(usage) {
        if (!usage) {
            return;
        }
        if (typeof usage.prompt_tokens === 'number') {
            this.state.inputTokens = usage.prompt_tokens;
        }
        if (typeof usage.completion_tokens === 'number') {
            this.state.outputTokens = usage.completion_tokens;
        }
    }
    /**
     * Map an OpenAI finish_reason to the Anthropic stop_reason vocabulary.
     * Unrecognised values are passed through unchanged.
     *
     * @param {string} finishReason
     * @returns {string}
     */
    mapFinishReason(finishReason) {
        switch (finishReason) {
            case 'tool_calls':
            case 'function_call':
                return 'tool_use';
            case 'stop':
                return 'end_turn';
            case 'length':
                return 'max_tokens';
            default:
                return finishReason;
        }
    }
    /**
     * Build a `message_delta` frame from the recorded stop reason (defaulting
     * to `end_turn`) and current output token count, remembering the count so
     * `[DONE]` can tell whether a further delta is needed.
     *
     * @returns {string}
     */
    buildMessageDelta() {
        this.state.reportedOutputTokens = this.state.outputTokens;
        return this.formatSSE('message_delta', {
            type: 'message_delta',
            delta: { stop_reason: this.state.stopReason ?? 'end_turn', stop_sequence: null },
            usage: { output_tokens: this.state.outputTokens },
        });
    }
    /**
     * Extract complete SSE events from `this.buffer`, leaving any trailing
     * incomplete event in the buffer for the next chunk.
     *
     * Accepts LF and CRLF line endings and `data:` with or without the
     * optional space after the colon. An event ends at a blank line, or at the
     * end of the buffer when the last data line was newline-terminated.
     *
     * @returns {{ event: string | undefined, data: string }[]}
     */
    parseSSEEvents() {
        const events = [];
        let currentEvent;
        let dataLines = [];
        let consumed = 0;
        // Running count of characters covered by the lines seen so far,
        // including the '\n' that followed each one.
        let offset = 0;
        for (const rawLine of this.buffer.split('\n')) {
            offset += rawLine.length + 1;
            const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
            if (line.startsWith('event:')) {
                currentEvent = line.slice(6).trim();
            }
            else if (line.startsWith('data:')) {
                const value = line.slice(5);
                dataLines.push(value.startsWith(' ') ? value.slice(1) : value);
            }
            else if (line === '' && dataLines.length > 0) {
                // End of event
                events.push({ event: currentEvent, data: dataLines.join('\n') });
                currentEvent = undefined;
                dataLines = [];
                consumed = offset;
            }
        }
        // Keep unconsumed text in buffer
        if (consumed > 0) {
            this.buffer = this.buffer.slice(consumed);
        }
        return events;
    }
    /**
     * Serialise one Anthropic SSE frame.
     *
     * @param {string} event Event name.
     * @param {object} data JSON-serialisable payload.
     * @returns {string}
     */
    formatSSE(event, data) {
        return `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
    }
    /**
     * Close the open thinking block, if any, and advance the block index.
     *
     * @returns {string[]} Zero or one `content_block_stop` frames.
     */
    closeThinkingBlock() {
        if (!this.state.thinkingBlockActive) {
            return [];
        }
        this.state.thinkingBlockActive = false;
        const events = [
            this.formatSSE('content_block_stop', {
                type: 'content_block_stop',
                index: this.state.blockIndex,
            }),
        ];
        this.state.blockIndex++;
        return events;
    }
    /**
     * Close the open text block, if any, and advance the block index.
     *
     * @returns {string[]} Zero or one `content_block_stop` frames.
     */
    closeTextBlock() {
        if (!this.state.textBlockActive) {
            return [];
        }
        this.state.textBlockActive = false;
        const events = [
            this.formatSSE('content_block_stop', {
                type: 'content_block_stop',
                index: this.state.blockIndex,
            }),
        ];
        this.state.blockIndex++;
        return events;
    }
    /**
     * Close every open tool_use block, in the order the blocks were opened.
     * The block index is not advanced here because each tool block reserved
     * its index when it started.
     *
     * @returns {string[]} One `content_block_stop` frame per open tool call.
     */
    closeToolCalls() {
        if (this.state.activeToolCalls.size === 0) {
            return [];
        }
        const events = [];
        for (const { blockIndex } of this.state.activeToolCalls.values()) {
            events.push(this.formatSSE('content_block_stop', {
                type: 'content_block_stop',
                index: blockIndex,
            }));
        }
        this.state.activeToolCalls.clear();
        return events;
    }
    /**
     * Close whatever blocks are currently open (thinking, text, tool calls).
     *
     * @returns {string[]}
     */
    closeActiveBlocks() {
        const events = [];
        events.push(...this.closeThinkingBlock());
        events.push(...this.closeTextBlock());
        events.push(...this.closeToolCalls());
        return events;
    }
}
|
package/dist/router/index.js
CHANGED
|
@@ -6,11 +6,11 @@
|
|
|
6
6
|
const AUTO_TIERS = {
|
|
7
7
|
SIMPLE: {
|
|
8
8
|
primary: 'google/gemini-2.5-flash',
|
|
9
|
-
fallback: ['deepseek/deepseek-chat', 'nvidia/
|
|
9
|
+
fallback: ['deepseek/deepseek-chat', 'nvidia/nemotron-ultra-253b'],
|
|
10
10
|
},
|
|
11
11
|
MEDIUM: {
|
|
12
12
|
primary: 'moonshot/kimi-k2.5',
|
|
13
|
-
fallback: ['google/gemini-2.5-flash', '
|
|
13
|
+
fallback: ['google/gemini-2.5-flash', 'minimax/minimax-m2.7'],
|
|
14
14
|
},
|
|
15
15
|
COMPLEX: {
|
|
16
16
|
primary: 'google/gemini-3.1-pro',
|
|
@@ -23,20 +23,20 @@ const AUTO_TIERS = {
|
|
|
23
23
|
};
|
|
24
24
|
const ECO_TIERS = {
|
|
25
25
|
SIMPLE: {
|
|
26
|
-
primary: 'nvidia/
|
|
27
|
-
fallback: ['
|
|
26
|
+
primary: 'nvidia/nemotron-ultra-253b',
|
|
27
|
+
fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
|
|
28
28
|
},
|
|
29
29
|
MEDIUM: {
|
|
30
30
|
primary: 'google/gemini-2.5-flash-lite',
|
|
31
|
-
fallback: ['nvidia/
|
|
31
|
+
fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
|
|
32
32
|
},
|
|
33
33
|
COMPLEX: {
|
|
34
34
|
primary: 'google/gemini-2.5-flash-lite',
|
|
35
|
-
fallback: ['deepseek/deepseek-chat'],
|
|
35
|
+
fallback: ['deepseek/deepseek-chat', 'nvidia/mistral-large-3-675b'],
|
|
36
36
|
},
|
|
37
37
|
REASONING: {
|
|
38
38
|
primary: 'xai/grok-4-1-fast-reasoning',
|
|
39
|
-
fallback: ['deepseek/deepseek-reasoner'],
|
|
39
|
+
fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
|
|
40
40
|
},
|
|
41
41
|
};
|
|
42
42
|
const PREMIUM_TIERS = {
|
|
@@ -189,7 +189,7 @@ export function routeRequest(prompt, profile = 'auto') {
|
|
|
189
189
|
// Free profile - always use free model
|
|
190
190
|
if (profile === 'free') {
|
|
191
191
|
return {
|
|
192
|
-
model: 'nvidia/
|
|
192
|
+
model: 'nvidia/nemotron-ultra-253b',
|
|
193
193
|
tier: 'SIMPLE',
|
|
194
194
|
confidence: 1.0,
|
|
195
195
|
signals: ['free-profile'],
|
|
@@ -217,22 +217,48 @@ export function routeRequest(prompt, profile = 'auto') {
|
|
|
217
217
|
// Baseline: Claude Opus at $5/$25 per 1M tokens
|
|
218
218
|
const OPUS_COST_PER_1K = 0.015; // rough average
|
|
219
219
|
const modelCosts = {
|
|
220
|
+
// FREE
|
|
220
221
|
'nvidia/gpt-oss-120b': 0,
|
|
221
|
-
'
|
|
222
|
+
'nvidia/gpt-oss-20b': 0,
|
|
223
|
+
'nvidia/nemotron-ultra-253b': 0,
|
|
224
|
+
'nvidia/nemotron-3-super-120b': 0,
|
|
225
|
+
'nvidia/nemotron-super-49b': 0,
|
|
226
|
+
'nvidia/deepseek-v3.2': 0,
|
|
227
|
+
'nvidia/mistral-large-3-675b': 0,
|
|
228
|
+
'nvidia/qwen3-coder-480b': 0,
|
|
229
|
+
'nvidia/devstral-2-123b': 0,
|
|
230
|
+
'nvidia/glm-4.7': 0,
|
|
231
|
+
'nvidia/llama-4-maverick': 0,
|
|
232
|
+
// Budget
|
|
233
|
+
'openai/gpt-5-nano': 0.0002,
|
|
234
|
+
'openai/gpt-4.1-nano': 0.0003,
|
|
222
235
|
'google/gemini-2.5-flash-lite': 0.0003,
|
|
236
|
+
'xai/grok-4-fast': 0.0004,
|
|
237
|
+
'xai/grok-4-1-fast': 0.0004,
|
|
238
|
+
'xai/grok-4-1-fast-reasoning': 0.0004,
|
|
223
239
|
'deepseek/deepseek-chat': 0.0004,
|
|
224
|
-
'deepseek/deepseek-reasoner': 0.
|
|
225
|
-
'
|
|
226
|
-
'
|
|
227
|
-
'google/gemini-
|
|
240
|
+
'deepseek/deepseek-reasoner': 0.0004,
|
|
241
|
+
'minimax/minimax-m2.7': 0.0008,
|
|
242
|
+
'minimax/minimax-m2.5': 0.0008,
|
|
243
|
+
'google/gemini-2.5-flash': 0.0014,
|
|
244
|
+
'openai/gpt-5-mini': 0.0011,
|
|
245
|
+
'moonshot/kimi-k2.5': 0.0018,
|
|
246
|
+
// Mid-range
|
|
228
247
|
'anthropic/claude-haiku-4.5': 0.003,
|
|
248
|
+
'zai/glm-5': 0.0021,
|
|
249
|
+
'openai/o4-mini': 0.0028,
|
|
250
|
+
'google/gemini-2.5-pro': 0.0056,
|
|
251
|
+
'openai/gpt-5.3-codex': 0.0079,
|
|
252
|
+
'openai/gpt-5.2': 0.0079,
|
|
253
|
+
'openai/gpt-5.3': 0.0079,
|
|
254
|
+
'openai/gpt-4.1': 0.005,
|
|
255
|
+
'openai/o3': 0.005,
|
|
256
|
+
'google/gemini-3.1-pro': 0.007,
|
|
257
|
+
'openai/gpt-5.4': 0.0088,
|
|
258
|
+
// Premium
|
|
229
259
|
'anthropic/claude-sonnet-4.6': 0.009,
|
|
260
|
+
'xai/grok-3': 0.009,
|
|
230
261
|
'anthropic/claude-opus-4.6': 0.015,
|
|
231
|
-
'openai/gpt-5.3-codex': 0.008,
|
|
232
|
-
'openai/gpt-5.4': 0.009,
|
|
233
|
-
'openai/o3': 0.012,
|
|
234
|
-
'openai/o4-mini': 0.006,
|
|
235
|
-
'xai/grok-4-1-fast-reasoning': 0.0004,
|
|
236
262
|
};
|
|
237
263
|
const modelCost = modelCosts[model] ?? 0.005;
|
|
238
264
|
const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
|
|
@@ -257,7 +283,7 @@ export function getFallbackChain(tier, profile = 'auto') {
|
|
|
257
283
|
tierConfigs = PREMIUM_TIERS;
|
|
258
284
|
break;
|
|
259
285
|
case 'free':
|
|
260
|
-
return ['nvidia/
|
|
286
|
+
return ['nvidia/nemotron-ultra-253b'];
|
|
261
287
|
default:
|
|
262
288
|
tierConfigs = AUTO_TIERS;
|
|
263
289
|
}
|