llmflow 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -0
- package/bin/llmflow.js +91 -0
- package/db.js +857 -0
- package/logger.js +122 -0
- package/otlp-export.js +564 -0
- package/otlp-logs.js +238 -0
- package/otlp-metrics.js +300 -0
- package/otlp.js +398 -0
- package/package.json +62 -0
- package/pricing.fallback.json +58 -0
- package/pricing.js +154 -0
- package/providers/anthropic.js +195 -0
- package/providers/azure.js +159 -0
- package/providers/base.js +145 -0
- package/providers/cohere.js +225 -0
- package/providers/gemini.js +278 -0
- package/providers/index.js +130 -0
- package/providers/ollama.js +36 -0
- package/providers/openai-compatible.js +77 -0
- package/providers/openai.js +217 -0
- package/providers/passthrough.js +573 -0
- package/public/app.js +1484 -0
- package/public/index.html +367 -0
- package/public/style.css +1152 -0
- package/server.js +1222 -0

package/providers/cohere.js (new file)
@@ -0,0 +1,225 @@
const BaseProvider = require('./base');

/**
 * Cohere v2 Chat API provider.
 *
 * Key differences from OpenAI:
 * - Endpoint: POST /v2/chat
 * - Uses Bearer token authentication
 * - Response has nested usage structure (tokens.input_tokens, tokens.output_tokens)
 * - Assistant content is array of {type: "text", text: "..."} objects
 * - Different finish reasons: COMPLETE, STOP_SEQUENCE, MAX_TOKENS, TOOL_CALL
 * - Streaming uses granular event types (message-start, content-delta, message-end)
 */
class CohereProvider extends BaseProvider {
  constructor(config = {}) {
    super();
    this.name = 'cohere';
    this.displayName = 'Cohere';
    this.hostname = config.hostname || 'api.cohere.com';
  }

  getTarget(req) {
    let path = req.path;

    // Map OpenAI-style paths to Cohere paths
    if (path === '/v1/chat/completions' || path === '/chat/completions') {
      path = '/v2/chat';
    }

    return {
      hostname: this.hostname,
      port: 443,
      path: path,
      protocol: 'https'
    };
  }

  transformRequestHeaders(headers, req) {
    return {
      'Content-Type': 'application/json',
      'Authorization': headers.authorization,
      'X-Client-Name': 'llmflow-proxy'
    };
  }

  transformRequestBody(body, req) {
    if (!body) return body;

    // Cohere v2 is very similar to OpenAI format
    // Main differences: top_p -> p, stop -> stop_sequences
    const transformed = {
      model: body.model,
      messages: body.messages,
      stream: body.stream || false
    };

    // Optional parameters
    if (body.max_tokens) transformed.max_tokens = body.max_tokens;
    if (body.temperature !== undefined) transformed.temperature = body.temperature;
    if (body.top_p !== undefined) transformed.p = body.top_p; // Cohere uses 'p' not 'top_p'
    if (body.frequency_penalty !== undefined) transformed.frequency_penalty = body.frequency_penalty;
    if (body.presence_penalty !== undefined) transformed.presence_penalty = body.presence_penalty;
    if (body.stop) {
      transformed.stop_sequences = Array.isArray(body.stop) ? body.stop : [body.stop];
    }

    return transformed;
  }

  normalizeResponse(body, req) {
    if (!body || body.error) {
      return { data: body, usage: null, model: req.body?.model };
    }

    // Extract text content from message.content array
    let textContent = '';
    if (body.message?.content) {
      if (Array.isArray(body.message.content)) {
        textContent = body.message.content
          .filter(c => c.type === 'text')
          .map(c => c.text)
          .join('');
      } else if (typeof body.message.content === 'string') {
        textContent = body.message.content;
      }
    }

    // Map Cohere finish reasons to OpenAI format
    const finishReasonMap = {
      'COMPLETE': 'stop',
      'STOP_SEQUENCE': 'stop',
      'MAX_TOKENS': 'length',
      'TOOL_CALL': 'tool_calls',
      'ERROR': 'content_filter',
      'TIMEOUT': 'content_filter'
    };

    // Extract usage - Cohere has nested structure
    const tokens = body.usage?.tokens || {};
    const billedUnits = body.usage?.billed_units || {};
    const normalizedUsage = {
      prompt_tokens: tokens.input_tokens || billedUnits.input_tokens || 0,
      completion_tokens: tokens.output_tokens || billedUnits.output_tokens || 0,
      total_tokens: (tokens.input_tokens || 0) + (tokens.output_tokens || 0)
    };

    // Build OpenAI-compatible response
    const normalized = {
      id: body.id || `cohere-${Date.now()}`,
      object: 'chat.completion',
      model: req.body?.model || 'command',
      choices: [{
        index: 0,
        message: {
          role: 'assistant',
          content: textContent
        },
        finish_reason: finishReasonMap[body.finish_reason] || 'stop'
      }],
      usage: normalizedUsage
    };

    return {
      data: normalized,
      usage: normalizedUsage,
      model: req.body?.model || 'command'
    };
  }

  parseStreamChunk(chunk) {
    const lines = chunk.split('\n');
    let content = '';
    let usage = null;
    let done = false;

    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed.startsWith('data:')) continue;

      const payload = trimmed.slice(5).trim();
      if (payload === '[DONE]') {
        done = true;
        continue;
      }

      try {
        const json = JSON.parse(payload);

        // Handle different Cohere streaming event types
        switch (json.type) {
          case 'content-delta':
            // Content is in delta.message.content array
            if (json.delta?.message?.content) {
              for (const c of json.delta.message.content) {
                if (c.type === 'text' && c.text) {
                  content += c.text;
                }
              }
            }
            break;

          case 'message-end':
            done = true;
            // Extract usage from message-end event
            if (json.delta?.usage) {
              const tokens = json.delta.usage.tokens || {};
              const billedUnits = json.delta.usage.billed_units || {};
              usage = {
                prompt_tokens: tokens.input_tokens || billedUnits.input_tokens || 0,
                completion_tokens: tokens.output_tokens || billedUnits.output_tokens || 0,
                total_tokens: (tokens.input_tokens || 0) + (tokens.output_tokens || 0)
              };
            }
            break;

          case 'message-start':
          case 'content-start':
          case 'content-end':
            // These are structural events, no content to extract
            break;
        }
      } catch {
        // Ignore parse errors for partial chunks
      }
    }

    return { content, usage, done };
  }

  extractUsage(response) {
    // Handle both normalized and raw Cohere response
    if (response.usage) {
      // Already normalized
      if (response.usage.prompt_tokens !== undefined) {
        return response.usage;
      }
      // Raw Cohere format
      const tokens = response.usage.tokens || {};
      const billedUnits = response.usage.billed_units || {};
      return {
        prompt_tokens: tokens.input_tokens || billedUnits.input_tokens || 0,
        completion_tokens: tokens.output_tokens || billedUnits.output_tokens || 0,
        total_tokens: (tokens.input_tokens || 0) + (tokens.output_tokens || 0)
      };
    }

    return { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
  }

  assembleStreamingResponse(fullContent, usage, req, traceId) {
    return {
      id: traceId,
      object: 'chat.completion',
      model: req.body?.model || 'command',
      choices: [{
        message: { role: 'assistant', content: fullContent },
        finish_reason: 'stop'
      }],
      usage: usage,
      _streaming: true
    };
  }
}

module.exports = CohereProvider;
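
For orientation, here is a minimal sketch (not part of the published package) of how `CohereProvider.normalizeResponse` maps a Cohere v2 chat response onto the OpenAI-style shape the proxy emits. The sample response body, the mock request object, and the require path (assuming the sketch sits at the package root) are illustrative assumptions.

```js
// Sketch only: exercises CohereProvider.normalizeResponse with a fabricated
// Cohere v2 response. The payload below is illustrative, not real API output.
const CohereProvider = require('./providers/cohere');

const provider = new CohereProvider();

const sampleCohereResponse = {
  id: 'resp_123',
  message: {
    role: 'assistant',
    content: [{ type: 'text', text: 'Hello from Cohere.' }]
  },
  finish_reason: 'COMPLETE',
  usage: {
    tokens: { input_tokens: 12, output_tokens: 5 }
  }
};

// Minimal stand-in for the Express request the proxy would have seen.
const fakeReq = { body: { model: 'command-r' } };

const { data, usage } = provider.normalizeResponse(sampleCohereResponse, fakeReq);
console.log(data.choices[0].message.content); // "Hello from Cohere."
console.log(data.choices[0].finish_reason);   // "stop" (mapped from COMPLETE)
console.log(usage); // { prompt_tokens: 12, completion_tokens: 5, total_tokens: 17 }
```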

package/providers/gemini.js (new file)
@@ -0,0 +1,278 @@
const BaseProvider = require('./base');

/**
 * Google Gemini provider.
 * Handles the unique Gemini API format with request/response transformation.
 *
 * Key differences from OpenAI:
 * - API key in query string OR Authorization header
 * - Different endpoint structure: /v1beta/models/{model}:generateContent
 * - Different request format (contents, systemInstruction, generationConfig)
 * - Different response format (candidates, usageMetadata)
 */
class GeminiProvider extends BaseProvider {
  constructor(config = {}) {
    super();
    this.name = 'gemini';
    this.displayName = 'Google Gemini';
    this.hostname = config.hostname || 'generativelanguage.googleapis.com';
    this.apiVersion = config.apiVersion || 'v1beta';
  }

  getTarget(req) {
    // Extract model from request body for endpoint construction
    const model = req.body?.model || 'gemini-2.0-flash';
    const isStreaming = req.body?.stream === true;

    // Gemini uses different endpoints for streaming
    const action = isStreaming ? 'streamGenerateContent' : 'generateContent';

    // Build the path with model
    let path = `/${this.apiVersion}/models/${model}:${action}`;

    // Add API key as query param if provided in headers
    const apiKey = this.extractApiKey(req.headers);
    if (apiKey) {
      path += `?key=${apiKey}`;
    }

    return {
      hostname: this.hostname,
      port: 443,
      path: path,
      protocol: 'https'
    };
  }

  extractApiKey(headers) {
    if (!headers) return null;

    // Check for API key in various header formats
    let apiKey = headers['x-goog-api-key'];

    if (!apiKey && headers.authorization) {
      const auth = headers.authorization;
      if (auth.startsWith('Bearer ')) {
        apiKey = auth.slice(7);
      }
    }

    return apiKey;
  }

  transformRequestHeaders(headers, req) {
    // Gemini prefers API key in URL, but we can also use header
    const result = {
      'Content-Type': 'application/json'
    };

    // If using OAuth, include Authorization header
    if (headers.authorization && !this.extractApiKey(headers)) {
      result['Authorization'] = headers.authorization;
    }

    return result;
  }

  transformRequestBody(body, req) {
    if (!body) return body;

    // If already in Gemini format, pass through
    if (body.contents) {
      return body;
    }

    // Transform from OpenAI format to Gemini format
    const transformed = {};

    // Transform messages to contents
    if (body.messages) {
      const systemMessages = body.messages.filter(m => m.role === 'system');
      const otherMessages = body.messages.filter(m => m.role !== 'system');

      // System instruction
      if (systemMessages.length > 0) {
        transformed.systemInstruction = {
          parts: [{ text: systemMessages.map(m => m.content).join('\n') }]
        };
      }

      // Contents (user/assistant messages)
      transformed.contents = otherMessages.map(msg => ({
        role: msg.role === 'assistant' ? 'model' : 'user',
        parts: [{ text: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content) }]
      }));
    }

    // Generation config
    const generationConfig = {};
    if (body.max_tokens) generationConfig.maxOutputTokens = body.max_tokens;
    if (body.temperature !== undefined) generationConfig.temperature = body.temperature;
    if (body.top_p !== undefined) generationConfig.topP = body.top_p;
    if (body.stop) {
      generationConfig.stopSequences = Array.isArray(body.stop) ? body.stop : [body.stop];
    }

    if (Object.keys(generationConfig).length > 0) {
      transformed.generationConfig = generationConfig;
    }

    return transformed;
  }

  normalizeResponse(body, req) {
    if (!body || body.error) {
      return { data: body, usage: null, model: req.body?.model };
    }

    // Extract text content from candidates
    let textContent = '';
    let finishReason = 'stop';

    if (Array.isArray(body.candidates) && body.candidates.length > 0) {
      const candidate = body.candidates[0];
      if (candidate.content?.parts) {
        textContent = candidate.content.parts
          .filter(p => p.text)
          .map(p => p.text)
          .join('');
      }

      // Map finish reason
      const reasonMap = {
        'STOP': 'stop',
        'MAX_TOKENS': 'length',
        'SAFETY': 'content_filter',
        'RECITATION': 'content_filter'
      };
      finishReason = reasonMap[candidate.finishReason] || candidate.finishReason?.toLowerCase() || 'stop';
    }

    // Extract usage
    const usage = body.usageMetadata || {};
    const normalizedUsage = {
      prompt_tokens: usage.promptTokenCount || 0,
      completion_tokens: usage.candidatesTokenCount || 0,
      total_tokens: usage.totalTokenCount || 0
    };

    // Build OpenAI-compatible response
    const normalized = {
      id: `gemini-${Date.now()}`,
      object: 'chat.completion',
      model: req.body?.model || 'gemini',
      choices: [{
        index: 0,
        message: {
          role: 'assistant',
          content: textContent
        },
        finish_reason: finishReason
      }],
      usage: normalizedUsage
    };

    return {
      data: normalized,
      usage: normalizedUsage,
      model: req.body?.model || 'gemini'
    };
  }

  parseStreamChunk(chunk) {
    const lines = chunk.split('\n');
    let content = '';
    let usage = null;
    let done = false;

    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed) continue;

      // Gemini streaming returns JSON array items or objects
      try {
        let json;

        // Handle data: prefix if present
        if (trimmed.startsWith('data:')) {
          const payload = trimmed.slice(5).trim();
          if (payload === '[DONE]') {
            done = true;
            continue;
          }
          json = JSON.parse(payload);
        } else if (trimmed.startsWith('[') || trimmed.startsWith('{')) {
          // Direct JSON response (Gemini sometimes returns array)
          json = JSON.parse(trimmed);
          if (Array.isArray(json)) {
            json = json[0];
          }
        } else {
          continue;
        }

        // Extract content from candidates
        if (json.candidates?.[0]?.content?.parts) {
          for (const part of json.candidates[0].content.parts) {
            if (part.text) content += part.text;
          }
        }

        // Check for usage metadata
        if (json.usageMetadata) {
          usage = {
            prompt_tokens: json.usageMetadata.promptTokenCount || 0,
            completion_tokens: json.usageMetadata.candidatesTokenCount || 0,
            total_tokens: json.usageMetadata.totalTokenCount || 0
          };
        }

        // Check finish reason
        if (json.candidates?.[0]?.finishReason) {
          done = true;
        }
      } catch {
        // Ignore parse errors for partial chunks
      }
    }

    return { content, usage, done };
  }

  extractUsage(response) {
    // Handle both normalized and raw Gemini response
    if (response.usage) {
      return {
        prompt_tokens: response.usage.prompt_tokens || response.usage.promptTokenCount || 0,
        completion_tokens: response.usage.completion_tokens || response.usage.candidatesTokenCount || 0,
        total_tokens: response.usage.total_tokens || response.usage.totalTokenCount || 0
      };
    }

    if (response.usageMetadata) {
      return {
        prompt_tokens: response.usageMetadata.promptTokenCount || 0,
        completion_tokens: response.usageMetadata.candidatesTokenCount || 0,
        total_tokens: response.usageMetadata.totalTokenCount || 0
      };
    }

    return { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
  }

  assembleStreamingResponse(fullContent, usage, req, traceId) {
    return {
      id: traceId,
      object: 'chat.completion',
      model: req.body?.model || 'gemini',
      choices: [{
        message: { role: 'assistant', content: fullContent },
        finish_reason: 'stop'
      }],
      usage: usage,
      _streaming: true
    };
  }
}

module.exports = GeminiProvider;
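
Similarly, a minimal sketch (sample request and require path are assumptions, not part of the package) of `GeminiProvider.transformRequestBody` converting an OpenAI-style chat body into Gemini's `contents` / `systemInstruction` / `generationConfig` shape. Note that the model name is not placed in the body; `getTarget` moves it into the request URL instead.

```js
// Sketch only: shows how GeminiProvider.transformRequestBody reshapes an
// OpenAI-style chat request into Gemini's request format.
const GeminiProvider = require('./providers/gemini');

const provider = new GeminiProvider();

const openAiStyleBody = {
  model: 'gemini-2.0-flash',
  messages: [
    { role: 'system', content: 'You are terse.' },
    { role: 'user', content: 'Name one prime number.' }
  ],
  max_tokens: 32,
  temperature: 0.2
};

const geminiBody = provider.transformRequestBody(openAiStyleBody, { body: openAiStyleBody });
// geminiBody is roughly:
// {
//   systemInstruction: { parts: [{ text: 'You are terse.' }] },
//   contents: [{ role: 'user', parts: [{ text: 'Name one prime number.' }] }],
//   generationConfig: { maxOutputTokens: 32, temperature: 0.2 }
// }
console.log(JSON.stringify(geminiBody, null, 2));
```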

package/providers/index.js (new file)
@@ -0,0 +1,130 @@
const BaseProvider = require('./base');
const OpenAIProvider = require('./openai');
const OllamaProvider = require('./ollama');
const AnthropicProvider = require('./anthropic');
const GeminiProvider = require('./gemini');
const CohereProvider = require('./cohere');
const AzureOpenAIProvider = require('./azure');
const {
  OpenAICompatibleProvider,
  GroqProvider,
  MistralProvider,
  TogetherProvider,
  PerplexityProvider,
  OpenRouterProvider
} = require('./openai-compatible');

/**
 * Provider Registry
 * Maps path prefixes to provider instances
 */
class ProviderRegistry {
  constructor() {
    this.providers = new Map();
    this.defaultProvider = null;

    // Register default providers
    this.registerDefaults();
  }

  registerDefaults() {
    // Default OpenAI provider (no prefix)
    this.defaultProvider = new OpenAIProvider();

    // Path-based providers
    this.register('ollama', new OllamaProvider());
    this.register('anthropic', new AnthropicProvider());
    this.register('gemini', new GeminiProvider());
    this.register('cohere', new CohereProvider());
    this.register('azure', new AzureOpenAIProvider());
    this.register('groq', GroqProvider);
    this.register('mistral', MistralProvider);
    this.register('together', TogetherProvider);
    this.register('perplexity', PerplexityProvider);
    this.register('openrouter', OpenRouterProvider);
  }

  /**
   * Register a provider with a path prefix
   * @param {string} prefix - URL path prefix (e.g., 'anthropic' for /anthropic/v1/...)
   * @param {BaseProvider} provider - Provider instance
   */
  register(prefix, provider) {
    this.providers.set(prefix.toLowerCase(), provider);
  }

  /**
   * Get a provider based on request path or header
   * @param {Object} req - Express request object
   * @returns {{ provider: BaseProvider, cleanPath: string }}
   */
  resolve(req) {
    // Check for X-LLMFlow-Provider header override
    const headerProvider = req.headers['x-llmflow-provider'];
    if (headerProvider && this.providers.has(headerProvider.toLowerCase())) {
      return {
        provider: this.providers.get(headerProvider.toLowerCase()),
        cleanPath: req.path
      };
    }

    // Check path prefix: /ollama/v1/... -> ollama provider
    const pathMatch = req.path.match(/^\/([^\/]+)(\/.*)?$/);
    if (pathMatch) {
      const prefix = pathMatch[1].toLowerCase();
      if (this.providers.has(prefix)) {
        const cleanPath = pathMatch[2] || '/';
        return {
          provider: this.providers.get(prefix),
          cleanPath: cleanPath
        };
      }
    }

    // Default to OpenAI
    return {
      provider: this.defaultProvider,
      cleanPath: req.path
    };
  }

  /**
   * List all registered providers
   * @returns {Array} List of { name, displayName, prefix }
   */
  list() {
    const result = [{
      name: this.defaultProvider.name,
      displayName: this.defaultProvider.displayName,
      prefix: '/v1/*',
      default: true
    }];

    for (const [prefix, provider] of this.providers) {
      result.push({
        name: provider.name,
        displayName: provider.displayName,
        prefix: `/${prefix}/v1/*`,
        default: false
      });
    }

    return result;
  }
}

// Singleton instance
const registry = new ProviderRegistry();

module.exports = {
  registry,
  ProviderRegistry,
  BaseProvider,
  OpenAIProvider,
  OllamaProvider,
  AnthropicProvider,
  GeminiProvider,
  CohereProvider,
  AzureOpenAIProvider,
  OpenAICompatibleProvider
};
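
A minimal sketch of the routing behaviour of `registry.resolve`, using bare objects in place of Express requests (assuming the sketch sits at the package root so `./providers` resolves to this index). The provider name printed for the default case is an assumption, since the OpenAI provider file is not shown in this excerpt.

```js
// Sketch only: demonstrates path-prefix and header-based provider resolution.
const { registry } = require('./providers');

// Prefixed path -> Gemini provider, with the prefix stripped from the path.
const gemini = registry.resolve({ path: '/gemini/v1/chat/completions', headers: {} });
console.log(gemini.provider.name); // "gemini"
console.log(gemini.cleanPath);     // "/v1/chat/completions"

// No known prefix -> default provider, path untouched.
const fallback = registry.resolve({ path: '/v1/chat/completions', headers: {} });
console.log(fallback.provider.name); // presumably "openai" (default provider)
console.log(fallback.cleanPath);     // "/v1/chat/completions"

// Header override takes priority over the path prefix check.
const viaHeader = registry.resolve({
  path: '/v1/chat/completions',
  headers: { 'x-llmflow-provider': 'ollama' }
});
console.log(viaHeader.provider.name); // "ollama"
```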

package/providers/ollama.js (new file)
@@ -0,0 +1,36 @@
const BaseProvider = require('./base');

/**
 * Ollama provider - local LLM server with OpenAI-compatible API.
 * Uses HTTP instead of HTTPS.
 */
class OllamaProvider extends BaseProvider {
  constructor(config = {}) {
    super();
    this.name = 'ollama';
    this.displayName = 'Ollama';
    this.hostname = config.hostname || process.env.OLLAMA_HOST || 'localhost';
    this.port = config.port || parseInt(process.env.OLLAMA_PORT) || 11434;
  }

  getTarget(req) {
    return {
      hostname: this.hostname,
      port: this.port,
      path: req.path,
      protocol: 'http'
    };
  }

  transformRequestHeaders(headers, req) {
    return {
      'Content-Type': 'application/json'
    };
  }

  getHttpModule() {
    return require('http');
  }
}

module.exports = OllamaProvider;
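
And a small sketch of `OllamaProvider.getTarget`, showing the host/port resolution order (constructor config, then `OLLAMA_HOST` / `OLLAMA_PORT`, then `localhost:11434`); the example host and require path are illustrative assumptions.

```js
// Sketch only: constructor config overrides the OLLAMA_* environment variables,
// which in turn override the localhost:11434 defaults.
const OllamaProvider = require('./providers/ollama');

const provider = new OllamaProvider({ hostname: '192.168.1.20', port: 11434 });
console.log(provider.getTarget({ path: '/v1/chat/completions' }));
// { hostname: '192.168.1.20', port: 11434, path: '/v1/chat/completions', protocol: 'http' }
```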