llmflow 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cohere.js
@@ -0,0 +1,225 @@
+ const BaseProvider = require('./base');
+
+ /**
+  * Cohere v2 Chat API provider.
+  *
+  * Key differences from OpenAI:
+  * - Endpoint: POST /v2/chat
+  * - Uses Bearer token authentication
+  * - Response has nested usage structure (tokens.input_tokens, tokens.output_tokens)
+  * - Assistant content is array of {type: "text", text: "..."} objects
+  * - Different finish reasons: COMPLETE, STOP_SEQUENCE, MAX_TOKENS, TOOL_CALL
+  * - Streaming uses granular event types (message-start, content-delta, message-end)
+  */
+ class CohereProvider extends BaseProvider {
+   constructor(config = {}) {
+     super();
+     this.name = 'cohere';
+     this.displayName = 'Cohere';
+     this.hostname = config.hostname || 'api.cohere.com';
+   }
+
+   getTarget(req) {
+     let path = req.path;
+
+     // Map OpenAI-style paths to Cohere paths
+     if (path === '/v1/chat/completions' || path === '/chat/completions') {
+       path = '/v2/chat';
+     }
+
+     return {
+       hostname: this.hostname,
+       port: 443,
+       path: path,
+       protocol: 'https'
+     };
+   }
+
+   transformRequestHeaders(headers, req) {
+     return {
+       'Content-Type': 'application/json',
+       'Authorization': headers.authorization,
+       'X-Client-Name': 'llmflow-proxy'
+     };
+   }
+
+   transformRequestBody(body, req) {
+     if (!body) return body;
+
+     // Cohere v2 is very similar to OpenAI format.
+     // Main differences: top_p -> p, stop -> stop_sequences
+     const transformed = {
+       model: body.model,
+       messages: body.messages,
+       stream: body.stream || false
+     };
+
+     // Optional parameters
+     if (body.max_tokens) transformed.max_tokens = body.max_tokens;
+     if (body.temperature !== undefined) transformed.temperature = body.temperature;
+     if (body.top_p !== undefined) transformed.p = body.top_p; // Cohere uses 'p' not 'top_p'
+     if (body.frequency_penalty !== undefined) transformed.frequency_penalty = body.frequency_penalty;
+     if (body.presence_penalty !== undefined) transformed.presence_penalty = body.presence_penalty;
+     if (body.stop) {
+       transformed.stop_sequences = Array.isArray(body.stop) ? body.stop : [body.stop];
+     }
+
+     return transformed;
+   }
+
+   normalizeResponse(body, req) {
+     if (!body || body.error) {
+       return { data: body, usage: null, model: req.body?.model };
+     }
+
+     // Extract text content from message.content array
+     let textContent = '';
+     if (body.message?.content) {
+       if (Array.isArray(body.message.content)) {
+         textContent = body.message.content
+           .filter(c => c.type === 'text')
+           .map(c => c.text)
+           .join('');
+       } else if (typeof body.message.content === 'string') {
+         textContent = body.message.content;
+       }
+     }
+
+     // Map Cohere finish reasons to OpenAI format
+     const finishReasonMap = {
+       'COMPLETE': 'stop',
+       'STOP_SEQUENCE': 'stop',
+       'MAX_TOKENS': 'length',
+       'TOOL_CALL': 'tool_calls',
+       'ERROR': 'content_filter',
+       'TIMEOUT': 'content_filter'
+     };
+
+     // Extract usage - Cohere has nested structure
+     const tokens = body.usage?.tokens || {};
+     const billedUnits = body.usage?.billed_units || {};
+     const normalizedUsage = {
+       prompt_tokens: tokens.input_tokens || billedUnits.input_tokens || 0,
+       completion_tokens: tokens.output_tokens || billedUnits.output_tokens || 0,
+       total_tokens: (tokens.input_tokens || 0) + (tokens.output_tokens || 0)
+     };
+
+     // Build OpenAI-compatible response
+     const normalized = {
+       id: body.id || `cohere-${Date.now()}`,
+       object: 'chat.completion',
+       model: req.body?.model || 'command',
+       choices: [{
+         index: 0,
+         message: {
+           role: 'assistant',
+           content: textContent
+         },
+         finish_reason: finishReasonMap[body.finish_reason] || 'stop'
+       }],
+       usage: normalizedUsage
+     };
+
+     return {
+       data: normalized,
+       usage: normalizedUsage,
+       model: req.body?.model || 'command'
+     };
+   }
+
+   parseStreamChunk(chunk) {
+     const lines = chunk.split('\n');
+     let content = '';
+     let usage = null;
+     let done = false;
+
+     for (const line of lines) {
+       const trimmed = line.trim();
+       if (!trimmed.startsWith('data:')) continue;
+
+       const payload = trimmed.slice(5).trim();
+       if (payload === '[DONE]') {
+         done = true;
+         continue;
+       }
+
+       try {
+         const json = JSON.parse(payload);
+
+         // Handle different Cohere streaming event types
+         switch (json.type) {
+           case 'content-delta':
+             // Content is in delta.message.content array
+             if (json.delta?.message?.content) {
+               for (const c of json.delta.message.content) {
+                 if (c.type === 'text' && c.text) {
+                   content += c.text;
+                 }
+               }
+             }
+             break;
+
+           case 'message-end':
+             done = true;
+             // Extract usage from message-end event
+             if (json.delta?.usage) {
+               const tokens = json.delta.usage.tokens || {};
+               const billedUnits = json.delta.usage.billed_units || {};
+               usage = {
+                 prompt_tokens: tokens.input_tokens || billedUnits.input_tokens || 0,
+                 completion_tokens: tokens.output_tokens || billedUnits.output_tokens || 0,
+                 total_tokens: (tokens.input_tokens || 0) + (tokens.output_tokens || 0)
+               };
+             }
+             break;
+
+           case 'message-start':
+           case 'content-start':
+           case 'content-end':
+             // These are structural events, no content to extract
+             break;
+         }
+       } catch {
+         // Ignore parse errors for partial chunks
+       }
+     }
+
+     return { content, usage, done };
+   }
+
+   extractUsage(response) {
+     // Handle both normalized and raw Cohere response
+     if (response.usage) {
+       // Already normalized
+       if (response.usage.prompt_tokens !== undefined) {
+         return response.usage;
+       }
+       // Raw Cohere format
+       const tokens = response.usage.tokens || {};
+       const billedUnits = response.usage.billed_units || {};
+       return {
+         prompt_tokens: tokens.input_tokens || billedUnits.input_tokens || 0,
+         completion_tokens: tokens.output_tokens || billedUnits.output_tokens || 0,
+         total_tokens: (tokens.input_tokens || 0) + (tokens.output_tokens || 0)
+       };
+     }
+
+     return { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
+   }
+
+   assembleStreamingResponse(fullContent, usage, req, traceId) {
+     return {
+       id: traceId,
+       object: 'chat.completion',
+       model: req.body?.model || 'command',
+       choices: [{
+         message: { role: 'assistant', content: fullContent },
+         finish_reason: 'stop'
+       }],
+       usage: usage,
+       _streaming: true
+     };
+   }
+ }
+
+ module.exports = CohereProvider;
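For orientation, here is a minimal sketch of the normalization path above, assuming the file ships as cohere.js next to the base.js that provides BaseProvider; the sample payload is illustrative, not a captured Cohere response:

    const CohereProvider = require('./cohere');

    const provider = new CohereProvider();

    // A trimmed-down Cohere v2 /chat response containing only the fields the
    // provider actually reads (message.content, finish_reason, usage.tokens).
    const raw = {
      id: 'resp_123',
      message: { content: [{ type: 'text', text: 'Hello!' }] },
      finish_reason: 'COMPLETE',
      usage: { tokens: { input_tokens: 12, output_tokens: 3 } }
    };

    const req = { body: { model: 'command-r' } };
    const { data, usage } = provider.normalizeResponse(raw, req);

    console.log(data.choices[0].message.content); // 'Hello!'
    console.log(data.choices[0].finish_reason);   // 'stop' (mapped from COMPLETE)
    console.log(usage); // { prompt_tokens: 12, completion_tokens: 3, total_tokens: 15 }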
gemini.js
@@ -0,0 +1,278 @@
+ const BaseProvider = require('./base');
+
+ /**
+  * Google Gemini provider.
+  * Handles the unique Gemini API format with request/response transformation.
+  *
+  * Key differences from OpenAI:
+  * - API key in query string OR Authorization header
+  * - Different endpoint structure: /v1beta/models/{model}:generateContent
+  * - Different request format (contents, systemInstruction, generationConfig)
+  * - Different response format (candidates, usageMetadata)
+  */
+ class GeminiProvider extends BaseProvider {
+   constructor(config = {}) {
+     super();
+     this.name = 'gemini';
+     this.displayName = 'Google Gemini';
+     this.hostname = config.hostname || 'generativelanguage.googleapis.com';
+     this.apiVersion = config.apiVersion || 'v1beta';
+   }
+
+   getTarget(req) {
+     // Extract model from request body for endpoint construction
+     const model = req.body?.model || 'gemini-2.0-flash';
+     const isStreaming = req.body?.stream === true;
+
+     // Gemini uses different endpoints for streaming
+     const action = isStreaming ? 'streamGenerateContent' : 'generateContent';
+
+     // Build the path with model
+     let path = `/${this.apiVersion}/models/${model}:${action}`;
+
+     // Add API key as query param if provided in headers
+     const apiKey = this.extractApiKey(req.headers);
+     if (apiKey) {
+       path += `?key=${encodeURIComponent(apiKey)}`;
+     }
+
+     return {
+       hostname: this.hostname,
+       port: 443,
+       path: path,
+       protocol: 'https'
+     };
+   }
+
+   extractApiKey(headers) {
+     if (!headers) return null;
+
+     // Check for API key in various header formats
+     let apiKey = headers['x-goog-api-key'];
+
+     if (!apiKey && headers.authorization) {
+       const auth = headers.authorization;
+       if (auth.startsWith('Bearer ')) {
+         apiKey = auth.slice(7);
+       }
+     }
+
+     return apiKey;
+   }
+
+   transformRequestHeaders(headers, req) {
+     // Gemini prefers the API key in the URL, but we can also use a header
+     const result = {
+       'Content-Type': 'application/json'
+     };
+
+     // If using OAuth, include Authorization header
+     if (headers.authorization && !this.extractApiKey(headers)) {
+       result['Authorization'] = headers.authorization;
+     }
+
+     return result;
+   }
+
+   transformRequestBody(body, req) {
+     if (!body) return body;
+
+     // If already in Gemini format, pass through
+     if (body.contents) {
+       return body;
+     }
+
+     // Transform from OpenAI format to Gemini format
+     const transformed = {};
+
+     // Transform messages to contents
+     if (body.messages) {
+       const systemMessages = body.messages.filter(m => m.role === 'system');
+       const otherMessages = body.messages.filter(m => m.role !== 'system');
+
+       // System instruction
+       if (systemMessages.length > 0) {
+         transformed.systemInstruction = {
+           parts: [{ text: systemMessages.map(m => m.content).join('\n') }]
+         };
+       }
+
+       // Contents (user/assistant messages)
+       transformed.contents = otherMessages.map(msg => ({
+         role: msg.role === 'assistant' ? 'model' : 'user',
+         parts: [{ text: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content) }]
+       }));
+     }
+
+     // Generation config
+     const generationConfig = {};
+     if (body.max_tokens) generationConfig.maxOutputTokens = body.max_tokens;
+     if (body.temperature !== undefined) generationConfig.temperature = body.temperature;
+     if (body.top_p !== undefined) generationConfig.topP = body.top_p;
+     if (body.stop) {
+       generationConfig.stopSequences = Array.isArray(body.stop) ? body.stop : [body.stop];
+     }
+
+     if (Object.keys(generationConfig).length > 0) {
+       transformed.generationConfig = generationConfig;
+     }
+
+     return transformed;
+   }
+
+   normalizeResponse(body, req) {
+     if (!body || body.error) {
+       return { data: body, usage: null, model: req.body?.model };
+     }
+
+     // Extract text content from candidates
+     let textContent = '';
+     let finishReason = 'stop';
+
+     if (Array.isArray(body.candidates) && body.candidates.length > 0) {
+       const candidate = body.candidates[0];
+       if (candidate.content?.parts) {
+         textContent = candidate.content.parts
+           .filter(p => p.text)
+           .map(p => p.text)
+           .join('');
+       }
+
+       // Map finish reason
+       const reasonMap = {
+         'STOP': 'stop',
+         'MAX_TOKENS': 'length',
+         'SAFETY': 'content_filter',
+         'RECITATION': 'content_filter'
+       };
+       finishReason = reasonMap[candidate.finishReason] || candidate.finishReason?.toLowerCase() || 'stop';
+     }
+
+     // Extract usage
+     const usage = body.usageMetadata || {};
+     const normalizedUsage = {
+       prompt_tokens: usage.promptTokenCount || 0,
+       completion_tokens: usage.candidatesTokenCount || 0,
+       total_tokens: usage.totalTokenCount || 0
+     };
+
+     // Build OpenAI-compatible response
+     const normalized = {
+       id: `gemini-${Date.now()}`,
+       object: 'chat.completion',
+       model: req.body?.model || 'gemini',
+       choices: [{
+         index: 0,
+         message: {
+           role: 'assistant',
+           content: textContent
+         },
+         finish_reason: finishReason
+       }],
+       usage: normalizedUsage
+     };
+
+     return {
+       data: normalized,
+       usage: normalizedUsage,
+       model: req.body?.model || 'gemini'
+     };
+   }
+
+   parseStreamChunk(chunk) {
+     const lines = chunk.split('\n');
+     let content = '';
+     let usage = null;
+     let done = false;
+
+     for (const line of lines) {
+       const trimmed = line.trim();
+       if (!trimmed) continue;
+
+       // Gemini streaming returns JSON array items or objects
+       try {
+         let json;
+
+         // Handle data: prefix if present
+         if (trimmed.startsWith('data:')) {
+           const payload = trimmed.slice(5).trim();
+           if (payload === '[DONE]') {
+             done = true;
+             continue;
+           }
+           json = JSON.parse(payload);
+         } else if (trimmed.startsWith('[') || trimmed.startsWith('{')) {
+           // Direct JSON response (Gemini sometimes returns an array of chunks)
+           json = JSON.parse(trimmed);
+         } else {
+           continue;
+         }
+
+         // Process every chunk object, whether it arrived alone or in an array
+         const items = Array.isArray(json) ? json : [json];
+         for (const item of items) {
+           // Extract content from candidates
+           if (item.candidates?.[0]?.content?.parts) {
+             for (const part of item.candidates[0].content.parts) {
+               if (part.text) content += part.text;
+             }
+           }
+
+           // Check for usage metadata
+           if (item.usageMetadata) {
+             usage = {
+               prompt_tokens: item.usageMetadata.promptTokenCount || 0,
+               completion_tokens: item.usageMetadata.candidatesTokenCount || 0,
+               total_tokens: item.usageMetadata.totalTokenCount || 0
+             };
+           }
+
+           // Check finish reason
+           if (item.candidates?.[0]?.finishReason) {
+             done = true;
+           }
+         }
+       } catch {
+         // Ignore parse errors for partial chunks
+       }
+     }
+
+     return { content, usage, done };
+   }
+
+   extractUsage(response) {
+     // Handle both normalized and raw Gemini response
+     if (response.usage) {
+       return {
+         prompt_tokens: response.usage.prompt_tokens || response.usage.promptTokenCount || 0,
+         completion_tokens: response.usage.completion_tokens || response.usage.candidatesTokenCount || 0,
+         total_tokens: response.usage.total_tokens || response.usage.totalTokenCount || 0
+       };
+     }
+
+     if (response.usageMetadata) {
+       return {
+         prompt_tokens: response.usageMetadata.promptTokenCount || 0,
+         completion_tokens: response.usageMetadata.candidatesTokenCount || 0,
+         total_tokens: response.usageMetadata.totalTokenCount || 0
+       };
+     }
+
+     return { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
+   }
+
+   assembleStreamingResponse(fullContent, usage, req, traceId) {
+     return {
+       id: traceId,
+       object: 'chat.completion',
+       model: req.body?.model || 'gemini',
+       choices: [{
+         message: { role: 'assistant', content: fullContent },
+         finish_reason: 'stop'
+       }],
+       usage: usage,
+       _streaming: true
+     };
+   }
+ }
+
+ module.exports = GeminiProvider;
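As a quick illustration of the OpenAI-to-Gemini request mapping above (a sketch assuming the file is saved as gemini.js; note that model is dropped from the body because getTarget moves it into the URL path):

    const GeminiProvider = require('./gemini');

    const provider = new GeminiProvider();
    const openaiBody = {
      model: 'gemini-2.0-flash',
      messages: [
        { role: 'system', content: 'You are terse.' },
        { role: 'user', content: 'Hi' }
      ],
      max_tokens: 64,
      temperature: 0.2
    };

    console.log(JSON.stringify(provider.transformRequestBody(openaiBody, {}), null, 2));
    // {
    //   "systemInstruction": { "parts": [{ "text": "You are terse." }] },
    //   "contents": [{ "role": "user", "parts": [{ "text": "Hi" }] }],
    //   "generationConfig": { "maxOutputTokens": 64, "temperature": 0.2 }
    // }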
index.js
@@ -0,0 +1,130 @@
+ const BaseProvider = require('./base');
+ const OpenAIProvider = require('./openai');
+ const OllamaProvider = require('./ollama');
+ const AnthropicProvider = require('./anthropic');
+ const GeminiProvider = require('./gemini');
+ const CohereProvider = require('./cohere');
+ const AzureOpenAIProvider = require('./azure');
+ const {
+   OpenAICompatibleProvider,
+   GroqProvider,
+   MistralProvider,
+   TogetherProvider,
+   PerplexityProvider,
+   OpenRouterProvider
+ } = require('./openai-compatible');
+
+ /**
+  * Provider Registry
+  * Maps path prefixes to provider instances
+  */
+ class ProviderRegistry {
+   constructor() {
+     this.providers = new Map();
+     this.defaultProvider = null;
+
+     // Register default providers
+     this.registerDefaults();
+   }
+
+   registerDefaults() {
+     // Default OpenAI provider (no prefix)
+     this.defaultProvider = new OpenAIProvider();
+
+     // Path-based providers
+     this.register('ollama', new OllamaProvider());
+     this.register('anthropic', new AnthropicProvider());
+     this.register('gemini', new GeminiProvider());
+     this.register('cohere', new CohereProvider());
+     this.register('azure', new AzureOpenAIProvider());
+     this.register('groq', GroqProvider);
+     this.register('mistral', MistralProvider);
+     this.register('together', TogetherProvider);
+     this.register('perplexity', PerplexityProvider);
+     this.register('openrouter', OpenRouterProvider);
+   }
+
+   /**
+    * Register a provider with a path prefix
+    * @param {string} prefix - URL path prefix (e.g., 'anthropic' for /anthropic/v1/...)
+    * @param {BaseProvider} provider - Provider instance
+    */
+   register(prefix, provider) {
+     this.providers.set(prefix.toLowerCase(), provider);
+   }
+
+   /**
+    * Get a provider based on request path or header
+    * @param {Object} req - Express request object
+    * @returns {{ provider: BaseProvider, cleanPath: string }}
+    */
+   resolve(req) {
+     // Check for X-LLMFlow-Provider header override
+     const headerProvider = req.headers['x-llmflow-provider'];
+     if (headerProvider && this.providers.has(headerProvider.toLowerCase())) {
+       return {
+         provider: this.providers.get(headerProvider.toLowerCase()),
+         cleanPath: req.path
+       };
+     }
+
+     // Check path prefix: /ollama/v1/... -> ollama provider
+     const pathMatch = req.path.match(/^\/([^\/]+)(\/.*)?$/);
+     if (pathMatch) {
+       const prefix = pathMatch[1].toLowerCase();
+       if (this.providers.has(prefix)) {
+         const cleanPath = pathMatch[2] || '/';
+         return {
+           provider: this.providers.get(prefix),
+           cleanPath: cleanPath
+         };
+       }
+     }
+
+     // Default to OpenAI
+     return {
+       provider: this.defaultProvider,
+       cleanPath: req.path
+     };
+   }
+
+   /**
+    * List all registered providers
+    * @returns {Array} List of { name, displayName, prefix }
+    */
+   list() {
+     const result = [{
+       name: this.defaultProvider.name,
+       displayName: this.defaultProvider.displayName,
+       prefix: '/v1/*',
+       default: true
+     }];
+
+     for (const [prefix, provider] of this.providers) {
+       result.push({
+         name: provider.name,
+         displayName: provider.displayName,
+         prefix: `/${prefix}/v1/*`,
+         default: false
+       });
+     }
+
+     return result;
+   }
+ }
+
+ // Singleton instance
+ const registry = new ProviderRegistry();
+
+ module.exports = {
+   registry,
+   ProviderRegistry,
+   BaseProvider,
+   OpenAIProvider,
+   OllamaProvider,
+   AnthropicProvider,
+   GeminiProvider,
+   CohereProvider,
+   AzureOpenAIProvider,
+   OpenAICompatibleProvider
+ };
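A short sketch of the three resolution paths, assuming this registry file is the providers' index.js and that the default OpenAIProvider names itself 'openai' (plain objects stand in for Express requests, since resolve only reads headers and path):

    const { registry } = require('./index');

    // 1. Path-prefix routing: the prefix is stripped before proxying.
    let r = registry.resolve({ headers: {}, path: '/ollama/v1/chat/completions' });
    console.log(r.provider.name, r.cleanPath); // 'ollama' '/v1/chat/completions'

    // 2. The X-LLMFlow-Provider header overrides the path and leaves it intact.
    r = registry.resolve({
      headers: { 'x-llmflow-provider': 'gemini' },
      path: '/v1/chat/completions'
    });
    console.log(r.provider.name, r.cleanPath); // 'gemini' '/v1/chat/completions'

    // 3. No prefix and no header falls through to the default provider.
    r = registry.resolve({ headers: {}, path: '/v1/chat/completions' });
    console.log(r.provider.name); // 'openai' (assumed default provider name)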
ollama.js
@@ -0,0 +1,36 @@
+ const BaseProvider = require('./base');
+
+ /**
+  * Ollama provider - local LLM server with OpenAI-compatible API.
+  * Uses HTTP instead of HTTPS.
+  */
+ class OllamaProvider extends BaseProvider {
+   constructor(config = {}) {
+     super();
+     this.name = 'ollama';
+     this.displayName = 'Ollama';
+     this.hostname = config.hostname || process.env.OLLAMA_HOST || 'localhost';
+     this.port = config.port || parseInt(process.env.OLLAMA_PORT, 10) || 11434;
+   }
+
+   getTarget(req) {
+     return {
+       hostname: this.hostname,
+       port: this.port,
+       path: req.path,
+       protocol: 'http'
+     };
+   }
+
+   transformRequestHeaders(headers, req) {
+     return {
+       'Content-Type': 'application/json'
+     };
+   }
+
+   getHttpModule() {
+     return require('http');
+   }
+ }
+
+ module.exports = OllamaProvider;
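And a small sketch of how the Ollama target is built, assuming the file ships as ollama.js; note that the environment variables are read once, in the constructor:

    // Environment overrides must be set before the provider is constructed.
    process.env.OLLAMA_HOST = 'ollama.internal';
    process.env.OLLAMA_PORT = '8080';

    const OllamaProvider = require('./ollama');
    const provider = new OllamaProvider();

    console.log(provider.getTarget({ path: '/v1/chat/completions' }));
    // { hostname: 'ollama.internal', port: 8080,
    //   path: '/v1/chat/completions', protocol: 'http' }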