lynkr 3.2.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +770 -25
- package/ROUTER_COMPARISON.md +173 -0
- package/TIER_ROUTING_PLAN.md +771 -0
- package/docs/GSD_LEARNINGS.md +1116 -0
- package/docs/LOCAL_EMBEDDINGS_PLAN.md +1024 -0
- package/docs/index.md +49 -5
- package/final-test.js +33 -0
- package/package.json +2 -2
- package/src/api/openai-router.js +755 -0
- package/src/api/router.js +4 -0
- package/src/clients/bedrock-utils.js +298 -0
- package/src/clients/databricks.js +265 -0
- package/src/clients/databricks.js.backup +1036 -0
- package/src/clients/openai-format.js +393 -0
- package/src/clients/routing.js +12 -0
- package/src/config/index.js +55 -3
- package/src/orchestrator/index.js +8 -1
- package/src/tools/smart-selection.js +1 -1
- package/test/bedrock-integration.test.js +471 -0
- package/test/cursor-integration.test.js +484 -0
- package/test/llamacpp-integration.test.js +13 -34
- package/test/lmstudio-integration.test.js +335 -0
|
@@ -0,0 +1,755 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI API Compatibility Router
|
|
3
|
+
*
|
|
4
|
+
* Implements OpenAI API endpoints for Cursor IDE compatibility.
|
|
5
|
+
* Routes:
|
|
6
|
+
* - POST /v1/chat/completions - Chat API with streaming support
|
|
7
|
+
* - GET /v1/models - List available models
|
|
8
|
+
* - POST /v1/embeddings - Generate embeddings (via Ollama, llama.cpp, OpenRouter, or OpenAI)
|
|
9
|
+
* - GET /v1/health - Health check
|
|
10
|
+
*
|
|
11
|
+
* Note: If MODEL_PROVIDER=openrouter, the same OPENROUTER_API_KEY is used
|
|
12
|
+
* for both chat completions and embeddings - no additional configuration needed.
|
|
13
|
+
*
|
|
14
|
+
* @module api/openai-router
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const express = require("express");
|
|
18
|
+
const logger = require("../logger");
|
|
19
|
+
const config = require("../config");
|
|
20
|
+
const orchestrator = require("../orchestrator");
|
|
21
|
+
const {
|
|
22
|
+
convertOpenAIToAnthropic,
|
|
23
|
+
convertAnthropicToOpenAI,
|
|
24
|
+
convertAnthropicStreamChunkToOpenAI
|
|
25
|
+
} = require("../clients/openai-format");
|
|
26
|
+
|
|
27
|
+
const router = express.Router();
|
|
28
|
+
|
|
29
|
+
/**
 * POST /v1/chat/completions
 *
 * OpenAI-compatible chat completions endpoint.
 *
 * The OpenAI request body is converted to Anthropic format and handed to the
 * orchestrator. In non-streaming mode the orchestrator result is converted
 * back to OpenAI format and returned as JSON. In streaming mode SSE headers
 * are set here and the response object is passed to the orchestrator, which
 * writes to it directly.
 *
 * Errors are reported in OpenAI error format (HTTP 500) when the response has
 * not been started yet, or as a final SSE chunk when streaming fails.
 */
router.post("/chat/completions", async (req, res) => {
  const startTime = Date.now();
  // Session id precedence: explicit x-session-id header, then the second
  // whitespace-separated token of the Authorization header, then a constant.
  // NOTE(review): the Authorization fallback means a bearer credential doubles
  // as the session identifier - confirm that downstream code never logs or
  // persists sessionId in clear text.
  const sessionId = req.headers["x-session-id"] || req.headers["authorization"]?.split(" ")[1] || "openai-session";

  try {
    logger.info({
      endpoint: "/v1/chat/completions",
      model: req.body.model,
      messageCount: req.body.messages?.length,
      stream: req.body.stream || false,
      hasTools: !!req.body.tools,
      toolCount: req.body.tools?.length || 0
    }, "=== OPENAI CHAT COMPLETION REQUEST ===");

    // Convert OpenAI request to Anthropic format
    const anthropicRequest = convertOpenAIToAnthropic(req.body);

    // Add session ID for tracking
    anthropicRequest.sessionId = sessionId;

    if (req.body.stream) {
      // Set up SSE headers
      res.setHeader("Content-Type", "text/event-stream");
      res.setHeader("Cache-Control", "no-cache");
      res.setHeader("Connection", "keep-alive");

      anthropicRequest.stream = true;

      try {
        // The orchestrator streams straight into the response; its return
        // value is not needed here.
        await orchestrator.orchestrateRequest(anthropicRequest, {
          raw: res,
          writeHead: res.writeHead.bind(res),
          write: res.write.bind(res),
          end: res.end.bind(res)
        });

        logger.info({
          duration: Date.now() - startTime,
          mode: "streaming"
        }, "OpenAI streaming completed");
      } catch (streamError) {
        logger.error({ error: streamError.message }, "Streaming error");

        // If the response was already ended before the failure surfaced,
        // writing again would raise a write-after-end error on the response.
        if (!res.writableEnded) {
          // Send error in OpenAI streaming format
          const errorChunk = {
            id: `chatcmpl-error-${Date.now()}`,
            object: "chat.completion.chunk",
            created: Math.floor(Date.now() / 1000),
            model: req.body.model,
            choices: [
              {
                index: 0,
                delta: {
                  role: "assistant",
                  content: `Error: ${streamError.message}`
                },
                finish_reason: "stop"
              }
            ]
          };

          res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
          res.write("data: [DONE]\n\n");
          res.end();
        }
      }
    } else {
      // Non-streaming mode
      const anthropicResponse = await orchestrator.orchestrateRequest(anthropicRequest);

      // Convert Anthropic response to OpenAI format
      const openaiResponse = convertAnthropicToOpenAI(anthropicResponse, req.body.model);

      logger.info({
        duration: Date.now() - startTime,
        mode: "non-streaming",
        inputTokens: openaiResponse.usage.prompt_tokens,
        outputTokens: openaiResponse.usage.completion_tokens,
        finishReason: openaiResponse.choices[0].finish_reason
      }, "=== OPENAI CHAT COMPLETION RESPONSE ===");

      res.json(openaiResponse);
    }
  } catch (error) {
    logger.error({
      error: error.message,
      stack: error.stack,
      duration: Date.now() - startTime
    }, "OpenAI chat completion error");

    // A status line can no longer be sent once headers are out; setting one
    // would throw inside this async handler and surface as an unhandled
    // rejection. Just make sure the response is closed.
    if (res.headersSent) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }

    // Return OpenAI-format error
    res.status(500).json({
      error: {
        message: error.message || "Internal server error",
        type: "server_error",
        code: "internal_error"
      }
    });
  }
});
|
|
141
|
+
|
|
142
|
+
/**
 * GET /v1/models
 *
 * Lists the models exposed for the configured provider
 * (`config.modelProvider.type`, defaulting to "databricks") as an
 * OpenAI-compatible model list: `{ object: "list", data: [...] }`.
 *
 * Providers with a configurable model/deployment report that value (with a
 * per-provider default); the others report a fixed set of ids. Unknown
 * providers fall back to a single generic "claude-3-5-sonnet" entry.
 */
router.get("/models", (req, res) => {
  // Builds one OpenAI-style model record. All entries share the same fixed
  // `created` timestamp and use their own id as `root`.
  const toModelEntry = (id, ownedBy) => ({
    id,
    object: "model",
    created: 1704067200,
    owned_by: ownedBy,
    permission: [],
    root: id,
    parent: null
  });

  try {
    const provider = config.modelProvider?.type || "databricks";
    let models;

    // Each case only assigns `models`, so no lexical declarations leak into
    // the shared switch scope.
    switch (provider) {
      case "databricks":
        models = ["claude-sonnet-4.5", "claude-opus-4.5"].map((id) => toModelEntry(id, "databricks"));
        break;

      case "bedrock":
        models = [toModelEntry(config.bedrock?.modelId || "anthropic.claude-3-5-sonnet-20241022-v2:0", "aws-bedrock")];
        break;

      case "azure-anthropic":
        models = [toModelEntry("claude-3-5-sonnet", "azure-anthropic")];
        break;

      case "openrouter":
        models = [toModelEntry(config.openrouter?.model || "openai/gpt-4o-mini", "openrouter")];
        break;

      case "openai":
        models = ["gpt-4o", "gpt-4o-mini"].map((id) => toModelEntry(id, "openai"));
        break;

      case "azure-openai":
        models = [toModelEntry(config.azureOpenAI?.deployment || "gpt-4o", "azure-openai")];
        break;

      case "ollama":
        models = [toModelEntry(config.ollama?.model || "qwen2.5-coder:7b", "ollama")];
        break;

      case "llamacpp":
        models = [toModelEntry(config.llamacpp?.model || "default", "llamacpp")];
        break;

      default:
        // Generic model
        models = [toModelEntry("claude-3-5-sonnet", "lynkr")];
    }

    logger.debug({
      provider,
      modelCount: models.length,
      models: models.map((m) => m.id)
    }, "Listed models for OpenAI API");

    res.json({
      object: "list",
      data: models
    });
  } catch (error) {
    logger.error({ error: error.message }, "Error listing models");
    res.status(500).json({
      error: {
        message: error.message || "Failed to list models",
        type: "server_error",
        code: "internal_error"
      }
    });
  }
});
|
|
313
|
+
|
|
314
|
+
/**
 * Determine which provider to use for embeddings.
 *
 * Priority:
 * 1. Explicit EMBEDDINGS_PROVIDER env var (matched case-insensitively). If the
 *    named provider is recognised but its required setting is missing, a
 *    warning is logged and null is returned. An unrecognised value is logged
 *    and ignored, and auto-detection continues.
 * 2. Same provider as the chat provider (`config.modelProvider.type`) when it
 *    is one of the embedding-capable providers and is configured.
 * 3. First configured provider: OpenRouter > OpenAI > Ollama > llama.cpp.
 *
 * @param {string|null} [requestedModel=null] - Model named in the request; when
 *   set it overrides the provider's default embeddings model.
 * @returns {{provider: string, model: string, endpoint: string, apiKey?: string}|null}
 *   Provider configuration, or null when no provider is usable.
 */
function determineEmbeddingProvider(requestedModel = null) {
  // One resolver per provider. Each returns the provider configuration, or
  // null when the setting that enables that provider is absent.
  const resolvers = {
    ollama: () => (config.ollama?.embeddingsModel
      ? {
        provider: "ollama",
        model: requestedModel || config.ollama.embeddingsModel,
        endpoint: config.ollama.embeddingsEndpoint
      }
      : null),
    llamacpp: () => (config.llamacpp?.embeddingsEndpoint
      ? {
        provider: "llamacpp",
        model: requestedModel || "default",
        endpoint: config.llamacpp.embeddingsEndpoint
      }
      : null),
    openrouter: () => (config.openrouter?.apiKey
      ? {
        provider: "openrouter",
        model: requestedModel || config.openrouter.embeddingsModel,
        apiKey: config.openrouter.apiKey,
        endpoint: "https://openrouter.ai/api/v1/embeddings"
      }
      : null),
    openai: () => (config.openai?.apiKey
      ? {
        provider: "openai",
        model: requestedModel || "text-embedding-ada-002",
        apiKey: config.openai.apiKey,
        endpoint: "https://api.openai.com/v1/embeddings"
      }
      : null)
  };

  // Setting named in the warning when an explicitly selected provider is unusable.
  const requiredSetting = {
    ollama: "OLLAMA_EMBEDDINGS_MODEL",
    llamacpp: "LLAMACPP_EMBEDDINGS_ENDPOINT",
    openrouter: "OPENROUTER_API_KEY",
    openai: "OPENAI_API_KEY"
  };

  // Priority 1: Explicit configuration
  const explicitProvider = process.env.EMBEDDINGS_PROVIDER?.trim().toLowerCase();
  if (explicitProvider) {
    if (Object.hasOwn(resolvers, explicitProvider)) {
      const explicitConfig = resolvers[explicitProvider]();
      if (!explicitConfig) {
        logger.warn(`EMBEDDINGS_PROVIDER=${explicitProvider} but ${requiredSetting[explicitProvider]} not set`);
      }
      return explicitConfig;
    }
    // Unknown names still fall through to auto-detection, but no longer silently.
    logger.warn(`Unknown EMBEDDINGS_PROVIDER="${explicitProvider}"; falling back to auto-detection`);
  }

  // Priority 2: Same as chat provider (if supported and configured)
  const chatProvider = config.modelProvider?.type;
  if (typeof chatProvider === "string" && Object.hasOwn(resolvers, chatProvider)) {
    const sameAsChat = resolvers[chatProvider]();
    if (sameAsChat) {
      return sameAsChat;
    }
  }

  // Priority 3: First available provider
  for (const candidate of ["openrouter", "openai", "ollama", "llamacpp"]) {
    const available = resolvers[candidate]();
    if (available) {
      return available;
    }
  }

  return null; // No provider available
}
|
|
440
|
+
|
|
441
|
+
/**
 * Generate embeddings using Ollama.
 *
 * The request body carries a single `prompt`, so inputs are sent one request
 * at a time, in order, and the results are assembled into an OpenAI-style
 * list response. Token usage is reported as 0 because it is not read from the
 * Ollama response.
 *
 * @param {Array} inputs - Inputs to embed, one request per element.
 * @param {{model: string, endpoint: string}} embeddingConfig - Target model and endpoint URL.
 * @returns {Promise<{object: string, data: Array, model: string, usage: Object}>}
 * @throws {Error} When a request fails, returns a non-2xx status, or the
 *   response does not contain an embedding vector. The first failure aborts
 *   the remaining inputs.
 */
async function generateOllamaEmbeddings(inputs, embeddingConfig) {
  const { model, endpoint } = embeddingConfig;

  logger.info({
    model,
    endpoint,
    inputCount: inputs.length
  }, "Generating embeddings with Ollama");

  const embeddings = [];

  for (const [index, input] of inputs.entries()) {
    try {
      const response = await fetch(endpoint, {
        method: "POST",
        headers: {
          "Content-Type": "application/json"
        },
        body: JSON.stringify({
          model: model,
          prompt: input
        })
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Ollama embeddings error (${response.status}): ${errorText}`);
      }

      const data = await response.json();

      // Fail loudly instead of returning an entry whose `embedding` field
      // would be dropped during JSON serialisation.
      if (!Array.isArray(data?.embedding)) {
        throw new Error("Ollama embeddings error: response did not include an embedding vector");
      }

      embeddings.push({
        object: "embedding",
        embedding: data.embedding,
        index
      });
    } catch (error) {
      logger.error({
        error: error.message,
        // String() keeps this preview safe for non-string inputs (e.g. token
        // arrays), so logging can never mask the original error.
        input: String(input).slice(0, 100),
        index
      }, "Failed to generate Ollama embedding");
      throw error;
    }
  }

  // Convert to OpenAI format
  return {
    object: "list",
    data: embeddings,
    model: model,
    usage: {
      prompt_tokens: 0, // not read from the Ollama response
      total_tokens: 0
    }
  };
}
|
|
506
|
+
|
|
507
|
+
/**
 * Generate embeddings using llama.cpp.
 *
 * All inputs are posted in one request. The reply is expected to be in
 * OpenAI list format already; it is re-wrapped so that `data`, `model` and
 * `usage` are always present.
 *
 * @param {Array} inputs - Inputs to embed (sent as one batch).
 * @param {{model: string, endpoint: string}} embeddingConfig - Model label and endpoint URL.
 * @returns {Promise<{object: string, data: Array, model: string, usage: Object}>}
 * @throws {Error} When the request fails or returns a non-2xx status; the
 *   failure is logged and rethrown.
 */
async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
  const { model, endpoint } = embeddingConfig;

  logger.info(
    { model, endpoint, inputCount: inputs.length },
    "Generating embeddings with llama.cpp"
  );

  try {
    const requestInit = {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      // The whole batch goes out in a single request body.
      body: JSON.stringify({ input: inputs, encoding_format: "float" })
    };

    const reply = await fetch(endpoint, requestInit);

    if (!reply.ok) {
      const detail = await reply.text();
      throw new Error(`llama.cpp embeddings error (${reply.status}): ${detail}`);
    }

    const parsed = await reply.json();

    // Fill in any field the server left out so callers see a uniform shape.
    const zeroUsage = { prompt_tokens: 0, total_tokens: 0 };
    return {
      object: "list",
      data: parsed.data || [],
      model: model || parsed.model || "default",
      usage: parsed.usage || zeroUsage
    };
  } catch (failure) {
    logger.error(
      { error: failure.message, endpoint },
      "Failed to generate llama.cpp embeddings"
    );
    throw failure;
  }
}
|
|
558
|
+
|
|
559
|
+
/**
 * Generate embeddings using OpenRouter.
 *
 * Posts the whole batch with bearer authentication and returns the parsed
 * JSON reply unchanged.
 *
 * @param {Array} inputs - Inputs to embed (sent as one batch).
 * @param {{model: string, apiKey: string, endpoint: string}} embeddingConfig
 * @returns {Promise<Object>} The provider's JSON response.
 * @throws {Error} When the provider answers with a non-2xx status.
 */
async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
  const { model, apiKey, endpoint } = embeddingConfig;

  logger.info(
    { model, inputCount: inputs.length },
    "Generating embeddings with OpenRouter"
  );

  const requestHeaders = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${apiKey}`,
    // Attribution headers sent with every OpenRouter call.
    "HTTP-Referer": "https://github.com/vishalveerareddy123/Lynkr",
    "X-Title": "Lynkr"
  };
  const payload = JSON.stringify({ model, input: inputs, encoding_format: "float" });

  const reply = await fetch(endpoint, {
    method: "POST",
    headers: requestHeaders,
    body: payload
  });

  if (reply.ok) {
    return await reply.json();
  }

  const detail = await reply.text();
  throw new Error(`OpenRouter embeddings error (${reply.status}): ${detail}`);
}
|
|
592
|
+
|
|
593
|
+
/**
 * Generate embeddings using OpenAI.
 *
 * Posts the whole batch with bearer authentication and returns the parsed
 * JSON reply unchanged.
 *
 * @param {Array} inputs - Inputs to embed (sent as one batch).
 * @param {{model: string, apiKey: string, endpoint: string}} embeddingConfig
 * @returns {Promise<Object>} The provider's JSON response.
 * @throws {Error} When the provider answers with a non-2xx status.
 */
async function generateOpenAIEmbeddings(inputs, embeddingConfig) {
  const { model, apiKey, endpoint } = embeddingConfig;

  logger.info(
    { model, inputCount: inputs.length },
    "Generating embeddings with OpenAI"
  );

  const requestHeaders = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${apiKey}`
  };
  const payload = JSON.stringify({ model, input: inputs, encoding_format: "float" });

  const reply = await fetch(endpoint, {
    method: "POST",
    headers: requestHeaders,
    body: payload
  });

  if (reply.ok) {
    return await reply.json();
  }

  const detail = await reply.text();
  throw new Error(`OpenAI embeddings error (${reply.status}): ${detail}`);
}
|
|
624
|
+
|
|
625
|
+
/**
 * POST /v1/embeddings
 *
 * Generate embeddings using the configured provider (Ollama, llama.cpp,
 * OpenRouter, or OpenAI) and return them in OpenAI list format.
 *
 * Responses:
 * - 400 when `input` is missing, is an empty array, or contains null entries.
 * - 501 when no embedding provider is configured.
 * - 500 when the selected provider fails or an unexpected error occurs.
 */
router.post("/embeddings", async (req, res) => {
  const startTime = Date.now();

  // Sends an OpenAI-style error envelope.
  const sendError = (status, message, type, code) =>
    res.status(status).json({ error: { message, type, code } });

  try {
    // Tolerate a missing body so the caller gets a 400 instead of a 500.
    const { input, model } = req.body ?? {};

    // Validate input
    if (!input) {
      return sendError(400, "Missing required parameter: input", "invalid_request_error", "missing_parameter");
    }

    // Convert input to array if string
    const inputs = Array.isArray(input) ? input : [input];

    // An empty batch has nothing to embed, and null entries would break the
    // length logging below.
    if (inputs.length === 0 || inputs.some((item) => item == null)) {
      return sendError(
        400,
        "Invalid parameter: input must be a non-empty string or a non-empty array without null entries",
        "invalid_request_error",
        "invalid_parameter"
      );
    }

    logger.info({
      endpoint: "/v1/embeddings",
      model: model || "auto-detect",
      inputCount: inputs.length,
      inputLengths: inputs.map((item) => item.length)
    }, "=== OPENAI EMBEDDINGS REQUEST ===");

    // Determine which provider to use for embeddings
    const embeddingConfig = determineEmbeddingProvider(model);

    if (!embeddingConfig) {
      logger.warn("No embedding provider configured");
      return sendError(
        501,
        "Embeddings not configured. Set up one of: OPENROUTER_API_KEY, OPENAI_API_KEY, OLLAMA_EMBEDDINGS_MODEL, or LLAMACPP_EMBEDDINGS_ENDPOINT in your .env file to enable @Codebase semantic search.",
        "not_implemented",
        "embeddings_not_configured"
      );
    }

    // Route to appropriate provider
    let embeddingResponse;

    try {
      switch (embeddingConfig.provider) {
        case "ollama":
          embeddingResponse = await generateOllamaEmbeddings(inputs, embeddingConfig);
          break;

        case "llamacpp":
          embeddingResponse = await generateLlamaCppEmbeddings(inputs, embeddingConfig);
          break;

        case "openrouter":
          embeddingResponse = await generateOpenRouterEmbeddings(inputs, embeddingConfig);
          break;

        case "openai":
          embeddingResponse = await generateOpenAIEmbeddings(inputs, embeddingConfig);
          break;

        default:
          throw new Error(`Unsupported embedding provider: ${embeddingConfig.provider}`);
      }
    } catch (error) {
      logger.error({
        error: error.message,
        provider: embeddingConfig.provider,
      }, "Embeddings generation failed");

      return sendError(500, error.message || "Embeddings generation failed", "server_error", "embeddings_error");
    }

    logger.info({
      provider: embeddingConfig.provider,
      model: embeddingConfig.model,
      duration: Date.now() - startTime,
      embeddingCount: embeddingResponse.data?.length || 0,
      totalTokens: embeddingResponse.usage?.total_tokens || 0
    }, "=== EMBEDDINGS RESPONSE ===");

    // Return embeddings in OpenAI format
    res.json(embeddingResponse);
  } catch (error) {
    logger.error({
      error: error.message,
      stack: error.stack,
      duration: Date.now() - startTime
    }, "Embeddings error");

    sendError(500, error.message || "Internal server error", "server_error", "internal_error");
  }
});
|
|
738
|
+
|
|
739
|
+
/**
 * GET /v1/health
 *
 * Lightweight health check. Replies with a static "ok" status, the configured
 * chat provider (defaulting to "databricks"), compatibility flags and the
 * current time in ISO-8601 format. It does not contact the upstream provider.
 */
router.get("/health", (req, res) => {
  const activeProvider = config.modelProvider?.type || "databricks";

  const payload = {
    status: "ok",
    provider: activeProvider,
    openai_compatible: true,
    cursor_compatible: true,
    timestamp: new Date().toISOString()
  };

  res.json(payload);
});
|
|
754
|
+
|
|
755
|
+
module.exports = router;
|