lynkr 3.3.1 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,919 @@
1
+ /**
2
+ * OpenAI API Compatibility Router
3
+ *
4
+ * Implements OpenAI API endpoints for Cursor IDE compatibility.
5
+ * Routes:
6
+ * - POST /v1/chat/completions - Chat API with streaming support
7
+ * - GET /v1/models - List available models
8
+ * - POST /v1/embeddings - Generate embeddings (via Ollama, llama.cpp, OpenRouter, or OpenAI)
9
+ * - GET /v1/health - Health check
10
+ *
11
+ * Note: If MODEL_PROVIDER=openrouter, the same OPENROUTER_API_KEY is used
12
+ * for both chat completions and embeddings - no additional configuration needed.
13
+ *
14
+ * @module api/openai-router
15
+ */
16
+
17
+ const express = require("express");
18
+ const logger = require("../logger");
19
+ const config = require("../config");
20
+ const orchestrator = require("../orchestrator");
21
+ const { getSession } = require("../sessions");
22
+ const {
23
+ convertOpenAIToAnthropic,
24
+ convertAnthropicToOpenAI,
25
+ convertAnthropicStreamChunkToOpenAI
26
+ } = require("../clients/openai-format");
27
+
28
+ const router = express.Router();
29
+
30
/**
 * Write one Server-Sent Events `data:` frame carrying `payload` as JSON.
 *
 * @param {object} res - Express response on which the SSE headers are already set.
 * @param {object} payload - JSON-serialisable chunk.
 */
function writeSseData(res, payload) {
  res.write(`data: ${JSON.stringify(payload)}\n\n`);
}

/**
 * Build a single-choice `chat.completion.chunk` object.
 *
 * @param {{id: *, created: *, model: *}} base - Fields repeated on every chunk of one response.
 * @param {object} delta - Incremental message content for this chunk.
 * @param {?string} [finishReason=null] - Only set on the terminating chunk.
 * @returns {object} Chunk ready to be serialised into an SSE frame.
 */
function buildChatCompletionChunk(base, delta, finishReason = null) {
  return {
    id: base.id,
    object: "chat.completion.chunk",
    created: base.created,
    model: base.model,
    choices: [{ index: 0, delta, finish_reason: finishReason }]
  };
}

/**
 * POST /v1/chat/completions
 *
 * OpenAI-compatible chat completions endpoint.
 * Converts the OpenAI request to Anthropic format, runs it through the
 * orchestrator, and converts the result back to OpenAI format.
 *
 * Streaming is simulated: the orchestrator is always called with
 * `stream = false`, and the finished answer is replayed as SSE chunks
 * (role chunk, one chunk per space-separated word, one chunk per tool call,
 * finish chunk, then `[DONE]`).
 *
 * Errors before any SSE header is sent produce an OpenAI-style JSON error with
 * status 500. Errors during streaming are reported as an assistant chunk,
 * because the status line can no longer be changed.
 */
router.post("/chat/completions", async (req, res) => {
  const startTime = Date.now();
  // Session key: explicit header first, then the bearer token, then a shared default.
  const sessionId =
    req.headers["x-session-id"] ||
    req.headers["authorization"]?.split(" ")[1] ||
    "openai-session";

  try {
    logger.info({
      endpoint: "/v1/chat/completions",
      model: req.body.model,
      messageCount: req.body.messages?.length,
      stream: req.body.stream || false,
      hasTools: !!req.body.tools,
      toolCount: req.body.tools?.length || 0,
      hasMessages: !!req.body.messages,
      messagesType: typeof req.body.messages,
      requestBodyKeys: Object.keys(req.body),
      // Log first 500 chars of body for debugging
      requestBodyPreview: JSON.stringify(req.body).substring(0, 500)
    }, "=== OPENAI CHAT COMPLETION REQUEST ===");

    // Convert OpenAI request to Anthropic format
    const anthropicRequest = convertOpenAIToAnthropic(req.body);

    // Get or create session
    const session = getSession(sessionId);

    // Both modes issue the same orchestrator call.
    const orchestratorArgs = {
      payload: anthropicRequest,
      headers: req.headers,
      session,
      options: {
        maxSteps: req.body?.max_steps
      }
    };

    if (req.body.stream) {
      res.setHeader("Content-Type", "text/event-stream");
      res.setHeader("Cache-Control", "no-cache");
      res.setHeader("Connection", "keep-alive");

      try {
        // The orchestrator is asked for a complete response; chunking happens below.
        anthropicRequest.stream = false;

        const result = await orchestrator.processMessage(orchestratorArgs);

        if (!result || !result.body) {
          logger.error({
            result: result ? JSON.stringify(result) : "null",
            resultKeys: result ? Object.keys(result) : null
          }, "Invalid orchestrator response for streaming");
          throw new Error("Invalid response from orchestrator");
        }

        const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
        const choice = openaiResponse.choices?.[0];
        if (!choice) {
          throw new Error("Converted response contains no choices");
        }

        const chunkBase = {
          id: openaiResponse.id,
          created: openaiResponse.created,
          model: req.body.model
        };
        const content = choice.message?.content || "";
        // Assumes the converter emits OpenAI-style `message.tool_calls`
        // ({ id, type, function: { name, arguments } }) — TODO confirm.
        const toolCalls = Array.isArray(choice.message?.tool_calls)
          ? choice.message.tool_calls
          : [];

        // Opening chunk announces the assistant role.
        writeSseData(res, buildChatCompletionChunk(chunkBase, { role: "assistant", content: "" }));

        // Replay the text word by word, keeping the separating spaces.
        if (content) {
          const words = content.split(" ");
          words.forEach((word, position) => {
            const piece = position < words.length - 1 ? `${word} ` : word;
            writeSseData(res, buildChatCompletionChunk(chunkBase, { content: piece }));
          });
        }

        // Without these chunks the client would see the finish_reason of a
        // tool-using turn but never receive the calls themselves.
        toolCalls.forEach((toolCall, position) => {
          writeSseData(
            res,
            buildChatCompletionChunk(chunkBase, { tool_calls: [{ ...toolCall, index: position }] })
          );
        });

        writeSseData(res, buildChatCompletionChunk(chunkBase, {}, choice.finish_reason));
        res.write("data: [DONE]\n\n");
        res.end();

        // Optional chaining: a missing `usage` must not throw after res.end().
        logger.info({
          duration: Date.now() - startTime,
          mode: "streaming",
          inputTokens: openaiResponse.usage?.prompt_tokens,
          outputTokens: openaiResponse.usage?.completion_tokens
        }, "OpenAI streaming completed");

      } catch (streamError) {
        logger.error({ error: streamError.message, stack: streamError.stack }, "Streaming error");

        // Writing to an already-ended response would raise a second error.
        if (!res.writableEnded) {
          const errorChunk = buildChatCompletionChunk(
            {
              id: `chatcmpl-error-${Date.now()}`,
              created: Math.floor(Date.now() / 1000),
              model: req.body.model
            },
            { role: "assistant", content: `Error: ${streamError.message}` },
            "stop"
          );
          writeSseData(res, errorChunk);
          res.write("data: [DONE]\n\n");
          res.end();
        }
      }
    } else {
      // Non-streaming mode
      const result = await orchestrator.processMessage(orchestratorArgs);

      logger.debug({
        resultKeys: Object.keys(result || {}),
        hasBody: !!result?.body,
        bodyType: typeof result?.body,
        bodyKeys: result?.body ? Object.keys(result.body) : null
      }, "Orchestrator result structure");

      // Same validation as the streaming path, so callers get a clear message
      // instead of a TypeError from dereferencing an undefined result.
      if (!result || !result.body) {
        throw new Error("Invalid response from orchestrator");
      }

      const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);

      logger.info({
        duration: Date.now() - startTime,
        mode: "non-streaming",
        inputTokens: openaiResponse.usage?.prompt_tokens,
        outputTokens: openaiResponse.usage?.completion_tokens,
        finishReason: openaiResponse.choices?.[0]?.finish_reason
      }, "=== OPENAI CHAT COMPLETION RESPONSE ===");

      res.json(openaiResponse);
    }

  } catch (error) {
    logger.error({
      error: error.message,
      stack: error.stack,
      duration: Date.now() - startTime
    }, "OpenAI chat completion error");

    // Once headers are out the status cannot change; just close the stream.
    if (res.headersSent) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }

    // Return OpenAI-format error
    res.status(500).json({
      error: {
        message: error.message || "Internal server error",
        type: "server_error",
        code: "internal_error"
      }
    });
  }
});
225
+
226
/** Fixed `created` value (2024-01-01T00:00:00Z) reported for every listed model. */
const MODEL_CREATED_AT = 1704067200;

/**
 * Build one OpenAI-style model list entry.
 *
 * @param {string} id - Model identifier; also used as `root`.
 * @param {string} ownedBy - Value for the `owned_by` field.
 * @returns {object} Entry for the `data` array of the models list.
 */
function buildModelEntry(id, ownedBy) {
  return {
    id,
    object: "model",
    created: MODEL_CREATED_AT,
    owned_by: ownedBy,
    permission: [],
    root: id,
    parent: null
  };
}

/**
 * List the chat models advertised for a configured provider.
 *
 * @param {string} provider - Value of `config.modelProvider.type`.
 * @returns {object[]} One entry per advertised model; unknown providers get a
 *   single generic "claude-3-5-sonnet" entry owned by "lynkr".
 */
function listChatModels(provider) {
  const entriesFor = (ownedBy, ...ids) => ids.map((id) => buildModelEntry(id, ownedBy));

  switch (provider) {
    case "databricks":
      return entriesFor("databricks", "claude-sonnet-4.5", "claude-opus-4.5");
    case "bedrock":
      return entriesFor(
        "aws-bedrock",
        config.bedrock?.modelId || "anthropic.claude-3-5-sonnet-20241022-v2:0"
      );
    case "azure-anthropic":
      return entriesFor("azure-anthropic", "claude-3-5-sonnet");
    case "openrouter":
      return entriesFor("openrouter", config.openrouter?.model || "openai/gpt-4o-mini");
    case "openai":
      return entriesFor("openai", "gpt-4o", "gpt-4o-mini");
    case "azure-openai":
      // Return standard OpenAI model names that Cursor recognizes.
      // The actual Azure deployment name doesn't matter - Lynkr routes based on config.
      return entriesFor("openai", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo");
    case "ollama":
      return entriesFor("ollama", config.ollama?.model || "qwen2.5-coder:7b");
    case "llamacpp":
      return entriesFor("llamacpp", config.llamacpp?.model || "default");
    default:
      return entriesFor("lynkr", "claude-3-5-sonnet");
  }
}

/**
 * Choose the model id advertised for the active embedding provider.
 *
 * @param {{provider: string, model?: string}} embeddingConfig - Result of
 *   determineEmbeddingProvider().
 * @returns {string} Never undefined: providers whose configured model is
 *   missing fall back to a generic name so the list entry always has an `id`.
 */
function resolveAdvertisedEmbeddingModelId(embeddingConfig) {
  switch (embeddingConfig.provider) {
    case "ollama":
    case "openrouter":
      return embeddingConfig.model || "text-embedding-3-small";
    case "openai":
      return embeddingConfig.model || "text-embedding-ada-002";
    case "llamacpp": // Generic name for Cursor
    default:
      return "text-embedding-3-small";
  }
}

/**
 * GET /v1/models
 *
 * Lists the chat models for the configured provider in OpenAI list format.
 * When an embedding provider is available, one embedding model entry is
 * appended. Responds with an OpenAI-style 500 error if anything throws.
 */
router.get("/models", (req, res) => {
  try {
    const provider = config.modelProvider?.type || "databricks";
    const models = listChatModels(provider);

    // Add embedding models if embeddings are configured
    const embeddingConfig = determineEmbeddingProvider();
    if (embeddingConfig) {
      models.push(
        buildModelEntry(resolveAdvertisedEmbeddingModelId(embeddingConfig), embeddingConfig.provider)
      );
    }

    logger.debug({
      provider,
      modelCount: models.length,
      models: models.map((m) => m.id),
      hasEmbeddings: !!embeddingConfig
    }, "Listed models for OpenAI API");

    res.json({
      object: "list",
      data: models
    });

  } catch (error) {
    logger.error({ error: error.message }, "Error listing models");
    res.status(500).json({
      error: {
        message: error.message || "Failed to list models",
        type: "server_error",
        code: "internal_error"
      }
    });
  }
});
460
+
461
/**
 * Determine which provider to use for embeddings.
 *
 * Priority:
 * 1. Explicit EMBEDDINGS_PROVIDER env var (trimmed, case-insensitive). If the
 *    named provider is missing its required setting, a warning is logged and
 *    null is returned; no other provider is tried. An unrecognised value is
 *    logged and ignored.
 * 2. Same provider as MODEL_PROVIDER, when it is one of openrouter, openai,
 *    ollama or llamacpp and is configured for embeddings.
 * 3. First available: OpenRouter > OpenAI > Ollama > llama.cpp
 *
 * @param {?string} [requestedModel=null] - Model name from the request; when
 *   set it overrides the provider's configured/default embedding model.
 * @returns {?{provider: string, model: (string|undefined), endpoint: (string|undefined), apiKey: (string|undefined)}}
 *   Resolved provider settings, or null when no provider can be used.
 */
function determineEmbeddingProvider(requestedModel = null) {
  // Each resolver returns the provider settings, or null when the provider
  // lacks the setting that makes it usable.
  const resolvers = {
    ollama: () =>
      config.ollama?.embeddingsModel
        ? {
            provider: "ollama",
            model: requestedModel || config.ollama.embeddingsModel,
            endpoint: config.ollama.embeddingsEndpoint
          }
        : null,
    llamacpp: () =>
      config.llamacpp?.embeddingsEndpoint
        ? {
            provider: "llamacpp",
            model: requestedModel || "default",
            endpoint: config.llamacpp.embeddingsEndpoint
          }
        : null,
    openrouter: () =>
      config.openrouter?.apiKey
        ? {
            provider: "openrouter",
            model: requestedModel || config.openrouter.embeddingsModel,
            apiKey: config.openrouter.apiKey,
            endpoint: "https://openrouter.ai/api/v1/embeddings"
          }
        : null,
    openai: () =>
      config.openai?.apiKey
        ? {
            provider: "openai",
            model: requestedModel || "text-embedding-ada-002",
            apiKey: config.openai.apiKey,
            endpoint: "https://api.openai.com/v1/embeddings"
          }
        : null
  };

  // Setting named in the warning when an explicitly selected provider is unusable.
  const requiredSetting = {
    ollama: "OLLAMA_EMBEDDINGS_MODEL",
    llamacpp: "LLAMACPP_EMBEDDINGS_ENDPOINT",
    openrouter: "OPENROUTER_API_KEY",
    openai: "OPENAI_API_KEY"
  };

  // Priority 1: Explicit configuration
  const explicitProvider = process.env.EMBEDDINGS_PROVIDER?.trim();
  if (explicitProvider) {
    const providerKey = explicitProvider.toLowerCase();
    if (Object.hasOwn(resolvers, providerKey)) {
      const resolved = resolvers[providerKey]();
      if (!resolved) {
        logger.warn(`EMBEDDINGS_PROVIDER=${providerKey} but ${requiredSetting[providerKey]} not set`);
      }
      return resolved;
    }
    logger.warn(`Unknown EMBEDDINGS_PROVIDER "${explicitProvider}"; falling back to auto-detection`);
  }

  // Priority 2: Same as chat provider (if supported)
  const chatProvider = config.modelProvider?.type;
  if (typeof chatProvider === "string" && Object.hasOwn(resolvers, chatProvider)) {
    const resolved = resolvers[chatProvider]();
    if (resolved) {
      return resolved;
    }
  }

  // Priority 3: First available provider
  for (const providerKey of ["openrouter", "openai", "ollama", "llamacpp"]) {
    const resolved = resolvers[providerKey]();
    if (resolved) {
      return resolved;
    }
  }

  return null; // No provider available
}
587
+
588
/**
 * Generate embeddings using Ollama.
 *
 * One POST of `{ model, prompt }` is sent per input, sequentially, and each
 * response's `embedding` field is collected into an OpenAI-style list.
 *
 * @param {Array} inputs - Inputs to embed, one request each.
 * @param {{model: string, endpoint: string}} embeddingConfig - Provider settings.
 * @returns {Promise<object>} OpenAI-format list; `usage` counts are always 0
 *   because no token counts are read from the response.
 * @throws {Error} On a non-2xx response, a response without an `embedding`
 *   array, or a network failure. The first failure aborts the whole batch.
 */
async function generateOllamaEmbeddings(inputs, embeddingConfig) {
  const { model, endpoint } = embeddingConfig;

  logger.info({
    model,
    endpoint,
    inputCount: inputs.length
  }, "Generating embeddings with Ollama");

  const embeddings = [];

  for (const [index, input] of inputs.entries()) {
    try {
      const response = await fetch(endpoint, {
        method: "POST",
        headers: {
          "Content-Type": "application/json"
        },
        body: JSON.stringify({
          model,
          prompt: input
        })
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Ollama embeddings error (${response.status}): ${errorText}`);
      }

      const data = await response.json();

      // Fail loudly instead of returning an entry whose embedding is undefined.
      if (!Array.isArray(data?.embedding)) {
        throw new Error("Ollama embeddings response did not contain an embedding array");
      }

      embeddings.push({
        object: "embedding",
        embedding: data.embedding,
        index
      });

    } catch (error) {
      logger.error({
        error: error.message,
        // String() keeps this log call from throwing on non-string inputs,
        // which would hide the original error.
        input: String(input).substring(0, 100),
        index
      }, "Failed to generate Ollama embedding");
      throw error;
    }
  }

  // Convert to OpenAI format
  return {
    object: "list",
    data: embeddings,
    model,
    usage: {
      prompt_tokens: 0, // Ollama doesn't provide this
      total_tokens: 0
    }
  };
}
653
+
654
/**
 * Generate embeddings using llama.cpp.
 *
 * Sends all inputs in one POST of `{ input, encoding_format: "float" }` and
 * normalises the reply to OpenAI list format. Accepted reply shapes:
 * - a top-level array of `{ index, embedding }` items, where `embedding` may
 *   be wrapped in an extra array (`[[...]]`);
 * - an object with a `data` property (returned as-is, taken to be OpenAI format);
 * - a single object with an `embedding` array (presumably what some server
 *   builds return for one input — verify against the deployed version).
 * Any other shape yields an empty `data` list and a warning in the log.
 *
 * @param {Array} inputs - Inputs to embed in a single batch request.
 * @param {{model: string, endpoint: string}} embeddingConfig - Provider settings.
 * @returns {Promise<object>} OpenAI-format embeddings list.
 * @throws {Error} On a non-2xx response, an array item without an embedding,
 *   or a network failure.
 */
async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
  const { model, endpoint } = embeddingConfig;

  logger.info({
    model,
    endpoint,
    inputCount: inputs.length
  }, "Generating embeddings with llama.cpp");

  // Unwrap the double-nested form `[[...]]` to a flat vector.
  const flattenVector = (vector) => (Array.isArray(vector[0]) ? vector[0] : vector);

  try {
    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        input: inputs, // llama.cpp supports batch
        encoding_format: "float"
      })
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`llama.cpp embeddings error (${response.status}): ${errorText}`);
    }

    const data = await response.json();

    let embeddingsData;

    if (Array.isArray(data)) {
      embeddingsData = data.map((item, position) => {
        if (!Array.isArray(item?.embedding)) {
          throw new Error(`llama.cpp embeddings response item ${position} has no embedding`);
        }
        return {
          object: "embedding",
          embedding: flattenVector(item.embedding),
          // Fall back to array position when the server omits the index.
          index: item.index ?? position
        };
      });
    } else if (data?.data) {
      // Already in OpenAI format
      embeddingsData = data.data;
    } else if (Array.isArray(data?.embedding)) {
      embeddingsData = [{
        object: "embedding",
        embedding: flattenVector(data.embedding),
        index: 0
      }];
    } else {
      logger.warn({ endpoint }, "Unrecognized llama.cpp embeddings response; returning no embeddings");
      embeddingsData = [];
    }

    return {
      object: "list",
      data: embeddingsData,
      model: model || data?.model || "default",
      usage: data?.usage || {
        prompt_tokens: 0,
        total_tokens: 0
      }
    };

  } catch (error) {
    logger.error({
      error: error.message,
      endpoint
    }, "Failed to generate llama.cpp embeddings");
    throw error;
  }
}
722
+
723
/**
 * Generate embeddings using OpenRouter.
 *
 * Sends one batched POST and returns the parsed JSON response unchanged.
 *
 * @param {Array} inputs - Inputs to embed.
 * @param {{model: string, apiKey: string, endpoint: string, encodingFormat?: string}} embeddingConfig
 *   Provider settings. `encodingFormat` is forwarded as `encoding_format` and
 *   defaults to "float" when absent or null.
 * @returns {Promise<object>} Parsed response body from the endpoint.
 * @throws {Error} On a non-2xx response (message includes status and body) or
 *   a network failure.
 */
async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
  const { model, apiKey, endpoint, encodingFormat } = embeddingConfig;

  logger.info({
    model,
    inputCount: inputs.length
  }, "Generating embeddings with OpenRouter");

  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${apiKey}`,
      "HTTP-Referer": "https://github.com/vishalveerareddy123/Lynkr",
      "X-Title": "Lynkr"
    },
    body: JSON.stringify({
      model,
      input: inputs,
      encoding_format: encodingFormat ?? "float"
    })
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`OpenRouter embeddings error (${response.status}): ${errorText}`);
  }

  return await response.json();
}
756
+
757
/**
 * Generate embeddings using OpenAI.
 *
 * Sends one batched POST and returns the parsed JSON response unchanged.
 *
 * @param {Array} inputs - Inputs to embed.
 * @param {{model: string, apiKey: string, endpoint: string, encodingFormat?: string}} embeddingConfig
 *   Provider settings. `encodingFormat` is forwarded as `encoding_format` and
 *   defaults to "float" when absent or null.
 * @returns {Promise<object>} Parsed response body from the endpoint.
 * @throws {Error} On a non-2xx response (message includes status and body) or
 *   a network failure.
 */
async function generateOpenAIEmbeddings(inputs, embeddingConfig) {
  const { model, apiKey, endpoint, encodingFormat } = embeddingConfig;

  logger.info({
    model,
    inputCount: inputs.length
  }, "Generating embeddings with OpenAI");

  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${apiKey}`
    },
    body: JSON.stringify({
      model,
      input: inputs,
      encoding_format: encodingFormat ?? "float"
    })
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`OpenAI embeddings error (${response.status}): ${errorText}`);
  }

  return await response.json();
}
788
+
789
/**
 * Re-encode float-array embeddings as base64 strings.
 *
 * Each array is packed as 32-bit floats (platform byte order, little-endian on
 * common hardware) and base64-encoded, matching the OpenAI `encoding_format:
 * "base64"` representation. Entries whose `embedding` is not an array (for
 * example already-encoded strings) are left untouched.
 *
 * @param {object} embeddingResponse - OpenAI-format embeddings list.
 * @returns {object} A copy with converted entries, or the input itself when it
 *   has no `data` array.
 */
function encodeEmbeddingsAsBase64(embeddingResponse) {
  if (!Array.isArray(embeddingResponse?.data)) {
    return embeddingResponse;
  }
  const data = embeddingResponse.data.map((item) => {
    if (!Array.isArray(item?.embedding)) {
      return item;
    }
    const packed = new Float32Array(item.embedding);
    return { ...item, embedding: Buffer.from(packed.buffer).toString("base64") };
  });
  return { ...embeddingResponse, data };
}

/**
 * POST /v1/embeddings
 *
 * Generate embeddings using the configured provider (Ollama, llama.cpp,
 * OpenRouter, or OpenAI). Required for Cursor's semantic search features.
 *
 * Responses:
 * - 400 when `input` is missing/empty or `encoding_format` is not "float" or "base64"
 * - 501 when no embedding provider is configured
 * - 500 when the provider call fails or an unexpected error occurs
 * - 200 with an OpenAI-format embeddings list otherwise; embeddings are
 *   returned base64-encoded when the request asked for `encoding_format: "base64"`
 */
router.post("/embeddings", async (req, res) => {
  const startTime = Date.now();

  try {
    // A missing body is treated like a body without `input` (400, not 500).
    const { input, model, encoding_format: encodingFormat } = req.body ?? {};

    const inputs = Array.isArray(input) ? input : [input];

    // Validate input: absent, empty string, and empty array are all rejected.
    if (!input || inputs.length === 0) {
      return res.status(400).json({
        error: {
          message: "Missing required parameter: input",
          type: "invalid_request_error",
          code: "missing_parameter"
        }
      });
    }

    if (encodingFormat != null && encodingFormat !== "float" && encodingFormat !== "base64") {
      return res.status(400).json({
        error: {
          message: "Invalid value for encoding_format: expected \"float\" or \"base64\"",
          type: "invalid_request_error",
          code: "invalid_parameter"
        }
      });
    }

    logger.info({
      endpoint: "/v1/embeddings",
      model: model || "auto-detect",
      inputCount: inputs.length,
      // Optional chaining: a null element must not crash the log call.
      inputLengths: inputs.map((item) => item?.length)
    }, "=== OPENAI EMBEDDINGS REQUEST ===");

    // Determine which provider to use for embeddings
    const embeddingConfig = determineEmbeddingProvider(model);

    if (!embeddingConfig) {
      logger.warn("No embedding provider configured");
      return res.status(501).json({
        error: {
          message: "Embeddings not configured. Set up one of: OPENROUTER_API_KEY, OPENAI_API_KEY, OLLAMA_EMBEDDINGS_MODEL, or LLAMACPP_EMBEDDINGS_ENDPOINT in your .env file to enable @Codebase semantic search.",
          type: "not_implemented",
          code: "embeddings_not_configured"
        }
      });
    }

    // Route to appropriate provider
    const generators = {
      ollama: generateOllamaEmbeddings,
      llamacpp: generateLlamaCppEmbeddings,
      openrouter: generateOpenRouterEmbeddings,
      openai: generateOpenAIEmbeddings
    };

    let embeddingResponse;

    try {
      const generate = Object.hasOwn(generators, embeddingConfig.provider)
        ? generators[embeddingConfig.provider]
        : null;
      if (!generate) {
        throw new Error(`Unsupported embedding provider: ${embeddingConfig.provider}`);
      }
      // The requested format is offered to the provider; whatever comes back
      // as float arrays is converted below if base64 was requested.
      embeddingResponse = await generate(inputs, { ...embeddingConfig, encodingFormat });
    } catch (error) {
      logger.error({
        error: error.message,
        provider: embeddingConfig.provider,
      }, "Embeddings generation failed");

      return res.status(500).json({
        error: {
          message: error.message || "Embeddings generation failed",
          type: "server_error",
          code: "embeddings_error"
        }
      });
    }

    if (encodingFormat === "base64") {
      embeddingResponse = encodeEmbeddingsAsBase64(embeddingResponse);
    }

    logger.info({
      provider: embeddingConfig.provider,
      model: embeddingConfig.model,
      duration: Date.now() - startTime,
      embeddingCount: embeddingResponse.data?.length || 0,
      totalTokens: embeddingResponse.usage?.total_tokens || 0
    }, "=== EMBEDDINGS RESPONSE ===");

    // Return embeddings in OpenAI format
    res.json(embeddingResponse);

  } catch (error) {
    logger.error({
      error: error.message,
      stack: error.stack,
      duration: Date.now() - startTime
    }, "Embeddings error");

    res.status(500).json({
      error: {
        message: error.message || "Internal server error",
        type: "server_error",
        code: "internal_error"
      }
    });
  }
});
902
+
903
/**
 * GET /v1/health
 *
 * Lightweight health check intended for clients such as Cursor to verify the
 * connection. Always answers with status "ok", the configured provider type
 * ("databricks" when none is set), two compatibility flags, and the current
 * time as an ISO-8601 string. No provider is contacted.
 */
router.get("/health", (_req, res) => {
  const activeProvider = config.modelProvider?.type || "databricks";
  const healthReport = {
    status: "ok",
    provider: activeProvider,
    openai_compatible: true,
    cursor_compatible: true,
    timestamp: new Date().toISOString()
  };

  res.json(healthReport);
});
918
+
919
+ module.exports = router;