lynkr 4.0.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lynkr",
3
- "version": "4.0.0",
3
+ "version": "4.2.0",
4
4
  "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -18,6 +18,7 @@ const express = require("express");
18
18
  const logger = require("../logger");
19
19
  const config = require("../config");
20
20
  const orchestrator = require("../orchestrator");
21
+ const { getSession } = require("../sessions");
21
22
  const {
22
23
  convertOpenAIToAnthropic,
23
24
  convertAnthropicToOpenAI,
@@ -43,43 +44,113 @@ router.post("/chat/completions", async (req, res) => {
43
44
  messageCount: req.body.messages?.length,
44
45
  stream: req.body.stream || false,
45
46
  hasTools: !!req.body.tools,
46
- toolCount: req.body.tools?.length || 0
47
+ toolCount: req.body.tools?.length || 0,
48
+ hasMessages: !!req.body.messages,
49
+ messagesType: typeof req.body.messages,
50
+ requestBodyKeys: Object.keys(req.body),
51
+ // Log first 500 chars of body for debugging
52
+ requestBodyPreview: JSON.stringify(req.body).substring(0, 500)
47
53
  }, "=== OPENAI CHAT COMPLETION REQUEST ===");
48
54
 
49
55
  // Convert OpenAI request to Anthropic format
50
56
  const anthropicRequest = convertOpenAIToAnthropic(req.body);
51
57
 
52
- // Add session ID for tracking
53
- anthropicRequest.sessionId = sessionId;
58
+ // Get or create session
59
+ const session = getSession(sessionId);
54
60
 
55
61
  // Handle streaming vs non-streaming
56
62
  if (req.body.stream) {
57
- // Set up SSE headers
63
+ // Set up SSE headers for streaming
58
64
  res.setHeader("Content-Type", "text/event-stream");
59
65
  res.setHeader("Cache-Control", "no-cache");
60
66
  res.setHeader("Connection", "keep-alive");
61
67
 
62
- // Process request through orchestrator (streaming mode)
63
- anthropicRequest.stream = true;
64
-
65
68
  try {
66
- // Call orchestrator and get streaming response
67
- const anthropicResponse = await orchestrator.orchestrateRequest(anthropicRequest, {
68
- raw: res,
69
- writeHead: res.writeHead.bind(res),
70
- write: res.write.bind(res),
71
- end: res.end.bind(res)
69
+ // For streaming, we need to handle it differently - convert to non-streaming temporarily
70
+ // Get non-streaming response from orchestrator
71
+ anthropicRequest.stream = false; // Force non-streaming from orchestrator
72
+
73
+ const result = await orchestrator.processMessage({
74
+ payload: anthropicRequest,
75
+ headers: req.headers,
76
+ session: session,
77
+ options: {
78
+ maxSteps: req.body?.max_steps
79
+ }
72
80
  });
73
81
 
74
- // Orchestrator handles streaming directly to response
75
- // If we reach here, streaming is complete
82
+ // Check if we have a valid response body
83
+ if (!result || !result.body) {
84
+ logger.error({
85
+ result: result ? JSON.stringify(result) : "null",
86
+ resultKeys: result ? Object.keys(result) : null
87
+ }, "Invalid orchestrator response for streaming");
88
+ throw new Error("Invalid response from orchestrator");
89
+ }
90
+
91
+ // Convert to OpenAI format
92
+ const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
93
+
94
+ // Simulate streaming by sending the complete response as chunks
95
+ const content = openaiResponse.choices[0].message.content || "";
96
+ const words = content.split(" ");
97
+
98
+ // Send start chunk
99
+ const startChunk = {
100
+ id: openaiResponse.id,
101
+ object: "chat.completion.chunk",
102
+ created: openaiResponse.created,
103
+ model: req.body.model,
104
+ choices: [{
105
+ index: 0,
106
+ delta: { role: "assistant", content: "" },
107
+ finish_reason: null
108
+ }]
109
+ };
110
+ res.write(`data: ${JSON.stringify(startChunk)}\n\n`);
111
+
112
+ // Send content in word chunks
113
+ for (let i = 0; i < words.length; i++) {
114
+ const word = words[i] + (i < words.length - 1 ? " " : "");
115
+ const chunk = {
116
+ id: openaiResponse.id,
117
+ object: "chat.completion.chunk",
118
+ created: openaiResponse.created,
119
+ model: req.body.model,
120
+ choices: [{
121
+ index: 0,
122
+ delta: { content: word },
123
+ finish_reason: null
124
+ }]
125
+ };
126
+ res.write(`data: ${JSON.stringify(chunk)}\n\n`);
127
+ }
128
+
129
+ // Send finish chunk
130
+ const finishChunk = {
131
+ id: openaiResponse.id,
132
+ object: "chat.completion.chunk",
133
+ created: openaiResponse.created,
134
+ model: req.body.model,
135
+ choices: [{
136
+ index: 0,
137
+ delta: {},
138
+ finish_reason: openaiResponse.choices[0].finish_reason
139
+ }]
140
+ };
141
+ res.write(`data: ${JSON.stringify(finishChunk)}\n\n`);
142
+ res.write("data: [DONE]\n\n");
143
+ res.end();
144
+
76
145
  logger.info({
77
146
  duration: Date.now() - startTime,
78
- mode: "streaming"
147
+ mode: "streaming",
148
+ inputTokens: openaiResponse.usage.prompt_tokens,
149
+ outputTokens: openaiResponse.usage.completion_tokens
79
150
  }, "OpenAI streaming completed");
80
151
 
81
152
  } catch (streamError) {
82
- logger.error({ error: streamError.message }, "Streaming error");
153
+ logger.error({ error: streamError.message, stack: streamError.stack }, "Streaming error");
83
154
 
84
155
  // Send error in OpenAI streaming format
85
156
  const errorChunk = {
@@ -87,16 +158,14 @@ router.post("/chat/completions", async (req, res) => {
87
158
  object: "chat.completion.chunk",
88
159
  created: Math.floor(Date.now() / 1000),
89
160
  model: req.body.model,
90
- choices: [
91
- {
92
- index: 0,
93
- delta: {
94
- role: "assistant",
95
- content: `Error: ${streamError.message}`
96
- },
97
- finish_reason: "stop"
98
- }
99
- ]
161
+ choices: [{
162
+ index: 0,
163
+ delta: {
164
+ role: "assistant",
165
+ content: `Error: ${streamError.message}`
166
+ },
167
+ finish_reason: "stop"
168
+ }]
100
169
  };
101
170
 
102
171
  res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
@@ -105,10 +174,25 @@ router.post("/chat/completions", async (req, res) => {
105
174
  }
106
175
  } else {
107
176
  // Non-streaming mode
108
- const anthropicResponse = await orchestrator.orchestrateRequest(anthropicRequest);
177
+ const result = await orchestrator.processMessage({
178
+ payload: anthropicRequest,
179
+ headers: req.headers,
180
+ session: session,
181
+ options: {
182
+ maxSteps: req.body?.max_steps
183
+ }
184
+ });
185
+
186
+ // Debug logging
187
+ logger.debug({
188
+ resultKeys: Object.keys(result || {}),
189
+ hasBody: !!result?.body,
190
+ bodyType: typeof result?.body,
191
+ bodyKeys: result?.body ? Object.keys(result.body) : null
192
+ }, "Orchestrator result structure");
109
193
 
110
194
  // Convert Anthropic response to OpenAI format
111
- const openaiResponse = convertAnthropicToOpenAI(anthropicResponse, req.body.model);
195
+ const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
112
196
 
113
197
  logger.info({
114
198
  duration: Date.now() - startTime,
@@ -237,16 +321,46 @@ router.get("/models", (req, res) => {
237
321
  break;
238
322
 
239
323
  case "azure-openai":
240
- const azureDeployment = config.azureOpenAI?.deployment || "gpt-4o";
241
- models.push({
242
- id: azureDeployment,
243
- object: "model",
244
- created: 1704067200,
245
- owned_by: "azure-openai",
246
- permission: [],
247
- root: azureDeployment,
248
- parent: null
249
- });
324
+ // Return standard OpenAI model names that Cursor recognizes
325
+ // The actual Azure deployment name doesn't matter - Lynkr routes based on config
326
+ models.push(
327
+ {
328
+ id: "gpt-4o",
329
+ object: "model",
330
+ created: 1704067200,
331
+ owned_by: "openai",
332
+ permission: [],
333
+ root: "gpt-4o",
334
+ parent: null
335
+ },
336
+ {
337
+ id: "gpt-4-turbo",
338
+ object: "model",
339
+ created: 1704067200,
340
+ owned_by: "openai",
341
+ permission: [],
342
+ root: "gpt-4-turbo",
343
+ parent: null
344
+ },
345
+ {
346
+ id: "gpt-4",
347
+ object: "model",
348
+ created: 1704067200,
349
+ owned_by: "openai",
350
+ permission: [],
351
+ root: "gpt-4",
352
+ parent: null
353
+ },
354
+ {
355
+ id: "gpt-3.5-turbo",
356
+ object: "model",
357
+ created: 1704067200,
358
+ owned_by: "openai",
359
+ permission: [],
360
+ root: "gpt-3.5-turbo",
361
+ parent: null
362
+ }
363
+ );
250
364
  break;
251
365
 
252
366
  case "ollama":
@@ -288,10 +402,43 @@ router.get("/models", (req, res) => {
288
402
  });
289
403
  }
290
404
 
405
+ // Add embedding models if embeddings are configured
406
+ const embeddingConfig = determineEmbeddingProvider();
407
+ if (embeddingConfig) {
408
+ let embeddingModelId;
409
+ switch (embeddingConfig.provider) {
410
+ case "llamacpp":
411
+ embeddingModelId = "text-embedding-3-small"; // Generic name for Cursor
412
+ break;
413
+ case "ollama":
414
+ embeddingModelId = embeddingConfig.model;
415
+ break;
416
+ case "openrouter":
417
+ embeddingModelId = embeddingConfig.model;
418
+ break;
419
+ case "openai":
420
+ embeddingModelId = embeddingConfig.model || "text-embedding-ada-002";
421
+ break;
422
+ default:
423
+ embeddingModelId = "text-embedding-3-small";
424
+ }
425
+
426
+ models.push({
427
+ id: embeddingModelId,
428
+ object: "model",
429
+ created: 1704067200,
430
+ owned_by: embeddingConfig.provider,
431
+ permission: [],
432
+ root: embeddingModelId,
433
+ parent: null
434
+ });
435
+ }
436
+
291
437
  logger.debug({
292
438
  provider,
293
439
  modelCount: models.length,
294
- models: models.map(m => m.id)
440
+ models: models.map(m => m.id),
441
+ hasEmbeddings: !!embeddingConfig
295
442
  }, "Listed models for OpenAI API");
296
443
 
297
444
  res.json({
@@ -536,10 +683,27 @@ async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
536
683
 
537
684
  const data = await response.json();
538
685
 
539
- // llama.cpp returns OpenAI-compatible format, but ensure consistency
686
+ // llama.cpp returns array format: [{index: 0, embedding: [[...]]}]
687
+ // Need to convert to OpenAI format: {data: [{object: "embedding", embedding: [...], index: 0}]}
688
+ let embeddingsData;
689
+
690
+ if (Array.isArray(data)) {
691
+ // llama.cpp returns array directly
692
+ embeddingsData = data.map(item => ({
693
+ object: "embedding",
694
+ embedding: Array.isArray(item.embedding[0]) ? item.embedding[0] : item.embedding, // Flatten double-nested array
695
+ index: item.index
696
+ }));
697
+ } else if (data.data) {
698
+ // Already in OpenAI format
699
+ embeddingsData = data.data;
700
+ } else {
701
+ embeddingsData = [];
702
+ }
703
+
540
704
  return {
541
705
  object: "list",
542
- data: data.data || [],
706
+ data: embeddingsData,
543
707
  model: model || data.model || "default",
544
708
  usage: data.usage || {
545
709
  prompt_tokens: 0,
@@ -736,6 +900,193 @@ router.post("/embeddings", async (req, res) => {
736
900
  }
737
901
  });
738
902
 
903
+ /**
904
+ * POST /v1/responses
905
+ *
906
+ * OpenAI Responses API endpoint (used by GPT-5-Codex and newer models).
907
+ * Converts Responses API format to Chat Completions → processes → converts back.
908
+ */
909
+ router.post("/responses", async (req, res) => {
910
+ const startTime = Date.now();
911
+ const sessionId = req.headers["x-session-id"] || req.headers["authorization"]?.split(" ")[1] || "responses-session";
912
+
913
+ try {
914
+ const { convertResponsesToChat, convertChatToResponses } = require("../clients/responses-format");
915
+
916
+ // Comprehensive debug logging
917
+ logger.info({
918
+ endpoint: "/v1/responses",
919
+ inputType: typeof req.body.input,
920
+ inputIsArray: Array.isArray(req.body.input),
921
+ inputLength: Array.isArray(req.body.input) ? req.body.input.length : req.body.input?.length,
922
+ inputPreview: typeof req.body.input === 'string'
923
+ ? req.body.input.substring(0, 100)
924
+ : Array.isArray(req.body.input)
925
+ ? req.body.input.map(m => ({role: m?.role, hasContent: !!m?.content, hasTool: !!m?.tool_calls}))
926
+ : 'unknown',
927
+ model: req.body.model,
928
+ hasTools: !!req.body.tools,
929
+ stream: req.body.stream || false,
930
+ fullRequestBodyKeys: Object.keys(req.body)
931
+ }, "=== RESPONSES API REQUEST ===");
932
+
933
+ // Convert Responses API to Chat Completions format
934
+ const chatRequest = convertResponsesToChat(req.body);
935
+
936
+ logger.info({
937
+ chatRequestMessageCount: chatRequest.messages?.length,
938
+ chatRequestMessages: chatRequest.messages?.map(m => ({
939
+ role: m.role,
940
+ hasContent: !!m.content,
941
+ contentPreview: typeof m.content === 'string' ? m.content.substring(0, 50) : m.content
942
+ }))
943
+ }, "After Responses→Chat conversion");
944
+
945
+ // Convert to Anthropic format
946
+ const anthropicRequest = convertOpenAIToAnthropic(chatRequest);
947
+
948
+ logger.info({
949
+ anthropicMessageCount: anthropicRequest.messages?.length,
950
+ anthropicMessages: anthropicRequest.messages?.map(m => ({
951
+ role: m.role,
952
+ hasContent: !!m.content
953
+ }))
954
+ }, "After Chat→Anthropic conversion");
955
+
956
+ // Get session
957
+ const session = getSession(sessionId);
958
+
959
+ // Handle streaming vs non-streaming
960
+ if (req.body.stream) {
961
+ // Set up SSE headers for streaming
962
+ res.setHeader("Content-Type", "text/event-stream");
963
+ res.setHeader("Cache-Control", "no-cache");
964
+ res.setHeader("Connection", "keep-alive");
965
+
966
+ try {
967
+ // Force non-streaming from orchestrator
968
+ anthropicRequest.stream = false;
969
+
970
+ const result = await orchestrator.processMessage({
971
+ payload: anthropicRequest,
972
+ headers: req.headers,
973
+ session: session,
974
+ options: {
975
+ maxSteps: req.body?.max_steps
976
+ }
977
+ });
978
+
979
+ // Convert back: Anthropic → OpenAI → Responses
980
+ const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
981
+ const responsesResponse = convertChatToResponses(chatResponse);
982
+
983
+ // Simulate streaming using OpenAI Responses API SSE format
984
+ const content = responsesResponse.content || "";
985
+ const words = content.split(" ");
986
+
987
+ // Send response.created event
988
+ const createdEvent = {
989
+ id: responsesResponse.id,
990
+ object: "response.created",
991
+ created: responsesResponse.created,
992
+ model: req.body.model
993
+ };
994
+ res.write(`event: response.created\n`);
995
+ res.write(`data: ${JSON.stringify(createdEvent)}\n\n`);
996
+
997
+ // Send content in word chunks using response.output_text.delta
998
+ for (let i = 0; i < words.length; i++) {
999
+ const word = words[i] + (i < words.length - 1 ? " " : "");
1000
+ const deltaEvent = {
1001
+ id: responsesResponse.id,
1002
+ object: "response.output_text.delta",
1003
+ delta: word,
1004
+ created: responsesResponse.created
1005
+ };
1006
+ res.write(`event: response.output_text.delta\n`);
1007
+ res.write(`data: ${JSON.stringify(deltaEvent)}\n\n`);
1008
+ }
1009
+
1010
+ // Send response.completed event
1011
+ const completedEvent = {
1012
+ id: responsesResponse.id,
1013
+ object: "response.completed",
1014
+ created: responsesResponse.created,
1015
+ model: req.body.model,
1016
+ content: content,
1017
+ stop_reason: responsesResponse.stop_reason,
1018
+ usage: responsesResponse.usage
1019
+ };
1020
+ res.write(`event: response.completed\n`);
1021
+ res.write(`data: ${JSON.stringify(completedEvent)}\n\n`);
1022
+
1023
+ // Optional: Send [DONE] marker
1024
+ res.write("data: [DONE]\n\n");
1025
+ res.end();
1026
+
1027
+ logger.info({
1028
+ duration: Date.now() - startTime,
1029
+ mode: "streaming",
1030
+ contentLength: content.length
1031
+ }, "=== RESPONSES API STREAMING COMPLETE ===");
1032
+
1033
+ } catch (streamError) {
1034
+ logger.error({ error: streamError.message, stack: streamError.stack }, "Responses API streaming error");
1035
+
1036
+ // Send error via SSE
1037
+ res.write(`data: ${JSON.stringify({
1038
+ error: {
1039
+ message: streamError.message || "Internal server error",
1040
+ type: "server_error",
1041
+ code: "internal_error"
1042
+ }
1043
+ })}\n\n`);
1044
+ res.end();
1045
+ }
1046
+
1047
+ } else {
1048
+ // Non-streaming response
1049
+ anthropicRequest.stream = false;
1050
+
1051
+ const result = await orchestrator.processMessage({
1052
+ payload: anthropicRequest,
1053
+ headers: req.headers,
1054
+ session: session,
1055
+ options: {
1056
+ maxSteps: req.body?.max_steps
1057
+ }
1058
+ });
1059
+
1060
+ // Convert back: Anthropic → OpenAI → Responses
1061
+ const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
1062
+ const responsesResponse = convertChatToResponses(chatResponse);
1063
+
1064
+ logger.info({
1065
+ duration: Date.now() - startTime,
1066
+ contentLength: responsesResponse.content?.length || 0,
1067
+ stopReason: responsesResponse.stop_reason
1068
+ }, "=== RESPONSES API RESPONSE ===");
1069
+
1070
+ res.json(responsesResponse);
1071
+ }
1072
+
1073
+ } catch (error) {
1074
+ logger.error({
1075
+ error: error.message,
1076
+ stack: error.stack,
1077
+ duration: Date.now() - startTime
1078
+ }, "Responses API error");
1079
+
1080
+ res.status(500).json({
1081
+ error: {
1082
+ message: error.message || "Internal server error",
1083
+ type: "server_error",
1084
+ code: "internal_error"
1085
+ }
1086
+ });
1087
+ }
1088
+ });
1089
+
739
1090
  /**
740
1091
  * GET /v1/health
741
1092
  *