npm - lynkr - Versions diffs - 4.0.0 → 4.2.0 - Mend

lynkr 4.0.0 → 4.2.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (27)

package/README.md +276 -2873
package/documentation/README.md +98 -0
package/documentation/api.md +806 -0
package/documentation/claude-code-cli.md +672 -0
package/documentation/contributing.md +571 -0
package/documentation/cursor-integration.md +731 -0
package/documentation/docker.md +867 -0
package/documentation/embeddings.md +760 -0
package/documentation/faq.md +659 -0
package/documentation/features.md +396 -0
package/documentation/installation.md +706 -0
package/documentation/memory-system.md +476 -0
package/documentation/production.md +601 -0
package/documentation/providers.md +735 -0
package/documentation/testing.md +629 -0
package/documentation/token-optimization.md +323 -0
package/documentation/tools.md +697 -0
package/documentation/troubleshooting.md +864 -0
package/package.json +1 -1
package/src/api/openai-router.js +393 -42
package/src/api/router.js +172 -22
package/src/clients/databricks.js +82 -7
package/src/clients/openai-format.js +48 -12
package/src/clients/openrouter-utils.js +15 -5
package/src/clients/responses-format.js +214 -0
package/src/clients/standard-tools.js +4 -4
package/src/orchestrator/index.js +32 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "lynkr",
-  "version": "4.0.0",
+  "version": "4.2.0",
   "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure  adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
   "main": "index.js",
   "bin": {

package/src/api/openai-router.js CHANGED Viewed

@@ -18,6 +18,7 @@ const express = require("express");
 const logger = require("../logger");
 const config = require("../config");
 const orchestrator = require("../orchestrator");
+const { getSession } = require("../sessions");
 const {
   convertOpenAIToAnthropic,
   convertAnthropicToOpenAI,
@@ -43,43 +44,113 @@ router.post("/chat/completions", async (req, res) => {
       messageCount: req.body.messages?.length,
       stream: req.body.stream || false,
       hasTools: !!req.body.tools,
-      toolCount: req.body.tools?.length || 0
+      toolCount: req.body.tools?.length || 0,
+      hasMessages: !!req.body.messages,
+      messagesType: typeof req.body.messages,
+      requestBodyKeys: Object.keys(req.body),
+      // Log first 500 chars of body for debugging
+      requestBodyPreview: JSON.stringify(req.body).substring(0, 500)
     }, "=== OPENAI CHAT COMPLETION REQUEST ===");
     // Convert OpenAI request to Anthropic format
     const anthropicRequest = convertOpenAIToAnthropic(req.body);
-    // Add session ID for tracking
-    anthropicRequest.sessionId = sessionId;
+    // Get or create session
+    const session = getSession(sessionId);
     // Handle streaming vs non-streaming
     if (req.body.stream) {
-      // Set up SSE headers
+      // Set up SSE headers for streaming
       res.setHeader("Content-Type", "text/event-stream");
       res.setHeader("Cache-Control", "no-cache");
       res.setHeader("Connection", "keep-alive");
-      // Process request through orchestrator (streaming mode)
-      anthropicRequest.stream = true;
       try {
-        // Call orchestrator and get streaming response
-        const anthropicResponse = await orchestrator.orchestrateRequest(anthropicRequest, {
-          raw: res,
-          writeHead: res.writeHead.bind(res),
-          write: res.write.bind(res),
-          end: res.end.bind(res)
+        // For streaming, we need to handle it differently - convert to non-streaming temporarily
+        // Get non-streaming response from orchestrator
+        anthropicRequest.stream = false; // Force non-streaming from orchestrator
+        const result = await orchestrator.processMessage({
+          payload: anthropicRequest,
+          headers: req.headers,
+          session: session,
+          options: {
+            maxSteps: req.body?.max_steps
+          }
         });
-        // Orchestrator handles streaming directly to response
-        // If we reach here, streaming is complete
+        // Check if we have a valid response body
+        if (!result || !result.body) {
+          logger.error({
+            result: result ? JSON.stringify(result) : "null",
+            resultKeys: result ? Object.keys(result) : null
+          }, "Invalid orchestrator response for streaming");
+          throw new Error("Invalid response from orchestrator");
+        }
+        // Convert to OpenAI format
+        const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
+        // Simulate streaming by sending the complete response as chunks
+        const content = openaiResponse.choices[0].message.content || "";
+        const words = content.split(" ");
+        // Send start chunk
+        const startChunk = {
+          id: openaiResponse.id,
+          object: "chat.completion.chunk",
+          created: openaiResponse.created,
+          model: req.body.model,
+          choices: [{
+            index: 0,
+            delta: { role: "assistant", content: "" },
+            finish_reason: null
+          }]
+        };
+        res.write(`data: ${JSON.stringify(startChunk)}\n\n`);
+        // Send content in word chunks
+        for (let i = 0; i < words.length; i++) {
+          const word = words[i] + (i < words.length - 1 ? " " : "");
+          const chunk = {
+            id: openaiResponse.id,
+            object: "chat.completion.chunk",
+            created: openaiResponse.created,
+            model: req.body.model,
+            choices: [{
+              index: 0,
+              delta: { content: word },
+              finish_reason: null
+            }]
+          };
+          res.write(`data: ${JSON.stringify(chunk)}\n\n`);
+        }
+        // Send finish chunk
+        const finishChunk = {
+          id: openaiResponse.id,
+          object: "chat.completion.chunk",
+          created: openaiResponse.created,
+          model: req.body.model,
+          choices: [{
+            index: 0,
+            delta: {},
+            finish_reason: openaiResponse.choices[0].finish_reason
+          }]
+        };
+        res.write(`data: ${JSON.stringify(finishChunk)}\n\n`);
+        res.write("data: [DONE]\n\n");
+        res.end();
         logger.info({
           duration: Date.now() - startTime,
-          mode: "streaming"
+          mode: "streaming",
+          inputTokens: openaiResponse.usage.prompt_tokens,
+          outputTokens: openaiResponse.usage.completion_tokens
         }, "OpenAI streaming completed");
       } catch (streamError) {
-        logger.error({ error: streamError.message }, "Streaming error");
+        logger.error({ error: streamError.message, stack: streamError.stack }, "Streaming error");
         // Send error in OpenAI streaming format
         const errorChunk = {
@@ -87,16 +158,14 @@ router.post("/chat/completions", async (req, res) => {
           object: "chat.completion.chunk",
           created: Math.floor(Date.now() / 1000),
           model: req.body.model,
-          choices: [
-            {
-              index: 0,
-              delta: {
-                role: "assistant",
-                content: `Error: ${streamError.message}`
-              },
-              finish_reason: "stop"
-            }
-          ]
+          choices: [{
+            index: 0,
+            delta: {
+              role: "assistant",
+              content: `Error: ${streamError.message}`
+            },
+            finish_reason: "stop"
+          }]
         };
         res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
@@ -105,10 +174,25 @@ router.post("/chat/completions", async (req, res) => {
       }
     } else {
       // Non-streaming mode
-      const anthropicResponse = await orchestrator.orchestrateRequest(anthropicRequest);
+      const result = await orchestrator.processMessage({
+        payload: anthropicRequest,
+        headers: req.headers,
+        session: session,
+        options: {
+          maxSteps: req.body?.max_steps
+        }
+      });
+      // Debug logging
+      logger.debug({
+        resultKeys: Object.keys(result || {}),
+        hasBody: !!result?.body,
+        bodyType: typeof result?.body,
+        bodyKeys: result?.body ? Object.keys(result.body) : null
+      }, "Orchestrator result structure");
       // Convert Anthropic response to OpenAI format
-      const openaiResponse = convertAnthropicToOpenAI(anthropicResponse, req.body.model);
+      const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
       logger.info({
         duration: Date.now() - startTime,
@@ -237,16 +321,46 @@ router.get("/models", (req, res) => {
         break;
       case "azure-openai":
-        const azureDeployment = config.azureOpenAI?.deployment || "gpt-4o";
-        models.push({
-          id: azureDeployment,
-          object: "model",
-          created: 1704067200,
-          owned_by: "azure-openai",
-          permission: [],
-          root: azureDeployment,
-          parent: null
-        });
+        // Return standard OpenAI model names that Cursor recognizes
+        // The actual Azure deployment name doesn't matter - Lynkr routes based on config
+        models.push(
+          {
+            id: "gpt-4o",
+            object: "model",
+            created: 1704067200,
+            owned_by: "openai",
+            permission: [],
+            root: "gpt-4o",
+            parent: null
+          },
+          {
+            id: "gpt-4-turbo",
+            object: "model",
+            created: 1704067200,
+            owned_by: "openai",
+            permission: [],
+            root: "gpt-4-turbo",
+            parent: null
+          },
+          {
+            id: "gpt-4",
+            object: "model",
+            created: 1704067200,
+            owned_by: "openai",
+            permission: [],
+            root: "gpt-4",
+            parent: null
+          },
+          {
+            id: "gpt-3.5-turbo",
+            object: "model",
+            created: 1704067200,
+            owned_by: "openai",
+            permission: [],
+            root: "gpt-3.5-turbo",
+            parent: null
+          }
+        );
         break;
       case "ollama":
@@ -288,10 +402,43 @@ router.get("/models", (req, res) => {
         });
     }
+    // Add embedding models if embeddings are configured
+    const embeddingConfig = determineEmbeddingProvider();
+    if (embeddingConfig) {
+      let embeddingModelId;
+      switch (embeddingConfig.provider) {
+        case "llamacpp":
+          embeddingModelId = "text-embedding-3-small"; // Generic name for Cursor
+          break;
+        case "ollama":
+          embeddingModelId = embeddingConfig.model;
+          break;
+        case "openrouter":
+          embeddingModelId = embeddingConfig.model;
+          break;
+        case "openai":
+          embeddingModelId = embeddingConfig.model || "text-embedding-ada-002";
+          break;
+        default:
+          embeddingModelId = "text-embedding-3-small";
+      }
+      models.push({
+        id: embeddingModelId,
+        object: "model",
+        created: 1704067200,
+        owned_by: embeddingConfig.provider,
+        permission: [],
+        root: embeddingModelId,
+        parent: null
+      });
+    }
     logger.debug({
       provider,
       modelCount: models.length,
-      models: models.map(m => m.id)
+      models: models.map(m => m.id),
+      hasEmbeddings: !!embeddingConfig
     }, "Listed models for OpenAI API");
     res.json({
@@ -536,10 +683,27 @@ async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
     const data = await response.json();
-    // llama.cpp returns OpenAI-compatible format, but ensure consistency
+    // llama.cpp returns array format: [{index: 0, embedding: [[...]]}]
+    // Need to convert to OpenAI format: {data: [{object: "embedding", embedding: [...], index: 0}]}
+    let embeddingsData;
+    if (Array.isArray(data)) {
+      // llama.cpp returns array directly
+      embeddingsData = data.map(item => ({
+        object: "embedding",
+        embedding: Array.isArray(item.embedding[0]) ? item.embedding[0] : item.embedding, // Flatten double-nested array
+        index: item.index
+      }));
+    } else if (data.data) {
+      // Already in OpenAI format
+      embeddingsData = data.data;
+    } else {
+      embeddingsData = [];
+    }
     return {
       object: "list",
-      data: data.data || [],
+      data: embeddingsData,
       model: model || data.model || "default",
       usage: data.usage || {
         prompt_tokens: 0,
@@ -736,6 +900,193 @@ router.post("/embeddings", async (req, res) => {
   }
 });
+/**
+ * POST /v1/responses
+ *
+ * OpenAI Responses API endpoint (used by GPT-5-Codex and newer models).
+ * Converts Responses API format to Chat Completions → processes → converts back.
+ */
+router.post("/responses", async (req, res) => {
+  const startTime = Date.now();
+  const sessionId = req.headers["x-session-id"] || req.headers["authorization"]?.split(" ")[1] || "responses-session";
+  try {
+    const { convertResponsesToChat, convertChatToResponses } = require("../clients/responses-format");
+    // Comprehensive debug logging
+    logger.info({
+      endpoint: "/v1/responses",
+      inputType: typeof req.body.input,
+      inputIsArray: Array.isArray(req.body.input),
+      inputLength: Array.isArray(req.body.input) ? req.body.input.length : req.body.input?.length,
+      inputPreview: typeof req.body.input === 'string'
+        ? req.body.input.substring(0, 100)
+        : Array.isArray(req.body.input)
+          ? req.body.input.map(m => ({role: m?.role, hasContent: !!m?.content, hasTool: !!m?.tool_calls}))
+          : 'unknown',
+      model: req.body.model,
+      hasTools: !!req.body.tools,
+      stream: req.body.stream || false,
+      fullRequestBodyKeys: Object.keys(req.body)
+    }, "=== RESPONSES API REQUEST ===");
+    // Convert Responses API to Chat Completions format
+    const chatRequest = convertResponsesToChat(req.body);
+    logger.info({
+      chatRequestMessageCount: chatRequest.messages?.length,
+      chatRequestMessages: chatRequest.messages?.map(m => ({
+        role: m.role,
+        hasContent: !!m.content,
+        contentPreview: typeof m.content === 'string' ? m.content.substring(0, 50) : m.content
+      }))
+    }, "After Responses→Chat conversion");
+    // Convert to Anthropic format
+    const anthropicRequest = convertOpenAIToAnthropic(chatRequest);
+    logger.info({
+      anthropicMessageCount: anthropicRequest.messages?.length,
+      anthropicMessages: anthropicRequest.messages?.map(m => ({
+        role: m.role,
+        hasContent: !!m.content
+      }))
+    }, "After Chat→Anthropic conversion");
+    // Get session
+    const session = getSession(sessionId);
+    // Handle streaming vs non-streaming
+    if (req.body.stream) {
+      // Set up SSE headers for streaming
+      res.setHeader("Content-Type", "text/event-stream");
+      res.setHeader("Cache-Control", "no-cache");
+      res.setHeader("Connection", "keep-alive");
+      try {
+        // Force non-streaming from orchestrator
+        anthropicRequest.stream = false;
+        const result = await orchestrator.processMessage({
+          payload: anthropicRequest,
+          headers: req.headers,
+          session: session,
+          options: {
+            maxSteps: req.body?.max_steps
+          }
+        });
+        // Convert back: Anthropic → OpenAI → Responses
+        const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
+        const responsesResponse = convertChatToResponses(chatResponse);
+        // Simulate streaming using OpenAI Responses API SSE format
+        const content = responsesResponse.content || "";
+        const words = content.split(" ");
+        // Send response.created event
+        const createdEvent = {
+          id: responsesResponse.id,
+          object: "response.created",
+          created: responsesResponse.created,
+          model: req.body.model
+        };
+        res.write(`event: response.created\n`);
+        res.write(`data: ${JSON.stringify(createdEvent)}\n\n`);
+        // Send content in word chunks using response.output_text.delta
+        for (let i = 0; i < words.length; i++) {
+          const word = words[i] + (i < words.length - 1 ? " " : "");
+          const deltaEvent = {
+            id: responsesResponse.id,
+            object: "response.output_text.delta",
+            delta: word,
+            created: responsesResponse.created
+          };
+          res.write(`event: response.output_text.delta\n`);
+          res.write(`data: ${JSON.stringify(deltaEvent)}\n\n`);
+        }
+        // Send response.completed event
+        const completedEvent = {
+          id: responsesResponse.id,
+          object: "response.completed",
+          created: responsesResponse.created,
+          model: req.body.model,
+          content: content,
+          stop_reason: responsesResponse.stop_reason,
+          usage: responsesResponse.usage
+        };
+        res.write(`event: response.completed\n`);
+        res.write(`data: ${JSON.stringify(completedEvent)}\n\n`);
+        // Optional: Send [DONE] marker
+        res.write("data: [DONE]\n\n");
+        res.end();
+        logger.info({
+          duration: Date.now() - startTime,
+          mode: "streaming",
+          contentLength: content.length
+        }, "=== RESPONSES API STREAMING COMPLETE ===");
+      } catch (streamError) {
+        logger.error({ error: streamError.message, stack: streamError.stack }, "Responses API streaming error");
+        // Send error via SSE
+        res.write(`data: ${JSON.stringify({
+          error: {
+            message: streamError.message || "Internal server error",
+            type: "server_error",
+            code: "internal_error"
+          }
+        })}\n\n`);
+        res.end();
+      }
+    } else {
+      // Non-streaming response
+      anthropicRequest.stream = false;
+      const result = await orchestrator.processMessage({
+        payload: anthropicRequest,
+        headers: req.headers,
+        session: session,
+        options: {
+          maxSteps: req.body?.max_steps
+        }
+      });
+      // Convert back: Anthropic → OpenAI → Responses
+      const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
+      const responsesResponse = convertChatToResponses(chatResponse);
+      logger.info({
+        duration: Date.now() - startTime,
+        contentLength: responsesResponse.content?.length || 0,
+        stopReason: responsesResponse.stop_reason
+      }, "=== RESPONSES API RESPONSE ===");
+      res.json(responsesResponse);
+    }
+  } catch (error) {
+    logger.error({
+      error: error.message,
+      stack: error.stack,
+      duration: Date.now() - startTime
+    }, "Responses API error");
+    res.status(500).json({
+      error: {
+        message: error.message || "Internal server error",
+        type: "server_error",
+        code: "internal_error"
+      }
+    });
+  }
+});
 /**
  * GET /v1/health
  *