npm - @aj-archipelago/cortex - Versions diffs - 1.3.22 → 1.3.23 - Mend

@aj-archipelago/cortex 1.3.22 → 1.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +64 -0
package/config.js +26 -1
package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +9 -4
package/helper-apps/cortex-realtime-voice-server/src/realtime/realtimeTypes.ts +1 -0
package/lib/util.js +4 -24
package/package.json +5 -2
package/pathways/system/rest_streaming/sys_ollama_chat.js +21 -0
package/pathways/system/rest_streaming/sys_ollama_completion.js +14 -0
package/pathways/transcribe_gemini.js +181 -53
package/server/modelExecutor.js +8 -0
package/server/pathwayResolver.js +6 -1
package/server/plugins/claude3VertexPlugin.js +41 -15
package/server/plugins/gemini15ChatPlugin.js +90 -1
package/server/plugins/gemini15VisionPlugin.js +9 -3
package/server/plugins/modelPlugin.js +11 -8
package/server/plugins/ollamaChatPlugin.js +158 -0
package/server/plugins/ollamaCompletionPlugin.js +147 -0
package/server/rest.js +46 -5
package/tests/multimodal_conversion.test.js +169 -0
package/tests/transcribe_gemini.test.js +217 -0

package/server/plugins/claude3VertexPlugin.js CHANGED Viewed

@@ -380,7 +380,7 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
     cortexRequest.params = {}; // query params
     cortexRequest.stream = stream;
     cortexRequest.urlSuffix = cortexRequest.stream
-      ? ":streamRawPredict"
+      ? ":streamRawPredict?alt=sse"
       : ":rawPredict";
     const gcpAuthTokenHelper = this.config.get("gcpAuthTokenHelper");
@@ -392,33 +392,59 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
   processStreamEvent(event, requestProgress) {
     const eventData = JSON.parse(event.data);
+    const baseOpenAIResponse = {
+      id: eventData.message?.id || `chatcmpl-${Date.now()}`,
+      object: "chat.completion.chunk",
+      created: Math.floor(Date.now() / 1000),
+      model: this.modelName,
+      choices: [{
+        index: 0,
+        delta: {},
+        finish_reason: null
+      }]
+    };
     switch (eventData.type) {
       case "message_start":
-        requestProgress.data = JSON.stringify(eventData.message);
-        break;
-      case "content_block_start":
-        break;
-      case "ping":
+        // Initial message with role
+        baseOpenAIResponse.choices[0].delta = {
+          role: "assistant",
+          content: ""
+        };
+        requestProgress.data = JSON.stringify(baseOpenAIResponse);
         break;
       case "content_block_delta":
         if (eventData.delta.type === "text_delta") {
-          requestProgress.data = JSON.stringify(eventData.delta.text);
+          baseOpenAIResponse.choices[0].delta = {
+            content: eventData.delta.text
+          };
+          requestProgress.data = JSON.stringify(baseOpenAIResponse);
         }
         break;
-      case "content_block_stop":
-        break;
-      case "message_delta":
-        break;
       case "message_stop":
-        requestProgress.data = "[DONE]";
+        baseOpenAIResponse.choices[0].delta = {};
+        baseOpenAIResponse.choices[0].finish_reason = "stop";
+        requestProgress.data = JSON.stringify(baseOpenAIResponse);
         requestProgress.progress = 1;
         break;
       case "error":
-        requestProgress.data = `\n\n*** ${
-          eventData.error.message || eventData.error
-        } ***`;
+        baseOpenAIResponse.choices[0].delta = {
+          content: `\n\n*** ${eventData.error.message || eventData.error} ***`
+        };
+        baseOpenAIResponse.choices[0].finish_reason = "error";
+        requestProgress.data = JSON.stringify(baseOpenAIResponse);
         requestProgress.progress = 1;
         break;
+      // Ignore other event types as they don't map to OpenAI format
+      case "content_block_start":
+      case "content_block_stop":
+      case "message_delta":
+      case "ping":
+        break;
     }
     return requestProgress;

package/server/plugins/gemini15ChatPlugin.js CHANGED Viewed

@@ -56,7 +56,11 @@ class Gemini15ChatPlugin extends ModelPlugin {
                 const { role, author, content } = message;
                 if (role === 'system') {
-                    systemParts.push({ text: content });
+                    if (Array.isArray(content)) {
+                        content.forEach(item => systemParts.push({ text: item }));
+                    } else {
+                        systemParts.push({ text: content });
+                    }
                     return;
                 }
@@ -169,6 +173,91 @@ class Gemini15ChatPlugin extends ModelPlugin {
         return this.executeRequest(cortexRequest);
     }
+    processStreamEvent(event, requestProgress) {
+        const eventData = JSON.parse(event.data);
+        // Initialize requestProgress if needed
+        requestProgress = requestProgress || {};
+        requestProgress.data = requestProgress.data || null;
+        // Create a helper function to generate message chunks
+        const createChunk = (delta) => ({
+            id: eventData.responseId || `chatcmpl-${Date.now()}`,
+            object: "chat.completion.chunk",
+            created: Math.floor(Date.now() / 1000),
+            model: this.modelName,
+            choices: [{
+                index: 0,
+                delta,
+                finish_reason: null
+            }]
+        });
+        // Handle content chunks - do this first before handling any finish conditions
+        if (eventData.candidates?.[0]?.content?.parts?.[0]?.text) {
+            if (!requestProgress.started) {
+                // First chunk - send role
+                requestProgress.data = JSON.stringify(createChunk({ role: "assistant" }));
+                requestProgress.started = true;
+                // Immediately follow up with the first content chunk
+                requestProgress.data = JSON.stringify(createChunk({
+                    content: eventData.candidates[0].content.parts[0].text
+                }));
+            } else {
+                // Send content chunk
+                requestProgress.data = JSON.stringify(createChunk({
+                    content: eventData.candidates[0].content.parts[0].text
+                }));
+            }
+            // If this message also has STOP, mark it for completion but don't overwrite the content
+            if (eventData.candidates[0].finishReason === "STOP") {
+                requestProgress.progress = 1;
+            }
+        } else if (eventData.candidates?.[0]?.finishReason === "STOP") {
+            // Only send DONE if there was no content in this message
+            requestProgress.data = '[DONE]';
+            requestProgress.progress = 1;
+        }
+        // Handle safety blocks
+        if (eventData.candidates?.[0]?.safetyRatings?.some(rating => rating.blocked)) {
+            requestProgress.data = JSON.stringify({
+                id: eventData.responseId || `chatcmpl-${Date.now()}`,
+                object: "chat.completion.chunk",
+                created: Math.floor(Date.now() / 1000),
+                model: this.modelName,
+                choices: [{
+                    index: 0,
+                    delta: { content: "\n\n*** Response blocked due to safety ratings ***" },
+                    finish_reason: "content_filter"
+                }]
+            });
+            requestProgress.progress = 1;
+            return requestProgress;
+        }
+        // Handle prompt feedback blocks
+        if (eventData.promptFeedback?.blockReason) {
+            requestProgress.data = JSON.stringify({
+                id: eventData.responseId || `chatcmpl-${Date.now()}`,
+                object: "chat.completion.chunk",
+                created: Math.floor(Date.now() / 1000),
+                model: this.modelName,
+                choices: [{
+                    index: 0,
+                    delta: { content: `\n\n*** Response blocked: ${eventData.promptFeedback.blockReason} ***` },
+                    finish_reason: "content_filter"
+                }]
+            });
+            requestProgress.progress = 1;
+            return requestProgress;
+        }
+        return requestProgress;
+    }
     // Override the logging function to display the messages and responses
     logRequestData(data, responseData, prompt) {
         const messages = data && data.contents;

package/server/plugins/gemini15VisionPlugin.js CHANGED Viewed

@@ -24,19 +24,24 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
                 const { role, author, content } = message;
                 if (role === 'system') {
-                    systemParts.push({ text: content });
+                    if (Array.isArray(content)) {
+                        content.forEach(item => systemParts.push({ text: item }));
+                    } else {
+                        systemParts.push({ text: content });
+                    }
                     return;
                 }
                 // Convert content to Gemini format, trying to maintain compatibility
                 const convertPartToGemini = (inputPart) => {
                     try {
+                        // First try to parse as JSON if it's a string
                         const part = typeof inputPart === 'string' ? JSON.parse(inputPart) : inputPart;
                         const {type, text, image_url, gcs} = part;
                         let fileUrl = gcs || image_url?.url;
                         if (typeof part === 'string') {
-                            return { text: text };
+                            return { text: inputPart };
                         } else if (type === 'text') {
                             return { text: text };
                         } else if (type === 'image_url') {
@@ -77,7 +82,8 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
                             return null;
                         }
                     } catch (e) {
-                        // this space intentionally left blank
+                        // If JSON parsing fails or any other error, treat as plain text
+                        return inputPart ? { text: inputPart } : null;
                     }
                     return inputPart ? { text: inputPart } : null;
                 };

package/server/plugins/modelPlugin.js CHANGED Viewed

@@ -381,14 +381,17 @@ class ModelPlugin {
             // finish reason can be in different places in the message
             const finishReason = parsedMessage?.choices?.[0]?.finish_reason || parsedMessage?.candidates?.[0]?.finishReason;
-            if (finishReason?.toLowerCase() === 'stop') {
-                requestProgress.progress = 1;
-            } else {
-                if (finishReason?.toLowerCase() === 'safety') {
-                    const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
-                    logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
-                    requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
-                    requestProgress.progress = 1;
+            if (finishReason) {
+                switch (finishReason.toLowerCase()) {
+                    case 'safety':
+                        const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
+                        logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
+                        requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
+                        requestProgress.progress = 1;
+                        break;
+                    default:
+                        requestProgress.progress = 1;
+                        break;
                 }
             }
         }

package/server/plugins/ollamaChatPlugin.js ADDED Viewed

@@ -0,0 +1,158 @@
+import ModelPlugin from './modelPlugin.js';
+import logger from '../../lib/logger.js';
+import { Transform } from 'stream';
+class OllamaChatPlugin extends ModelPlugin {
+  getRequestParameters(text, parameters, prompt) {
+    const { modelPromptMessages } = this.getCompiledPrompt(text, parameters, prompt);
+    return {
+      data: {
+        model: parameters.ollamaModel,
+        messages: modelPromptMessages,
+        stream: parameters.stream
+      },
+      params: {}
+    };
+  }
+  logRequestData(data, responseData, prompt) {
+    const { stream, messages, model } = data;
+    if (messages && messages.length > 0) {
+      logger.info(`[ollama chat request sent to model ${model} containing ${messages.length} messages]`);
+      let totalLength = 0;
+      let totalUnits;
+      messages.forEach((message, index) => {
+        const content = message.content;
+        const { length, units } = this.getLength(content);
+        const preview = this.shortenContent(content);
+        logger.verbose(
+          `message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`
+        );
+        totalLength += length;
+        totalUnits = units;
+      });
+      logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
+    }
+    if (stream) {
+      logger.info(`[response received as an SSE stream]`);
+    } else if (responseData) {
+      const responseText = this.parseResponse(responseData);
+      const { length, units } = this.getLength(responseText);
+      logger.info(`[response received containing ${length} ${units}]`);
+      logger.verbose(`${this.shortenContent(responseText)}`);
+    }
+    prompt &&
+      prompt.debugInfo &&
+      (prompt.debugInfo += `\n${JSON.stringify(data)}`);
+  }
+  parseResponse(data) {
+    // If data is not a string (e.g. streaming), return as is
+    if (typeof data !== 'string') {
+      return data;
+    }
+    // Split into lines and filter empty ones
+    const lines = data.split('\n').filter(line => line.trim());
+    let fullResponse = '';
+    for (const line of lines) {
+      try {
+        const jsonObj = JSON.parse(line);
+        if (jsonObj.message && jsonObj.message.content) {
+          // Unescape special sequences
+          const content = jsonObj.message.content
+            .replace(/\\n/g, '\n')
+            .replace(/\\"/g, '"')
+            .replace(/\\\\/g, '\\')
+            .replace(/\\u003c/g, '<')
+            .replace(/\\u003e/g, '>');
+          fullResponse += content;
+        }
+      } catch (err) {
+        // If we can't parse the line as JSON, just skip it
+        continue;
+      }
+    }
+    return fullResponse;
+  }
+  processStreamEvent(event, requestProgress) {
+    try {
+      const data = JSON.parse(event.data);
+      // Handle the streaming response
+      if (data.message?.content) {
+        // Unescape special sequences in the content
+        const content = data.message.content
+          .replace(/\\n/g, '\n')
+          .replace(/\\"/g, '"')
+          .replace(/\\\\/g, '\\')
+          .replace(/\\u003c/g, '<')
+          .replace(/\\u003e/g, '>');
+        requestProgress.data = JSON.stringify(content);
+      }
+      // Check if this is the final message
+      if (data.done) {
+        requestProgress.data = '[DONE]';
+        requestProgress.progress = 1;
+      }
+      return requestProgress;
+    } catch (err) {
+      // If we can't parse the event data, return the progress as is
+      return requestProgress;
+    }
+  }
+  async execute(text, parameters, prompt, cortexRequest) {
+    const requestParameters = this.getRequestParameters(text, parameters, prompt);
+    cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
+    cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };
+    // For Ollama streaming, transform NDJSON to SSE format
+    if (parameters.stream) {
+      const response = await this.executeRequest(cortexRequest);
+      // Create a transform stream that converts NDJSON to SSE format
+      const transformer = new Transform({
+        decodeStrings: false, // Keep as string
+        transform(chunk, encoding, callback) {
+          try {
+            const lines = chunk.toString().split('\n');
+            for (const line of lines) {
+              if (line.trim()) {
+                // Format as SSE data
+                this.push(`data: ${line}\n\n`);
+              }
+            }
+            callback();
+          } catch (err) {
+            callback(err);
+          }
+        }
+      });
+      // Pipe the response through our transformer
+      response.pipe(transformer);
+      // Return the transformed stream
+      return transformer;
+    }
+    return this.executeRequest(cortexRequest);
+  }
+}
+export default OllamaChatPlugin;

package/server/plugins/ollamaCompletionPlugin.js ADDED Viewed

@@ -0,0 +1,147 @@
+import ModelPlugin from './modelPlugin.js';
+import logger from '../../lib/logger.js';
+import { Transform } from 'stream';
+class OllamaCompletionPlugin extends ModelPlugin {
+  getRequestParameters(text, parameters, prompt) {
+    return {
+      data: {
+        model: parameters.ollamaModel,
+        prompt: text,
+        stream: parameters.stream
+      },
+      params: {}
+    };
+  }
+  logRequestData(data, responseData, prompt) {
+    const { stream, prompt: promptText, model } = data;
+    if (promptText) {
+      logger.info(`[ollama completion request sent to model ${model}]`);
+      const { length, units } = this.getLength(promptText);
+      const preview = this.shortenContent(promptText);
+      logger.verbose(`prompt ${units}: ${length}, content: "${preview}"`);
+      logger.info(`[completion request contained ${length} ${units}]`);
+    }
+    if (stream) {
+      logger.info(`[response received as an SSE stream]`);
+    } else if (responseData) {
+      const responseText = this.parseResponse(responseData);
+      const { length, units } = this.getLength(responseText);
+      logger.info(`[response received containing ${length} ${units}]`);
+      logger.verbose(`${this.shortenContent(responseText)}`);
+    }
+    prompt &&
+      prompt.debugInfo &&
+      (prompt.debugInfo += `\n${JSON.stringify(data)}`);
+  }
+  parseResponse(data) {
+    // If data is not a string (e.g. streaming), return as is
+    if (typeof data !== 'string') {
+      return data;
+    }
+    // Split into lines and filter empty ones
+    const lines = data.split('\n').filter(line => line.trim());
+    let fullResponse = '';
+    for (const line of lines) {
+      try {
+        const jsonObj = JSON.parse(line);
+        if (jsonObj.response) {
+          // Unescape special sequences
+          const content = jsonObj.response
+            .replace(/\\n/g, '\n')
+            .replace(/\\"/g, '"')
+            .replace(/\\\\/g, '\\')
+            .replace(/\\u003c/g, '<')
+            .replace(/\\u003e/g, '>');
+          fullResponse += content;
+        }
+      } catch (err) {
+        // If we can't parse the line as JSON, just skip it
+        continue;
+      }
+    }
+    return fullResponse;
+  }
+  processStreamEvent(event, requestProgress) {
+    try {
+      const data = JSON.parse(event.data);
+      // Handle the streaming response
+      if (data.response) {
+        // Unescape special sequences in the content
+        const content = data.response
+          .replace(/\\n/g, '\n')
+          .replace(/\\"/g, '"')
+          .replace(/\\\\/g, '\\')
+          .replace(/\\u003c/g, '<')
+          .replace(/\\u003e/g, '>');
+        requestProgress.data = JSON.stringify(content);
+      }
+      // Check if this is the final message
+      if (data.done) {
+        requestProgress.data = '[DONE]';
+        requestProgress.progress = 1;
+      }
+      return requestProgress;
+    } catch (err) {
+      // If we can't parse the event data, return the progress as is
+      return requestProgress;
+    }
+  }
+  async execute(text, parameters, prompt, cortexRequest) {
+    const requestParameters = this.getRequestParameters(text, parameters, prompt);
+    cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
+    cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };
+    // For Ollama streaming, transform NDJSON to SSE format
+    if (parameters.stream) {
+      const response = await this.executeRequest(cortexRequest);
+      // Create a transform stream that converts NDJSON to SSE format
+      const transformer = new Transform({
+        decodeStrings: false, // Keep as string
+        transform(chunk, encoding, callback) {
+          try {
+            const lines = chunk.toString().split('\n');
+            for (const line of lines) {
+              if (line.trim()) {
+                // Format as SSE data
+                this.push(`data: ${line}\n\n`);
+              }
+            }
+            callback();
+          } catch (err) {
+            callback(err);
+          }
+        }
+      });
+      // Pipe the response through our transformer
+      response.pipe(transformer);
+      // Return the transformed stream
+      return transformer;
+    }
+    return this.executeRequest(cortexRequest);
+  }
+}
+export default OllamaCompletionPlugin;

package/server/rest.js CHANGED Viewed

@@ -6,6 +6,22 @@ import { requestState } from './requestState.js';
 import { v4 as uuidv4 } from 'uuid';
 import logger from '../lib/logger.js';
 import { getSingleTokenChunks } from './chunker.js';
+import axios from 'axios';
+const getOllamaModels = async (ollamaUrl) => {
+    try {
+        const response = await axios.get(`${ollamaUrl}/api/tags`);
+        return response.data.models.map(model => ({
+            id: `ollama-${model.name}`,
+            object: 'model',
+            owned_by: 'ollama',
+            permission: ''
+        }));
+    } catch (error) {
+        logger.error(`Error fetching Ollama models: ${error.message}`);
+        return [];
+    }
+};
 const chunkTextIntoTokens = (() => {
     let partialToken = '';
@@ -282,7 +298,14 @@ function buildRestEndpoints(pathways, app, server, config) {
         // Create OpenAI compatible endpoints
         app.post('/v1/completions', async (req, res) => {
             const modelName = req.body.model || 'gpt-3.5-turbo';
-            const pathwayName = openAICompletionModels[modelName] || openAICompletionModels['*'];
+            let pathwayName;
+            if (modelName.startsWith('ollama-')) {
+                pathwayName = 'sys_ollama_completion';
+                req.body.ollamaModel = modelName.replace('ollama-', '');
+            } else {
+                pathwayName = openAICompletionModels[modelName] || openAICompletionModels['*'];
+            }
             if (!pathwayName) {
                 res.status(404).json({
@@ -318,7 +341,6 @@ function buildRestEndpoints(pathways, app, server, config) {
             if (Boolean(req.body.stream)) {
                 jsonResponse.id = `cmpl-${resultText}`;
                 jsonResponse.choices[0].finish_reason = null;
-                //jsonResponse.object = "text_completion.chunk";
                 processIncomingStream(resultText, res, jsonResponse, pathway);
             } else {
@@ -330,7 +352,14 @@ function buildRestEndpoints(pathways, app, server, config) {
         app.post('/v1/chat/completions', async (req, res) => {
             const modelName = req.body.model || 'gpt-3.5-turbo';
-            const pathwayName = openAIChatModels[modelName] || openAIChatModels['*'];
+            let pathwayName;
+            if (modelName.startsWith('ollama-')) {
+                pathwayName = 'sys_ollama_chat';
+                req.body.ollamaModel = modelName.replace('ollama-', '');
+            } else {
+                pathwayName = openAIChatModels[modelName] || openAIChatModels['*'];
+            }
             if (!pathwayName) {
                 res.status(404).json({
@@ -385,8 +414,11 @@ function buildRestEndpoints(pathways, app, server, config) {
         app.get('/v1/models', async (req, res) => {
             const openAIModels = { ...openAIChatModels, ...openAICompletionModels };
             const defaultModelId = 'gpt-3.5-turbo';
+            let models = [];
-            const models = Object.entries(openAIModels)
+            // Get standard OpenAI-compatible models, filtering out our internal pathway models
+            models = Object.entries(openAIModels)
+                .filter(([modelId]) => !['ollama-chat', 'ollama-completion'].includes(modelId))
                 .map(([modelId]) => {
                     if (modelId.includes('*')) {
                         modelId = defaultModelId;
@@ -397,7 +429,16 @@ function buildRestEndpoints(pathways, app, server, config) {
                         owned_by: 'openai',
                         permission: '',
                     };
-                })
+                });
+            // Get Ollama models if configured
+            if (config.get('ollamaUrl')) {
+                const ollamaModels = await getOllamaModels(config.get('ollamaUrl'));
+                models = [...models, ...ollamaModels];
+            }
+            // Filter out duplicates and sort
+            models = models
                 .filter((model, index, self) => {
                     return index === self.findIndex((m) => m.id === model.id);
                 })