npm - @aj-archipelago/cortex - Versions diffs - 1.3.22 → 1.3.23 - Mend

@aj-archipelago/cortex 1.3.22 → 1.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +64 -0
package/config.js +26 -1
package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +9 -4
package/helper-apps/cortex-realtime-voice-server/src/realtime/realtimeTypes.ts +1 -0
package/lib/util.js +4 -24
package/package.json +5 -2
package/pathways/system/rest_streaming/sys_ollama_chat.js +21 -0
package/pathways/system/rest_streaming/sys_ollama_completion.js +14 -0
package/pathways/transcribe_gemini.js +181 -53
package/server/modelExecutor.js +8 -0
package/server/pathwayResolver.js +6 -1
package/server/plugins/claude3VertexPlugin.js +41 -15
package/server/plugins/gemini15ChatPlugin.js +90 -1
package/server/plugins/gemini15VisionPlugin.js +9 -3
package/server/plugins/modelPlugin.js +11 -8
package/server/plugins/ollamaChatPlugin.js +158 -0
package/server/plugins/ollamaCompletionPlugin.js +147 -0
package/server/rest.js +46 -5
package/tests/multimodal_conversion.test.js +169 -0
package/tests/transcribe_gemini.test.js +217 -0

package/README.md CHANGED Viewed

@@ -561,6 +561,70 @@ Each model configuration can include:
 }
 ```
+### API Compatibility
+Cortex provides OpenAI-compatible REST endpoints that allow you to use various models through a standardized interface. When `enableRestEndpoints` is set to `true`, Cortex exposes the following endpoints:
+- `/v1/models`: List available models
+- `/v1/chat/completions`: Chat completion endpoint
+- `/v1/completions`: Text completion endpoint
+This means you can use Cortex with any client library or tool that supports the OpenAI API format. For example:
+```python
+from openai import OpenAI
+client = OpenAI(
+    base_url="http://localhost:4000/v1",  # Point to your Cortex server
+    api_key="your-key"  # If you have configured cortexApiKeys
+)
+response = client.chat.completions.create(
+    model="gpt-4",  # Or any model configured in Cortex
+    messages=[{"role": "user", "content": "Hello!"}]
+)
+```
+#### Ollama Integration
+Cortex includes built-in support for Ollama models through its OpenAI-compatible REST interface. When `ollamaUrl` is configured in your settings, Cortex will:
+1. Automatically discover and expose all available Ollama models through the `/v1/models` endpoint with an "ollama-" prefix
+2. Route any requests using an "ollama-" prefixed model to the appropriate Ollama endpoint
+To enable Ollama support, add the following to your configuration (replace the URL with your Ollama server's address if it differs):
+```json
+{
+    "enableRestEndpoints": true,
+    "ollamaUrl": "http://localhost:11434"
+}
+```
+You can then use any Ollama model through the standard OpenAI-compatible endpoints:
+```bash
+# List available models (will include Ollama models with "ollama-" prefix)
+curl http://localhost:4000/v1/models
+# Use an Ollama model for chat
+curl http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ollama-llama2",
+    "messages": [{"role": "user", "content": "Hello!"}]
+  }'
+# Use an Ollama model for completions
+curl http://localhost:4000/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ollama-codellama",
+    "prompt": "Write a function that"
+  }'
+```
+This integration allows you to seamlessly use local Ollama models alongside cloud-based models through a single, consistent interface.
 ### Other Configuration Properties
 The following properties can be configured through environment variables or the configuration file:

package/config.js CHANGED Viewed

@@ -85,6 +85,11 @@ var config = convict({
         default: false,
         env: 'CORTEX_ENABLE_REST'
     },
+    ollamaUrl: {
+        format: String,
+        default: 'http://127.0.0.1:11434',
+        env: 'OLLAMA_URL'
+    },
     entityConstants: {
         format: Object,
         default: {
@@ -281,7 +286,27 @@ var config = convict({
                 "headers": {
                     "Content-Type": "application/json"
                 },
-            }
+            },
+            "ollama-chat": {
+                "type": "OLLAMA-CHAT",
+                "url": "{{ollamaUrl}}/api/chat",
+                "headers": {
+                  "Content-Type": "application/json"
+                },
+                "requestsPerSecond": 10,
+                "maxTokenLength": 131072,
+                "supportsStreaming": true
+            },
+            "ollama-completion": {
+                "type": "OLLAMA-COMPLETION",
+                "url": "{{ollamaUrl}}/api/generate",
+                "headers": {
+                  "Content-Type": "application/json"
+                },
+                "requestsPerSecond": 10,
+                "maxTokenLength": 131072,
+                "supportsStreaming": true
+            },
         },
         env: 'CORTEX_MODELS'
     },

package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts CHANGED Viewed

@@ -355,14 +355,19 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
     if (!this.isConnected) {
       throw new Error('Not connected');
     }
+    // Create a new config object without custom_voice_id
+    const { custom_voice_id, ...filteredConfig } = {
+      ...this.sessionConfig,
+      ...sessionConfig
+    };
     const message = JSON.stringify({
       event_id: createId(),
       type: 'session.update',
-      session: {
-        ...this.sessionConfig,
-        ...sessionConfig,
-      },
+      session: filteredConfig,
     });
     // NOTE: session update messages can be noisy, but they are still logged here
     logger.log('Sending session update message:', message);
     this.ws?.send(message);

package/helper-apps/cortex-realtime-voice-server/src/realtime/realtimeTypes.ts CHANGED Viewed

@@ -46,6 +46,7 @@ export type RealtimeSessionConfig = {
   modalities: Array<Modality>,
   instructions: string,
   voice: Voice,
+  custom_voice_id?: string | null,
   input_audio_format: AudioFormat,
   output_audio_format: AudioFormat,
   input_audio_transcription: null | { model: 'whisper-1' | (string & {}) },

package/lib/util.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import logger from "./logger.js";
 import stream from 'stream';
-import subsrt from 'subsrt';
+import subvibe from '@aj-archipelago/subvibe';
 import os from 'os';
 import http from 'http';
 import https from 'https';
@@ -126,19 +126,9 @@ function convertSrtToText(str) {
 function alignSubtitles(subtitles, format, offsets) {
     const result = [];
-    function preprocessStr(str) {
-        try{
-            if(!str) return '';
-            return str.trim().replace(/(\n\n)(?!\n)/g, '\n\n\n');
-        }catch(e){
-            logger.error(`An error occurred in content text preprocessing: ${e}`);
-            return '';
-        }
-    }
     function shiftSubtitles(subtitle, shiftOffset) {
-        const captions = subsrt.parse(preprocessStr(subtitle));
-        const resynced = subsrt.resync(captions, { offset: shiftOffset });
+        const captions = subvibe.parse(subtitle);
+        const resynced = subvibe.resync(captions.cues, { offset: shiftOffset });
         return resynced;
     }
@@ -146,18 +136,8 @@ function alignSubtitles(subtitles, format, offsets) {
         result.push(...shiftSubtitles(subtitles[i], offsets[i]*1000)); // convert to milliseconds
     }
-    try {
-        //if content has needed html style tags, keep them
-        for(const obj of result) {
-            if(obj && obj.content){
-                obj.text = obj.content;
-            }
-        }
-    } catch (error) {
-        logger.error(`An error occurred in content text parsing: ${error}`);
-    }
-    return subsrt.build(result, { format: format === 'vtt' ? 'vtt' : 'srt' });
+    return subvibe.build(result, format || 'srt');
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.22",
+  "version": "1.3.23",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -33,6 +33,7 @@
   "type": "module",
   "homepage": "https://github.com/aj-archipelago/cortex#readme",
   "dependencies": {
+    "@aj-archipelago/subvibe": "^1.0.3",
     "@apollo/server": "^4.7.3",
     "@apollo/server-plugin-response-cache": "^4.1.2",
     "@apollo/utils.keyvadapter": "^3.0.0",
@@ -63,7 +64,6 @@
     "ioredis": "^5.3.1",
     "keyv": "^4.5.2",
     "mime-types": "^2.1.35",
-    "subsrt": "^1.1.1",
     "uuid": "^9.0.0",
     "winston": "^3.11.0",
     "ws": "^8.12.0"
@@ -86,5 +86,8 @@
       "dotenv/config"
     ],
     "concurrency": 1
+  },
+  "overrides": {
+    "whatwg-url": "^12.0.0"
   }
 }

package/pathways/system/rest_streaming/sys_ollama_chat.js ADDED Viewed

@@ -0,0 +1,21 @@
+// sys_ollama_chat.js
+// override handler for ollama chat model
+import { Prompt } from '../../../server/prompt.js';
+export default {
+    prompt:
+    [
+        new Prompt({ messages: [
+            "{{messages}}",
+        ]}),
+    ],
+    inputParameters: {
+        messages: [{ role: '', content: '' }],
+        ollamaModel: '',
+    },
+    model: 'ollama-chat',
+    useInputChunking: false,
+    emulateOpenAIChatModel: 'ollama-chat',
+    timeout: 300,
+}

package/pathways/system/rest_streaming/sys_ollama_completion.js ADDED Viewed

@@ -0,0 +1,14 @@
+// sys_ollama_completion.js
+// default handler for ollama completion endpoints when REST endpoints are enabled
+export default {
+    prompt: `{{text}}`,
+    inputParameters: {
+        text: '',
+        ollamaModel: '',
+    },
+    model: 'ollama-completion',
+    useInputChunking: false,
+    emulateOpenAICompletionModel: 'ollama-completion',
+    timeout: 300,
+}

package/pathways/transcribe_gemini.js CHANGED Viewed

@@ -5,36 +5,124 @@ import { Prompt } from "../server/prompt.js";
 const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide
 export function convertSrtToVtt(data) {
     if (!data || !data.trim()) {
         return "WEBVTT\n\n";
     }
-    // remove dos newlines
+    // If it's already VTT format and has header
+    if (data.trim().startsWith("WEBVTT")) {
+        const lines = data.split("\n");
+        const result = ["WEBVTT", ""]; // Start with header and blank line
+        let currentCue = [];
+        for (let i = 0; i < lines.length; i++) {
+            const line = lines[i].trim();
+            // Skip empty lines and the WEBVTT header
+            if (!line || line === "WEBVTT") {
+                continue;
+            }
+            // If it's a number by itself, it's a cue identifier
+            if (/^\d+$/.test(line)) {
+                // If we have a previous cue, add it with proper spacing
+                if (currentCue.length > 0) {
+                    result.push(currentCue.join("\n"));
+                    result.push(""); // Add blank line between cues
+                    currentCue = [];
+                }
+                currentCue.push(line);
+                continue;
+            }
+            // Check for and convert timestamps
+            const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
+            const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
+            const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
+            const fullMatch = line.match(fullTimeRegex);
+            const shortMatch = line.match(shortTimeRegex);
+            const ultraShortMatch = line.match(ultraShortTimeRegex);
+            if (fullMatch) {
+                // Already in correct format, just convert comma to dot
+                const convertedTime = line.replace(/,/g, '.');
+                currentCue.push(convertedTime);
+            } else if (shortMatch) {
+                // Convert MM:SS to HH:MM:SS
+                const convertedTime = `00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`;
+                currentCue.push(convertedTime);
+            } else if (ultraShortMatch) {
+                // Convert SS to HH:MM:SS
+                const convertedTime = `00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`;
+                currentCue.push(convertedTime);
+            } else if (!line.includes('-->')) {
+                // Must be subtitle text
+                currentCue.push(line);
+            }
+        }
+        // Add the last cue if there is one
+        if (currentCue.length > 0) {
+            result.push(currentCue.join("\n"));
+            result.push(""); // Add final blank line
+        }
+        // Join with newlines and ensure proper ending
+        return result.join("\n") + "\n";
+    }
+    // remove dos newlines and trim
     var srt = data.replace(/\r+/g, "");
-    // trim white space start and end
     srt = srt.replace(/^\s+|\s+$/g, "");
-    // Convert all timestamps from comma to dot format
-    srt = srt.replace(/(\d{2}:\d{2}:\d{2}),(\d{3})/g, "$1.$2");
-    // Add blank lines before sequence numbers that are followed by timecodes
-    srt = srt.replace(/(\n)(\d+)\n(\d{2}:\d{2}:\d{2}[,.])/g, "$1\n$2\n$3");
-    // get cues
-    var cuelist = srt.split("\n\n");
-    var result = "";
-    if (cuelist.length > 0) {
-        result += "WEBVTT\n\n";
-        for (var i = 0; i < cuelist.length; i = i + 1) {
-            const cue = convertSrtCue(cuelist[i]);
-            // Only add non-empty cues
-            if (cue) {
-                result += cue;
-            }
+    // Split into cues and filter out empty ones
+    var cuelist = srt.split("\n\n").filter(cue => cue.trim());
+    // Always add WEBVTT header
+    var result = "WEBVTT\n\n";
+    // Convert each cue to VTT format
+    for (const cue of cuelist) {
+        const lines = cue.split("\n").map(line => line.trim()).filter(line => line);
+        if (lines.length < 2) continue;
+        let output = [];
+        // Handle cue identifier
+        if (/^\d+$/.test(lines[0])) {
+            output.push(lines[0]);
+            lines.shift();
+        }
+        // Handle timestamp line
+        const timeLine = lines[0];
+        const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
+        const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
+        const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
+        const fullMatch = timeLine.match(fullTimeRegex);
+        const shortMatch = timeLine.match(shortTimeRegex);
+        const ultraShortMatch = timeLine.match(ultraShortTimeRegex);
+        if (fullMatch) {
+            output.push(timeLine.replace(/,/g, '.'));
+        } else if (shortMatch) {
+            output.push(`00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`);
+        } else if (ultraShortMatch) {
+            output.push(`00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`);
+        } else {
+            continue; // Invalid timestamp format
         }
+        // Add remaining lines as subtitle text
+        output.push(...lines.slice(1));
+        // Add the cue to result
+        result += output.join("\n") + "\n\n";
     }
     return result;
 }
@@ -42,18 +130,20 @@ function convertSrtCue(caption) {
     if (!caption || !caption.trim()) {
         return "";
     }
-    // remove all html tags for security reasons
-    //srt = srt.replace(/<[a-zA-Z\/][^>]*>/g, '');
     var cue = "";
     var s = caption.split(/\n/);
-    // concatenate muilt-line string separated in array into one
+    // concatenate multi-line string separated in array into one
     while (s.length > 3) {
         for (var i = 3; i < s.length; i++) {
             s[2] += "\n" + s[i];
         }
         s.splice(3, s.length - 3);
     }
     var line = 0;
     // detect identifier
     if (
         s[0] &&
@@ -67,10 +157,11 @@ function convertSrtCue(caption) {
             line += 1;
         }
     }
     // get time strings
     if (s[line] && s[line].match(/\d+:\d+:\d+/)) {
         // convert time string
-        var m = s[1].match(
+        var m = s[line].match(
             /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*--?>\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
         );
         if (m) {
@@ -93,17 +184,43 @@ function convertSrtCue(caption) {
                 "\n";
             line += 1;
         } else {
-            // Unrecognized timestring
-            return "";
+            // Try alternate timestamp format
+            m = s[line].match(
+                /(\d{2}):(\d{2})\.(\d{3})\s*--?>\s*(\d{2}):(\d{2})\.(\d{3})/,
+            );
+            if (m) {
+                // Convert to full timestamp format
+                cue +=
+                    "00:" +
+                    m[1] +
+                    ":" +
+                    m[2] +
+                    "." +
+                    m[3] +
+                    " --> " +
+                    "00:" +
+                    m[4] +
+                    ":" +
+                    m[5] +
+                    "." +
+                    m[6] +
+                    "\n";
+                line += 1;
+            } else {
+                // Unrecognized timestring
+                return "";
+            }
         }
     } else {
         // file format error or comment lines
         return "";
     }
     // get cue text
     if (s[line]) {
         cue += s[line] + "\n\n";
     }
     return cue;
 }
@@ -112,48 +229,59 @@ export function detectSubtitleFormat(text) {
     const cleanText = text.replace(/\r+/g, "").trim();
     const lines = cleanText.split("\n");
-    // Check if it's VTT format
+    // Check if it's VTT format: the first line (trimmed) must be exactly "WEBVTT"
     if (lines[0]?.trim() === "WEBVTT") {
         return "vtt";
     }
-    // Check if it's SRT format
-    // SRT files have a specific pattern:
-    // 1. Numeric index
-    // 2. Timestamp in format: 00:00:00,000 --> 00:00:00,000
-    // 3. Subtitle text
-    // 4. Blank line
-    const timeRegex =
-        /(\d{2}:\d{2}:\d{2})[,.](\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2})[,.](\d{3})/;
+    // Define regex patterns for timestamp formats
+    const srtTimeRegex =
+        /(\d{2}:\d{2}:\d{2})[,.]\d{3}\s*-->\s*(\d{2}:\d{2}:\d{2})[,.]\d{3}/;
+    const vttTimeRegex =
+        /(?:\d{2}:)?(\d{1,2})[.]\d{3}\s*-->\s*(?:\d{2}:)?(\d{1,2})[.]\d{3}/;
-    let hasValidStructure = false;
-    let index = 1;
+    let hasSrtTimestamps = false;
+    let hasVttTimestamps = false;
+    let hasSequentialNumbers = false;
+    let lastNumber = 0;
-    // Check first few entries to confirm SRT structure
+    // Look through first few lines to detect patterns
     for (let i = 0; i < Math.min(lines.length, 12); i++) {
         const line = lines[i]?.trim();
         if (!line) continue;
-        // Check if line is a number matching our expected index
-        if (line === index.toString()) {
-            // Look ahead for timestamp
-            const nextLine = lines[i + 1]?.trim();
-            if (nextLine && timeRegex.test(nextLine)) {
-                hasValidStructure = true;
-                index++;
-                i++; // Skip timestamp line since we've verified it
+        // Check for timestamps
+        if (srtTimeRegex.test(line)) {
+            hasSrtTimestamps = true;
+        }
+        if (vttTimeRegex.test(line)) {
+            hasVttTimestamps = true;
+        }
+        // Check for sequential numbers
+        const numberMatch = line.match(/^(\d+)$/);
+        if (numberMatch) {
+            const num = parseInt(numberMatch[1]);
+            if (lastNumber === 0 || num === lastNumber + 1) {
+                hasSequentialNumbers = true;
+                lastNumber = num;
             }
         }
     }
-    if (hasValidStructure) {
+    // If it has SRT-style timestamps (HH:MM:SS) and numeric cue indices, it's SRT
+    if (hasSrtTimestamps && hasSequentialNumbers) {
         return "srt";
     }
+    // Otherwise, if it has VTT-style short timestamps (MM:SS.mmm or SS.mmm), it's VTT
+    if (hasVttTimestamps) {
+        return "vtt";
+    }
     return null;
 }
 export default {
     prompt:
     [
@@ -203,7 +331,6 @@ export default {
             const progress = (completedCount + partialRatio) / totalCount;
             logger.info(`Progress for ${requestId}: ${progress}`);
-            console.log(`Progress for ${requestId}: ${progress}`);
             publishRequestProgress({
                 requestId,
                 progress,
@@ -290,7 +417,7 @@ WEBVTT
 Hello World2!
 2
-00:05.344 --> 00:00:08.809
+00:00:05.344 --> 00:00:08.809
 Being AI is also great!
 - If asked text format, e.g.:
@@ -327,6 +454,7 @@ Even a single newline or space can cause the response to be rejected. You must f
             return messages;
         }
         const processChunksParallel = async (chunks, args) => {
             try {
                 const chunkPromises = chunks.map(async (chunk, index) => ({
@@ -338,8 +466,6 @@ Even a single newline or space can cause the response to be rejected. You must f
                     })
                 }));
-                // const results = await Promise.all(chunkPromises);
                 const results = await Promise.all(
                 chunkPromises.map(promise =>
                     promise.then(result => {
@@ -374,6 +500,8 @@ Even a single newline or space can cause the response to be rejected. You must f
         if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
             // convert as gemini output is unstable
             for(let i = 0; i < result.length; i++) {
                 try{

package/server/modelExecutor.js CHANGED Viewed

@@ -28,6 +28,8 @@ import NeuralSpacePlugin from './plugins/neuralSpacePlugin.js';
 import RunwareAiPlugin from './plugins/runwareAiPlugin.js';
 import ReplicateApiPlugin from './plugins/replicateApiPlugin.js';
 import AzureVideoTranslatePlugin from './plugins/azureVideoTranslatePlugin.js';
+import OllamaChatPlugin from './plugins/ollamaChatPlugin.js';
+import OllamaCompletionPlugin from './plugins/ollamaCompletionPlugin.js';
 class ModelExecutor {
     constructor(pathway, model) {
@@ -116,6 +118,12 @@ class ModelExecutor {
             case 'AZURE-VIDEO-TRANSLATE':
                 plugin = new AzureVideoTranslatePlugin(pathway, model);
                 break;
+            case 'OLLAMA-CHAT':
+                plugin = new OllamaChatPlugin(pathway, model);
+                break;
+            case 'OLLAMA-COMPLETION':
+                plugin = new OllamaCompletionPlugin(pathway, model);
+                break;
             default:
                 throw new Error(`Unsupported model type: ${model.type}`);
         }

package/server/pathwayResolver.js CHANGED Viewed

@@ -89,8 +89,13 @@ class PathwayResolver {
                     progress: 1,
                     data: '[DONE]',
                 });
+            } else {
+                publishRequestProgress({
+                    requestId: this.rootRequestId || this.requestId,
+                    progress: 1,
+                    data: error.message || error.toString(),
+                });
             }
-            return;
         }
         // If the response is a string, it's a regular long running response