npm - @aj-archipelago/cortex - Versions diffs - 1.3.23 → 1.3.25 - Mend

@aj-archipelago/cortex 1.3.23 → 1.3.25

This diff compares the contents of two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (16)

package/lib/pathwayTools.js +16 -12
package/package.json +2 -2
package/pathways/system/entity/sys_entity_continue.js +1 -5
package/pathways/system/entity/sys_entity_start.js +12 -20
package/pathways/system/entity/sys_generator_memory.js +3 -3
package/pathways/system/entity/sys_generator_results.js +1 -1
package/pathways/system/rest_streaming/sys_openai_chat.js +2 -2
package/pathways/transcribe_gemini.js +1 -296
package/pathways/translate_subtitle.js +11 -259
package/server/pathwayResolver.js +38 -34
package/server/plugins/claude3VertexPlugin.js +10 -1
package/server/plugins/gemini15ChatPlugin.js +4 -0
package/tests/openai_api.test.js +43 -23
package/tests/streaming.test.js +197 -0
package/tests/translate_srt.test.js +41 -2
package/tests/transcribe_gemini.test.js +0 -217

package/lib/pathwayTools.js CHANGED Viewed

@@ -21,23 +21,25 @@ const callPathway = async (pathwayName, inArgs, pathwayResolver) => {
     let rootRequestId = pathwayResolver?.rootRequestId || pathwayResolver?.requestId;
     let data = await pathway.rootResolver(parent, {...args, rootRequestId}, { config, pathway, requestState } );
+    pathwayResolver && pathwayResolver.mergeResults(data);
+    let returnValue = data?.result || null;
     if (args.async || args.stream) {
         const { result: requestId } = data;
         // Fire the resolver for the async requestProgress
-        logger.info(`Callpathway starting async requestProgress, requestId: ${requestId}`);
+        logger.info(`Callpathway starting async requestProgress, pathway: ${pathwayName}, requestId: ${requestId}`);
         const { resolver, args } = requestState[requestId];
         requestState[requestId].useRedis = false;
         requestState[requestId].started = true;
-        data = resolver && await resolver(args);
+        resolver && await resolver(args);
+        returnValue = null;
     }
-    // Update pathwayResolver with new data if available
-    pathwayResolver?.mergeResults(data);
-    return data?.result;
+    return returnValue;
 };
 const gpt3Encode = (text) => {
@@ -48,7 +50,7 @@ const gpt3Decode = (text) => {
     return decode(text);
 }
-const say = async (requestId, message, maxMessageLength = Infinity) => {
+const say = async (requestId, message, maxMessageLength = Infinity, voiceResponse = true) => {
     try {
         const chunks = getSemanticChunks(message, maxMessageLength);
@@ -60,11 +62,13 @@ const say = async (requestId, message, maxMessageLength = Infinity) => {
             });
         }
-        await publishRequestProgress({
-            requestId,
-            progress: 0.5,
-            data: " ... "
-        });
+        if (voiceResponse) {
+            await publishRequestProgress({
+                requestId,
+                progress: 0.5,
+                data: " ... "
+            });
+        }
         await publishRequestProgress({
             requestId,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.23",
+  "version": "1.3.25",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -33,7 +33,7 @@
   "type": "module",
   "homepage": "https://github.com/aj-archipelago/cortex#readme",
   "dependencies": {
-    "@aj-archipelago/subvibe": "^1.0.3",
+    "@aj-archipelago/subvibe": "^1.0.8",
     "@apollo/server": "^4.7.3",
     "@apollo/server-plugin-response-cache": "^4.1.2",
     "@apollo/utils.keyvadapter": "^3.0.0",

package/pathways/system/entity/sys_entity_continue.js CHANGED Viewed

@@ -57,11 +57,7 @@ export default {
             const result = await callPathway(generatorPathway, newArgs, resolver);
-            if (args.stream) {
-                return "";
-            }
-            if (!result) {
+            if (!result && !args.stream) {
                 result = await callPathway('sys_generator_error', { ...args, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
             }

package/pathways/system/entity/sys_entity_start.js CHANGED Viewed

@@ -105,22 +105,12 @@ export default {
             }
         }
-        const fetchChatResponse = async (args, pathwayResolver) => {
-            const [chatResponse, chatTitleResponse] = await Promise.all([
-                callPathway('sys_generator_quick', {...args, model: styleModel}, pathwayResolver),
-                callPathway('chat_title', { ...args, chatHistory: chatHistoryBeforeMemory, stream: false}),
-            ]);
-            title = chatTitleResponse;
-            return chatResponse;
-        };
-        // start fetching the default response - we may need it later
+        // start fetching responses in parallel if not streaming
         let fetchChatResponsePromise;
         if (!args.stream) {
-            fetchChatResponsePromise = fetchChatResponse({ ...args, ackResponse }, pathwayResolver);
+            fetchChatResponsePromise = callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver);
         }
+        const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: chatHistoryBeforeMemory, stream: false});
         const visionContentPresent = chatArgsHasImageUrl(args);
@@ -223,6 +213,8 @@ export default {
                 }
             }
+            title = await fetchTitleResponsePromise;
             if (toolCallbackMessage) {
                 if (args.skipCallbackMessage) {
                     pathwayResolver.tool = JSON.stringify({ hideFromModel: false, search: false, title });
@@ -231,11 +223,11 @@ export default {
                 if (args.stream) {
                     if (!ackResponse) {
-                        await say(pathwayResolver.requestId, toolCallbackMessage || "One moment please.", 10);
+                        await say(pathwayResolver.requestId, toolCallbackMessage || "One moment please.", 10, args.voiceResponse ? true : false);
                     }
-                    pathwayResolver.tool = JSON.stringify({ hideFromModel: false, search: false, title });
                     await callPathway('sys_entity_continue', { ...args, stream: true, generatorPathway: toolCallbackName }, pathwayResolver);
-                    return "";
+                    pathwayResolver.tool = JSON.stringify({ hideFromModel: false, search: false, title });
+                    return;
                 }
                 pathwayResolver.tool = JSON.stringify({
@@ -250,15 +242,15 @@ export default {
                 return toolCallbackMessage || "One moment please.";
             }
-            const chatResponse = await (fetchChatResponsePromise || fetchChatResponse({ ...args, ackResponse }, pathwayResolver));
+            const chatResponse = await (fetchChatResponsePromise || callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver));
             pathwayResolver.tool = JSON.stringify({ search: false, title });
-            return args.stream ? "" : chatResponse;
+            return args.stream ? null : chatResponse;
         } catch (e) {
             pathwayResolver.logError(e);
-            const chatResponse = await (fetchChatResponsePromise || fetchChatResponse({ ...args, ackResponse }, pathwayResolver));
+            const chatResponse = await (fetchChatResponsePromise || callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver));
             pathwayResolver.tool = JSON.stringify({ search: false, title });
-            return args.stream ? "" : chatResponse;
+            return args.stream ? null : chatResponse;
         }
     }
 };

package/pathways/system/entity/sys_generator_memory.js CHANGED Viewed

@@ -18,7 +18,7 @@ export default {
         const { aiStyle, AI_STYLE_ANTHROPIC, AI_STYLE_OPENAI } = args;
         const styleModel = aiStyle === "Anthropic" ? AI_STYLE_ANTHROPIC : AI_STYLE_OPENAI;
-        const memoryContext = await callPathway('sys_search_memory', { ...args, section: 'memoryAll', updateContext: true });
+        const memoryContext = await callPathway('sys_search_memory', { ...args, stream: false, section: 'memoryAll', updateContext: true });
         if (memoryContext) {
             const {toolCallId} = addToolCalls(args.chatHistory, "search memory for relevant information", "memory_lookup");
             addToolResults(args.chatHistory, memoryContext, toolCallId);
@@ -26,9 +26,9 @@ export default {
         let result;
         if (args.voiceResponse) {
-            result = await callPathway('sys_generator_quick', { ...args, model: styleModel, stream: false });
+            result = await callPathway('sys_generator_quick', { ...args, model: styleModel, stream: false }, resolver);
         } else {
-            result = await callPathway('sys_generator_quick', { ...args, model: styleModel });
+            result = await callPathway('sys_generator_quick', { ...args, model: styleModel }, resolver);
         }
         resolver.tool = JSON.stringify({ toolUsed: "memory" });

package/pathways/system/entity/sys_generator_results.js CHANGED Viewed

@@ -341,7 +341,7 @@ Here are the information sources that were found:
                 clearTimeout(timeoutId);
             }
-            if (!args.stream) {
+            if (!args.voiceResponse) {
                 const referencedSources = extractReferencedSources(result);
                 searchResults = searchResults.length ? pruneSearchResults(searchResults, referencedSources) : [];
             }

package/pathways/system/rest_streaming/sys_openai_chat.js CHANGED Viewed

@@ -13,7 +13,7 @@ export default {
     inputParameters: {
         messages: [],
     },
-    model: 'oai-gpturbo',
+    model: 'oai-gpt4o',
     useInputChunking: false,
-    emulateOpenAIChatModel: 'gpt-3.5-turbo',
+    emulateOpenAIChatModel: 'gpt-4o',
 }

package/pathways/transcribe_gemini.js CHANGED Viewed

@@ -5,283 +5,6 @@ import { Prompt } from "../server/prompt.js";
 const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide
-export function convertSrtToVtt(data) {
-    if (!data || !data.trim()) {
-        return "WEBVTT\n\n";
-    }
-    // If it's already VTT format and has header
-    if (data.trim().startsWith("WEBVTT")) {
-        const lines = data.split("\n");
-        const result = ["WEBVTT", ""]; // Start with header and blank line
-        let currentCue = [];
-        for (let i = 0; i < lines.length; i++) {
-            const line = lines[i].trim();
-            // Skip empty lines and the WEBVTT header
-            if (!line || line === "WEBVTT") {
-                continue;
-            }
-            // If it's a number by itself, it's a cue identifier
-            if (/^\d+$/.test(line)) {
-                // If we have a previous cue, add it with proper spacing
-                if (currentCue.length > 0) {
-                    result.push(currentCue.join("\n"));
-                    result.push(""); // Add blank line between cues
-                    currentCue = [];
-                }
-                currentCue.push(line);
-                continue;
-            }
-            // Check for and convert timestamps
-            const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
-            const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
-            const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
-            const fullMatch = line.match(fullTimeRegex);
-            const shortMatch = line.match(shortTimeRegex);
-            const ultraShortMatch = line.match(ultraShortTimeRegex);
-            if (fullMatch) {
-                // Already in correct format, just convert comma to dot
-                const convertedTime = line.replace(/,/g, '.');
-                currentCue.push(convertedTime);
-            } else if (shortMatch) {
-                // Convert MM:SS to HH:MM:SS
-                const convertedTime = `00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`;
-                currentCue.push(convertedTime);
-            } else if (ultraShortMatch) {
-                // Convert SS to HH:MM:SS
-                const convertedTime = `00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`;
-                currentCue.push(convertedTime);
-            } else if (!line.includes('-->')) {
-                // Must be subtitle text
-                currentCue.push(line);
-            }
-        }
-        // Add the last cue if there is one
-        if (currentCue.length > 0) {
-            result.push(currentCue.join("\n"));
-            result.push(""); // Add final blank line
-        }
-        // Join with newlines and ensure proper ending
-        return result.join("\n") + "\n";
-    }
-    // remove dos newlines and trim
-    var srt = data.replace(/\r+/g, "");
-    srt = srt.replace(/^\s+|\s+$/g, "");
-    // Split into cues and filter out empty ones
-    var cuelist = srt.split("\n\n").filter(cue => cue.trim());
-    // Always add WEBVTT header
-    var result = "WEBVTT\n\n";
-    // Convert each cue to VTT format
-    for (const cue of cuelist) {
-        const lines = cue.split("\n").map(line => line.trim()).filter(line => line);
-        if (lines.length < 2) continue;
-        let output = [];
-        // Handle cue identifier
-        if (/^\d+$/.test(lines[0])) {
-            output.push(lines[0]);
-            lines.shift();
-        }
-        // Handle timestamp line
-        const timeLine = lines[0];
-        const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
-        const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
-        const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
-        const fullMatch = timeLine.match(fullTimeRegex);
-        const shortMatch = timeLine.match(shortTimeRegex);
-        const ultraShortMatch = timeLine.match(ultraShortTimeRegex);
-        if (fullMatch) {
-            output.push(timeLine.replace(/,/g, '.'));
-        } else if (shortMatch) {
-            output.push(`00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`);
-        } else if (ultraShortMatch) {
-            output.push(`00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`);
-        } else {
-            continue; // Invalid timestamp format
-        }
-        // Add remaining lines as subtitle text
-        output.push(...lines.slice(1));
-        // Add the cue to result
-        result += output.join("\n") + "\n\n";
-    }
-    return result;
-}
-function convertSrtCue(caption) {
-    if (!caption || !caption.trim()) {
-        return "";
-    }
-    var cue = "";
-    var s = caption.split(/\n/);
-    // concatenate multi-line string separated in array into one
-    while (s.length > 3) {
-        for (var i = 3; i < s.length; i++) {
-            s[2] += "\n" + s[i];
-        }
-        s.splice(3, s.length - 3);
-    }
-    var line = 0;
-    // detect identifier
-    if (
-        s[0] &&
-        s[1] &&
-        !s[0].match(/\d+:\d+:\d+/) &&
-        s[1].match(/\d+:\d+:\d+/)
-    ) {
-        const match = s[0].match(/^\d+$/); // Only match if the entire line is a number
-        if (match) {
-            cue += match[0] + "\n";
-            line += 1;
-        }
-    }
-    // get time strings
-    if (s[line] && s[line].match(/\d+:\d+:\d+/)) {
-        // convert time string
-        var m = s[line].match(
-            /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*--?>\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
-        );
-        if (m) {
-            cue +=
-                m[1] +
-                ":" +
-                m[2] +
-                ":" +
-                m[3] +
-                "." +
-                m[4] +
-                " --> " +
-                m[5] +
-                ":" +
-                m[6] +
-                ":" +
-                m[7] +
-                "." +
-                m[8] +
-                "\n";
-            line += 1;
-        } else {
-            // Try alternate timestamp format
-            m = s[line].match(
-                /(\d{2}):(\d{2})\.(\d{3})\s*--?>\s*(\d{2}):(\d{2})\.(\d{3})/,
-            );
-            if (m) {
-                // Convert to full timestamp format
-                cue +=
-                    "00:" +
-                    m[1] +
-                    ":" +
-                    m[2] +
-                    "." +
-                    m[3] +
-                    " --> " +
-                    "00:" +
-                    m[4] +
-                    ":" +
-                    m[5] +
-                    "." +
-                    m[6] +
-                    "\n";
-                line += 1;
-            } else {
-                // Unrecognized timestring
-                return "";
-            }
-        }
-    } else {
-        // file format error or comment lines
-        return "";
-    }
-    // get cue text
-    if (s[line]) {
-        cue += s[line] + "\n\n";
-    }
-    return cue;
-}
-export function detectSubtitleFormat(text) {
-    // Remove DOS newlines and trim whitespace
-    const cleanText = text.replace(/\r+/g, "").trim();
-    const lines = cleanText.split("\n");
-    // Check if it's VTT format - be more lenient with the header
-    if (lines[0]?.trim() === "WEBVTT") {
-        return "vtt";
-    }
-    // Define regex patterns for timestamp formats
-    const srtTimeRegex =
-        /(\d{2}:\d{2}:\d{2})[,.]\d{3}\s*-->\s*(\d{2}:\d{2}:\d{2})[,.]\d{3}/;
-    const vttTimeRegex =
-        /(?:\d{2}:)?(\d{1,2})[.]\d{3}\s*-->\s*(?:\d{2}:)?(\d{1,2})[.]\d{3}/;
-    let hasSrtTimestamps = false;
-    let hasVttTimestamps = false;
-    let hasSequentialNumbers = false;
-    let lastNumber = 0;
-    // Look through first few lines to detect patterns
-    for (let i = 0; i < Math.min(lines.length, 12); i++) {
-        const line = lines[i]?.trim();
-        if (!line) continue;
-        // Check for timestamps
-        if (srtTimeRegex.test(line)) {
-            hasSrtTimestamps = true;
-        }
-        if (vttTimeRegex.test(line)) {
-            hasVttTimestamps = true;
-        }
-        // Check for sequential numbers
-        const numberMatch = line.match(/^(\d+)$/);
-        if (numberMatch) {
-            const num = parseInt(numberMatch[1]);
-            if (lastNumber === 0 || num === lastNumber + 1) {
-                hasSequentialNumbers = true;
-                lastNumber = num;
-            }
-        }
-    }
-    // If it has SRT-style timestamps (HH:MM:SS), it's SRT
-    if (hasSrtTimestamps && hasSequentialNumbers) {
-        return "srt";
-    }
-    // If it has VTT-style timestamps (MM:SS) or WEBVTT header, it's VTT
-    if (hasVttTimestamps) {
-        return "vtt";
-    }
-    return null;
-}
 export default {
     prompt:
     [
@@ -381,7 +104,7 @@ export default {
         function getMessages(file, format) {
-            const responseFormat = format!== 'text' ? 'SRT' : 'text';
+            const responseFormat = format!== 'text' ? 'VTT' : 'text';
             const messages = [
                 {"role": "system", "content": `Instructions:\nYou are an AI entity with expertise of transcription. Your response only contains the transcription, no comments or additonal stuff.
@@ -491,26 +214,8 @@ Even a single newline or space can cause the response to be rejected. You must f
         // }
         const result = await processChunksParallel(chunks, args);
-        // publishRequestProgress({
-        //     requestId: this.rootRequestId || this.requestId,
-        //     progress: 1,
-        //     data: "a",
-        // });
         if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
-            // convert as gemini output is unstable
-            for(let i = 0; i < result.length; i++) {
-                try{
-                    result[i] = convertSrtToVtt(result[i]);
-                }catch(error){
-                    logger.error(`Error converting to vtt: ${error}`);
-                }
-            }
             const offsets = chunks.map((chunk, index) => chunk?.offset || index * OFFSET_CHUNK);
             return alignSubtitles(result, responseFormat, offsets);
         }