@aj-archipelago/cortex 1.3.30 → 1.3.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  {
  "name": "@aj-archipelago/cortex-file-handler",
- "version": "1.0.16",
+ "version": "1.0.17",
  "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
  "type": "module",
  "scripts": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@aj-archipelago/cortex",
- "version": "1.3.30",
+ "version": "1.3.32",
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
  "private": false,
  "repository": {
@@ -33,7 +33,7 @@
  "type": "module",
  "homepage": "https://github.com/aj-archipelago/cortex#readme",
  "dependencies": {
- "@aj-archipelago/subvibe": "^1.0.8",
+ "@aj-archipelago/subvibe": "^1.0.10",
  "@apollo/server": "^4.7.3",
  "@apollo/server-plugin-response-cache": "^4.1.2",
  "@apollo/utils.keyvadapter": "^3.0.0",
@@ -9,7 +9,7 @@ export default {
  height: 1024,
  aspectRatio: "custom",
  numberResults: 1,
- safety_tolerance: 5,
+ safety_tolerance: 6,
  output_format: "webp",
  output_quality: 80,
  steps: 4,
@@ -139,6 +139,14 @@ const addToolResults = (chatHistory, result, toolCallId) => {
  return { chatHistory, toolCallId };
  };

+ const insertToolCallAndResults = (chatHistory, toolArgs, toolName, result = null, toolCallId = getUniqueId()) => {
+ const lastMessage = chatHistory.length > 0 ? chatHistory.pop() : null;
+ addToolCalls(chatHistory, toolArgs, toolName, toolCallId);
+ addToolResults(chatHistory, result, toolCallId);
+ chatHistory.push(lastMessage);
+ return { chatHistory, toolCallId };
+ };
+
  const modifyText = (text, modifications) => {
  let modifiedText = text || '';

@@ -225,4 +233,4 @@ const modifyText = (text, modifications) => {
  return modifiedText;
  };

- export { normalizeMemoryFormat, enforceTokenLimit, addToolCalls, addToolResults, modifyText };
+ export { normalizeMemoryFormat, enforceTokenLimit, addToolCalls, addToolResults, modifyText, insertToolCallAndResults };
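Based on the hunk above, the new insertToolCallAndResults helper pops the most recent chat message, writes a tool call and its result through the existing addToolCalls/addToolResults helpers, then pushes the last message back, so the tool exchange lands just before the message the model is about to answer. A minimal usage sketch follows; the message shape and the memory text are illustrative assumptions, not taken from the package:

  // Illustrative only - the chatHistory contents are made up for this example
  import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';

  const chatHistory = [
    { role: 'user', content: 'What did we decide about the launch date?' },
  ];

  // Inserts a "memory_lookup" tool call and its result ahead of the last
  // user message; toolCallId is generated when not supplied.
  const { toolCallId } = insertToolCallAndResults(
    chatHistory,
    'search memory for relevant information',
    'memory_lookup',
    'Memory note: the launch was moved to June 3.'
  );
  // chatHistory is now: [tool call, tool result, original user message]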
@@ -5,7 +5,7 @@ import logger from '../../../lib/logger.js';
  import { chatArgsHasImageUrl } from '../../../lib/util.js';
  import { QueueServiceClient } from '@azure/storage-queue';
  import { config } from '../../../config.js';
- import { addToolCalls, addToolResults } from './memory/shared/sys_memory_helpers.js';
+ import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';

  const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
  let queueClient;
@@ -87,15 +87,18 @@ export default {
  args.model = pathwayResolver.modelName;
  }

- // Stuff the memory context into the chat history
+ // Save a copy of the chat history before the memory context is added
  const chatHistoryBeforeMemory = [...args.chatHistory];

- const memoryContext = await callPathway('sys_read_memory', { ...args, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
- if (memoryContext) {
- const { toolCallId } = addToolCalls(args.chatHistory, "search memory for relevant information", "memory_lookup");
- addToolResults(args.chatHistory, memoryContext, toolCallId);
+ // Add the memory context to the chat history if applicable
+ if (args.chatHistory.length > 1) {
+ const memoryContext = await callPathway('sys_read_memory', { ...args, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
+ if (memoryContext) {
+ insertToolCallAndResults(args.chatHistory, "search memory for relevant information", "memory_lookup", memoryContext);
+ }
  }
-
+
+ // If we're using voice, get a quick response to say
  let ackResponse = null;
  if (args.voiceResponse) {
  ackResponse = await callPathway('sys_generator_ack', { ...args, stream: false });
@@ -216,7 +219,7 @@ export default {
  title = await fetchTitleResponsePromise;

  pathwayResolver.tool = JSON.stringify({
- hideFromModel: toolCallbackName ? true : false,
+ hideFromModel: (!args.stream && toolCallbackName) ? true : false,
  toolCallbackName,
  title,
  search: toolCallbackName === 'sys_generator_results' ? true : false,
@@ -4,7 +4,7 @@ export default {
  prompt:
  [
  new Prompt({ messages: [
- {"role": "system", "content": `{{renderTemplate AI_CONVERSATION_HISTORY}}\nYou are a part of an AI system named {{aiName}}. Your job is to acknowledge the user's request and provide a very brief voice filler response that is conversational and natural. The purpose of the response is just to let the user know that you have heard them and are processing a response.\nResponse Guidelines:\n- it should just be a normal 1-2 sentence vocalization (at least 10 words) that will take at most about 3-4 seconds to read and is easy for a text to speech engine to read\n- it should be the beginning of an appropriate response to the last user message in the conversation history\n- it should be an appropriate lead-in for the full response that will follow later\n- it should not directly ask for follow up or be a question\n- it must match the tone and verbal style of the rest of your responses in the conversation history\n- it should not be repetitive - don't always open with the same word, etc.\n- if the user has asked a binary question (yes or no, true or false, etc.) or a filler response is not appropriate, you should response with the string \"none\"\n\n{{renderTemplate AI_DATETIME}}`},
+ {"role": "system", "content": `{{renderTemplate AI_CONVERSATION_HISTORY}}\nYou are a part of an AI system named {{aiName}}. Your job is to acknowledge the user's request and provide a very brief voice filler response that is conversational and natural. The purpose of the response is just to let the user know that you have heard them and are processing a response.\nResponse Guidelines:\n- it should just be a normal 1-2 sentence vocalization (at least 10 words) that will take at most about 3-4 seconds to read and is easy for a text to speech engine to read\n- it should be the beginning of an appropriate response to the last user message in the conversation history\n- it should be an appropriate lead-in for the full response that will follow later\n- it should not directly ask for follow up or be a question\n- it must match the tone and verbal style of the rest of your responses in the conversation history\n- it should not be repetitive - don't always open with the same word, etc.\n- if the user has asked a binary question (yes or no, true or false, etc.) or a filler response is not appropriate, you should respond with the string \"none\"\n\n{{renderTemplate AI_DATETIME}}`},
  {"role": "user", "content": "Please generate a quick response to the user's last message in the conversation history that can be read verbatim to the user or \"none\" if a filler response is not appropriate."}
  ]}),
  ],
@@ -3,7 +3,7 @@
  import { callPathway } from '../../../lib/pathwayTools.js';
  import { Prompt } from '../../../server/prompt.js';
  import logger from '../../../lib/logger.js';
- import { addToolCalls, addToolResults } from './memory/shared/sys_memory_helpers.js';
+ import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';

  export default {
  prompt: [],
@@ -73,8 +73,7 @@ Instructions: As part of a conversation with the user, you have been asked to cr

  // add the tool_calls and tool_results to the chatHistory
  imageResults.forEach((imageResult, index) => {
- const { toolCallId } = addToolCalls(chatHistory, imagePrompts[index], "generate_image");
- addToolResults(chatHistory, imageResult, toolCallId, "generate_image");
+ insertToolCallAndResults(chatHistory, imagePrompts[index], "generate_image", imageResult);
  });

  const result = await runAllPrompts({ ...args });
@@ -1,5 +1,5 @@
  import { callPathway } from '../../../lib/pathwayTools.js';
- import { addToolCalls, addToolResults } from './memory/shared/sys_memory_helpers.js';
+ import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';

  export default {
  prompt:
@@ -20,8 +20,7 @@ export default {

  const memoryContext = await callPathway('sys_search_memory', { ...args, stream: false, section: 'memoryAll', updateContext: true });
  if (memoryContext) {
- const {toolCallId} = addToolCalls(args.chatHistory, "search memory for relevant information", "memory_lookup");
- addToolResults(args.chatHistory, memoryContext, toolCallId);
+ insertToolCallAndResults(args.chatHistory, "search memory for relevant information", "memory_lookup", memoryContext);
  }

  let result;
@@ -15,7 +15,7 @@ export default {
  let pathwayResolver = resolver;

  const promptMessages = [
- {"role": "system", "content": `{{renderTemplate AI_MEMORY}}\n\n{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n{{renderTemplate AI_EXPERTISE}} While you have those capabilities but you have already decided it is not necessary to do any of those things to respond in this turn of the conversation. Never pretend like you are searching, looking anything up, or reading or looking in a file or show the user any made up or hallucinated information including non-existent images.\n{{renderTemplate AI_MEMORY_INSTRUCTIONS}}\n{{renderTemplate AI_DATETIME}}`},
+ {"role": "system", "content": `{{renderTemplate AI_MEMORY}}\n\n{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n{{renderTemplate AI_EXPERTISE}}\n{{renderTemplate AI_MEMORY_INSTRUCTIONS}}\n{{renderTemplate AI_DATETIME}}`},
  "{{chatHistory}}",
  ];

@@ -56,8 +56,16 @@ If you decide to use a tool, return a JSON object in this format:

  toolMessage Guidelines:
  - The message should be consistent in style and tone with the rest of your responses in the conversation history.
- - The message should be brief and conversational and flow naturally with the conversation history.
- - The message should not refer to the tool use directly, but rather what you're trying to do.
+ - The message should be brief, conversational, and flow naturally with the conversation history.
+ - Use contractions (I'm, don't, let's) and informal language to sound more natural.
+ - Include slight verbal hesitations or false starts occasionally (e.g., "So... I think what we could do here is" or "Actually, let me try a different approach").
+ - Vary sentence length and structure - mix short punchy sentences with longer ones.
+ - Include casual transitions and filler phrases that humans use: "you know," "basically," "so," "actually," "by the way," "anyway," etc.
+ - Express uncertainty when appropriate with phrases like "I'm pretty sure," "If I remember correctly," or "I think."
+ - Reference personal preferences or opinions occasionally ("I personally find that..." or "I tend to prefer...").
+ - Occasionally make minor self-corrections as humans do ("Wait, let me rephrase that" or "Actually, I meant to say...").
+ - Humans don't refer to tools - stall for time by saying things like "Let me think about this for a sec," "Drawing that up for you now," or "Working on it..." Humans also stall with vocalizations like "hmm," "let's see," or "give me a moment."
+ - When appropriate, react emotionally to content with expressions like "Wow!" or "That's interesting!"

  If no tool is required, return:
  {"toolRequired": false, "toolReason": "explanation of why no tool was necessary"}
@@ -5,6 +5,42 @@ import { Prompt } from "../server/prompt.js";

  const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide

+ function isYoutubeUrl(url) {
+ try {
+ const urlObj = new URL(url);
+
+ // Check for standard youtube.com domains
+ if (
+ urlObj.hostname === "youtube.com" ||
+ urlObj.hostname === "www.youtube.com"
+ ) {
+ // For standard watch URLs, verify they have a video ID
+ if (urlObj.pathname === "/watch") {
+ return !!urlObj.searchParams.get("v");
+ }
+ // For embed URLs, verify they have a video ID in the path
+ if (urlObj.pathname.startsWith("/embed/")) {
+ return urlObj.pathname.length > 7; // '/embed/' is 7 chars
+ }
+ // For shorts URLs, verify they have a video ID in the path
+ if (urlObj.pathname.startsWith("/shorts/")) {
+ return urlObj.pathname.length > 8; // '/shorts/' is 8 chars
+ }
+ return false;
+ }
+
+ // Check for shortened youtu.be domain
+ if (urlObj.hostname === "youtu.be") {
+ // Verify there's a video ID in the path
+ return urlObj.pathname.length > 1; // '/' is 1 char
+ }
+
+ return false;
+ } catch (err) {
+ return false;
+ }
+ }
+
  export default {
  prompt:
  [
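The new isYoutubeUrl helper replaces the loose regular-expression check that appears later in this diff. Based only on the added code, the expected behaviour looks roughly like the sketch below; the video IDs are placeholders, and one apparent side effect of switching to the URL parser is that scheme-less strings now throw inside new URL() and are rejected:

  // Expected return values implied by the hunk above (illustrative, not a test from the package)
  isYoutubeUrl('https://www.youtube.com/watch?v=dQw4w9WgXcQ'); // true - watch URL with a ?v= ID
  isYoutubeUrl('https://youtube.com/embed/dQw4w9WgXcQ');       // true - ID after /embed/
  isYoutubeUrl('https://www.youtube.com/shorts/abc123');       // true - ID after /shorts/
  isYoutubeUrl('https://youtu.be/dQw4w9WgXcQ');                // true - ID in the youtu.be path
  isYoutubeUrl('https://www.youtube.com/watch');               // false - no video ID
  isYoutubeUrl('https://example.com/watch?v=abc');             // false - not a YouTube host
  isYoutubeUrl('youtube.com/watch?v=abc');                     // false - new URL() throws without a scheme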
@@ -12,7 +48,7 @@ export default {
  "{{messages}}",
  ]}),
  ],
- model: 'gemini-flash-20-vision',
+ model: 'gemini-pro-20-vision',
  inputParameters: {
  file: ``,
  language: ``,
@@ -63,7 +99,10 @@ export default {
  sendProgress(true);
  intervalId = setInterval(() => sendProgress(true), 3000);

- const { file, responseFormat, wordTimestamped, maxLineWidth } = args;
+ const { file, wordTimestamped, maxLineWidth } = args;
+
+ const responseFormat = args.responseFormat || 'text';
+
  if(!file) {
  throw new Error("Please provide a file to transcribe.");
  }
@@ -71,7 +110,7 @@

  //check if fils is a gcs file or youtube
  const isGcs = file.startsWith('gs://');
- const isYoutube = file.match(/^(http(s)?:\/\/)?((w){3}.)?youtu(be|.be)?(\.com)?\/.+/);
+ const isYoutube = isYoutubeUrl(file);

  let chunks = [{
  url: file,
@@ -87,43 +126,41 @@

  sendProgress(true);

- let respectLimitsPrompt = " ";
+ let respectLimitsPrompt = "";
  if (maxLineWidth) {

  const possiblePlacement = maxLineWidth <= 25
  ? "vertical" : maxLineWidth <= 35 ? "horizontal" : "";

- respectLimitsPrompt += `The output lines must not exceed ${maxLineWidth} characters, so make sure your transcription lines and timestamps are perfectly aligned. `;
-
- if(possiblePlacement){
- respectLimitsPrompt+= `This limit a must as user will be using the output for ${possiblePlacement} display.`
- }
+ respectLimitsPrompt += ` These subtitles will be shown in a ${possiblePlacement} formatted video player. Each subtitle line should not exceed ${maxLineWidth} characters to fit the player.`;
  }

- const transcriptionLevel = wordTimestamped ? "word" : "phrase";
-
- function getMessages(file, format) {
+ function getMessages(file) {
+
+ // Base system content that's always included
+ let systemContent = `Instructions:
+ You are a transcription assistant. Your job is to transcribe the audio/video content accurately.

- const responseFormat = format!== 'text' ? 'VTT' : 'text';
+ IMPORTANT: Only provide the transcription in your response - no explanations, comments, or additional text.

- const messages = [
- {"role": "system", "content": `Instructions:\nYou are an AI entity with expertise of transcription. Your response only contains the transcription, no comments or additonal stuff.
-
- Your output must be in the format asked, and must be strictly following the formats and parseble by auto parsers.
+ Format your response in ${responseFormat} format.`;

- Word-level transcriptions must be per word timestamped, and phrase-level transcriptions are per phrase.
+ // Only include timestamp instructions if we're not using plain text format
+ if (responseFormat !== 'text') {
+ systemContent += `

- Each transcription timestamp must precisely match the corresponding audio/video segment.
- Each timestamp must correspond to actual spoken content.
- End time cannot exceed total media duration. Especially when transcribing word-level double check your timestamps, never exceed the total duration.
+ CRITICAL TIMESTAMP INSTRUCTIONS:
+ - Timestamps MUST match the actual timing in the media
+ - For each new segment, look at the media time directly
+ - Start times should precisely match when spoken words begin
+ - Consecutive segments should have matching end/start times (no gaps or overlaps)`;
+ }

- You must follow 1, 2, 3, ... numbering for each transcription segment without any missing numbers.
- Never put newlines or spaces in the middle of a timestamp.
- Never put multiple lines for a single timestamp.
+ systemContent += `

- Example responses:
+ Examples:

- - If asked SRT format, e.g.:
+ SRT format:
  1
  00:00:00,498 --> 00:00:02,827
  Hello World!
@@ -132,21 +169,24 @@ Hello World!
  00:00:02,827 --> 00:00:06,383
  Being AI is fun!

- - If asked VTT format, e.g.:
+ VTT format:
  WEBVTT

  1
  00:00:00.000 --> 00:00:02.944
- Hello World2!
+ Hello World!

  2
- 00:00:05.344 --> 00:00:08.809
- Being AI is also great!
+ 00:00:02.944 --> 00:00:08.809
+ Being AI is great!

- - If asked text format, e.g.:
- Hello World!!! Being AI is being great yet again!
+ Text format:
+ Hello World! Being AI is great!`;

- Word-level output e.g.:
+ if (wordTimestamped) {
+ systemContent += `
+
+ For word-level transcription, timestamp each word:

  WEBVTT

@@ -155,17 +195,32 @@ WEBVTT
  Hello

  2
- 00:00:01.964 --> 00:00:02.383
+ 00:00:01.944 --> 00:00:02.383
  World!
+ `;
+ }

+ // Only include anti-drift procedure and timestamp reminders for non-text formats
+ if (responseFormat !== 'text') {
+ systemContent += `
+
+ ANTI-DRIFT PROCEDURE:
+ 1. For EVERY new segment, check the actual media time directly
+ 2. After every 5 segments, verify your timestamps against the video/audio
+ 3. Never calculate timestamps based on previous segments
+ 4. Always match the end time of one segment with the start time of the next
+
+ REMEMBER:
+ - Transcription accuracy is your primary goal
+ - Timestamp accuracy is equally important
+ - Timestamp drift is the most common error - actively prevent it
+ - When in doubt, check the media time directly`;
+ }

- You must follow spacing, punctuation, and timestamps as shown in the examples otherwise your response will not be accepted.
- Never output multiple lines for a single timestamp.
- Even a single newline or space can cause the response to be rejected. You must follow the format strictly. You must place newlines and timestamps exactly as shown in the examples.
-
- `},
+ const messages = [
+ {"role": "system", "content": systemContent},
  {"role": "user", "content": [
- `{ type: 'text', text: 'Transcribe the media ${transcriptionLevel}-level in ${responseFormat} format.${respectLimitsPrompt}' }`,
+ `{ type: 'text', text: 'Transcribe this file in ${responseFormat} format.${respectLimitsPrompt} Output only the transcription, no other text or comments or formatting.' }`,
  JSON.stringify({
  type: 'image_url',
  url: file,
@@ -215,7 +270,7 @@ Even a single newline or space can cause the response to be rejected. You must f

  const result = await processChunksParallel(chunks, args);

- if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
+ if (['srt','vtt'].includes(responseFormat.toLowerCase()) || wordTimestamped) { // align subtitles for formats
  const offsets = chunks.map((chunk, index) => chunk?.offset || index * OFFSET_CHUNK);
  return alignSubtitles(result, responseFormat, offsets);
  }
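Taken together with the earlier hunk that defaults responseFormat, the alignment check above now tolerates both a missing format and uppercase values. A small sketch of the implied flow, reusing variable names from the diff with the surrounding pathway code omitted:

  // Sketch only - mirrors the two hunks, not a verbatim excerpt
  const responseFormat = args.responseFormat || 'text';     // default when the caller omits it
  const needsAlignment =
    ['srt', 'vtt'].includes(responseFormat.toLowerCase())   // 'SRT' and 'VTT' now match as well
    || wordTimestamped;                                      // word-level output is always aligned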
package/server/graphql.js CHANGED
@@ -85,6 +85,7 @@ const getTypedefs = (pathways, pathwayManager) => {
  status: String
  data: String
  info: String
+ error: String
  }

  type Subscription {
@@ -94,8 +94,10 @@ class PathwayResolver {
  requestId: this.rootRequestId || this.requestId,
  progress: 1,
  data: '',
- info: 'ERROR: ' + error.message || error.toString()
+ info: '',
+ error: error.message || error.toString()
  });
+ return;
  }

  // If the response is a stream, handle it as streaming response
@@ -165,7 +167,8 @@
  requestId: this.requestId,
  progress: 1,
  data: '',
- info: 'ERROR: Stream read failed'
+ info: '',
+ error: 'Stream read failed'
  });
  } else {
  return;
@@ -180,7 +183,7 @@
  requestId: this.rootRequestId || this.requestId,
  progress: Math.min(completedCount, totalCount) / totalCount,
  // Clients expect these to be strings
- data: JSON.stringify(responseData),
+ data: JSON.stringify(responseData || ''),
  info: this.tool || ''
  });
  }
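Together with the error: String field added to the GraphQL type above, these hunks move error reporting out of the info string and into a dedicated field (and data is now stringified defensively). A hedged client-side sketch follows; handleProgress, showError, and render are hypothetical names, and only the payload fields come from the diff:

  // Hypothetical subscriber code - illustrates the old vs. new error shape
  function handleProgress(progress) {
    if (progress.error) {
      // 1.3.32: errors arrive in the dedicated field, info stays empty
      showError(progress.error);
    } else if ((progress.info || '').startsWith('ERROR:')) {
      // 1.3.30 packed errors into info as 'ERROR: <message>'
      showError(progress.info.slice('ERROR:'.length).trim());
    } else {
      render(progress.data, progress.info);
    }
  }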
@@ -121,8 +121,8 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
  const operationUrl = response.headers['operation-location'];
  return { translation: response.data, operationUrl };
  } catch (error) {
- const errorText = error.response?.data || error.message;
- throw new Error(`Failed to create translation: ${error.message}\nDetails: ${errorText}`);
+ const errorText = error.response?.data?.error?.innererror?.message || error.message;
+ throw new Error(`Failed to create translation: ${errorText}`);
  }
  }

@@ -151,8 +151,8 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
  });
  return response.data;
  } catch (error) {
- const errorText = error.response?.data || error.message;
- throw new Error(`Failed to get iteration status: ${error.message}\nDetails: ${errorText}`);
+ const errorText = error.response?.data?.error?.innererror?.message || error.message;
+ throw new Error(`Failed to get iteration status: ${errorText}`);
  }
  }

@@ -165,8 +165,8 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
  });
  return response.data;
  } catch (error) {
- const errorText = error.response?.data || error.message;
- throw new Error(`Failed to poll operation: ${error.message}\nDetails: ${errorText}`);
+ const errorText = error.response?.data?.error?.innererror?.message || error.message;
+ throw new Error(`Failed to poll operation: ${errorText}`);
  }
  }

@@ -360,8 +360,8 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
  const output = await this.getTranslationOutput(translationId, iteration.id);
  return JSON.stringify(output);
  } catch (error) {
- const errorText = error.response?.data || error.message;
- throw new Error(`Failed to create iteration: ${error.message}\nDetails: ${errorText}`);
+ const errorText = error.response?.data?.error?.innererror?.message || error.message;
+ throw new Error(`Failed to create iteration: ${errorText}`);
  }
  } catch (error) {
  logger.error(`Error in video translation: ${error.message}`);
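All four catch blocks in this plugin now reach for the nested Azure error message instead of concatenating the whole response body into the thrown message. The shape the optional chaining appears to target is sketched below; the payload values are assumptions for illustration, not captured output from the service:

  // Assumed error shape implied by the new optional chaining
  const error = {
    message: 'Request failed with status code 400',   // generic HTTP client message
    response: {
      data: {
        error: {
          code: 'InvalidRequest',
          innererror: { message: 'The source video could not be read.' },
        },
      },
    },
  };

  const errorText = error.response?.data?.error?.innererror?.message || error.message;
  // => 'The source video could not be read.' - and when there is no response
  //    body at all (e.g. a network failure), it falls back to error.message.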