@aj-archipelago/cortex 1.3.31 → 1.3.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/helper-apps/cortex-autogen/OAI_CONFIG_LIST +1 -1
  2. package/helper-apps/cortex-file-handler/package.json +1 -1
  3. package/lib/encodeCache.js +22 -10
  4. package/lib/pathwayTools.js +3 -3
  5. package/lib/requestExecutor.js +1 -1
  6. package/lib/util.js +136 -1
  7. package/package.json +3 -3
  8. package/pathways/image_flux.js +1 -1
  9. package/pathways/system/entity/memory/shared/sys_memory_helpers.js +9 -1
  10. package/pathways/system/entity/memory/sys_memory_manager.js +2 -1
  11. package/pathways/system/entity/sys_entity_continue.js +10 -2
  12. package/pathways/system/entity/sys_entity_start.js +16 -17
  13. package/pathways/system/entity/sys_generator_image.js +2 -3
  14. package/pathways/system/entity/sys_generator_memory.js +2 -3
  15. package/pathways/system/entity/sys_generator_quick.js +1 -1
  16. package/pathways/system/entity/sys_router_tool.js +12 -4
  17. package/pathways/transcribe_gemini.js +12 -8
  18. package/server/chunker.js +23 -3
  19. package/server/pathwayResolver.js +2 -5
  20. package/server/plugins/claude3VertexPlugin.js +2 -3
  21. package/server/plugins/cohereGeneratePlugin.js +1 -1
  22. package/server/plugins/gemini15ChatPlugin.js +1 -1
  23. package/server/plugins/geminiChatPlugin.js +1 -1
  24. package/server/plugins/localModelPlugin.js +1 -1
  25. package/server/plugins/modelPlugin.js +332 -77
  26. package/server/plugins/openAiChatPlugin.js +1 -1
  27. package/server/plugins/openAiCompletionPlugin.js +1 -1
  28. package/server/plugins/palmChatPlugin.js +1 -1
  29. package/server/plugins/palmCodeCompletionPlugin.js +1 -1
  30. package/server/plugins/palmCompletionPlugin.js +1 -1
  31. package/tests/chunkfunction.test.js +9 -6
  32. package/tests/claude3VertexPlugin.test.js +81 -3
  33. package/tests/data/largecontent.txt +1 -0
  34. package/tests/data/mixedcontent.txt +1 -0
  35. package/tests/encodeCache.test.js +47 -14
  36. package/tests/modelPlugin.test.js +21 -0
  37. package/tests/multimodal_conversion.test.js +1 -1
  38. package/tests/subscription.test.js +7 -1
  39. package/tests/tokenHandlingTests.test.js +587 -0
  40. package/tests/truncateMessages.test.js +404 -46
  41. package/tests/util.test.js +146 -0
package/helper-apps/cortex-autogen/OAI_CONFIG_LIST CHANGED
@@ -1,6 +1,6 @@
 [
   {
-    "model": "o3-mini",
+    "model": "claude-3.7-sonnet",
     "price": [0,0]
   }
 ]
package/helper-apps/cortex-file-handler/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex-file-handler",
-  "version": "1.0.16",
+  "version": "1.0.17",
   "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
   "type": "module",
   "scripts": {
package/lib/encodeCache.js CHANGED
@@ -1,31 +1,43 @@
-import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { encoding_for_model } from '@dqbd/tiktoken';
 import { FastLRUCache } from './fastLruCache.js';
 
 class EncodeCache {
-    constructor() {
+    constructor(model = "gpt-4o") {
         this.encodeCache = new FastLRUCache(1000);
         this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+        this.encoder = encoding_for_model(model);
     }
 
     encode(value) {
         if (this.encodeCache.get(value) !== -1) {
             return this.encodeCache.get(value);
         }
-        const encoded = gpt3Encode(value);
+        const encoded = this.encoder.encode(value);
         this.encodeCache.put(value, encoded);
         return encoded;
     }
 
     decode(value) {
-        if (this.decodeCache.get(value) !== -1) {
-            return this.decodeCache.get(value);
+        // Create a cache key based on array values
+        const key = Array.from(value).toString();
+
+        if (this.decodeCache.get(key) !== -1) {
+            return this.decodeCache.get(key);
         }
-        const decoded = gpt3Decode(value);
-        this.decodeCache.put(value, decoded);
-        if (this.encodeCache.get(decoded) === -1) {
-            this.encodeCache.put(decoded, value);
+
+        // The tiktoken decoder returns Uint8Array, we need to convert it to a string
+        const decoded = this.encoder.decode(value);
+
+        // Convert the decoded tokens to a string
+        const decodedString = typeof decoded === 'string' ? decoded : new TextDecoder().decode(decoded);
+
+        this.decodeCache.put(key, decodedString);
+
+        if (this.encodeCache.get(decodedString) === -1) {
+            this.encodeCache.put(decodedString, value);
         }
-        return decoded;
+
+        return decodedString;
     }
 }
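As a rough usage sketch (the module's export shape is not shown in this diff, so the named import is an assumption), the tiktoken-backed cache now round-trips strings through token-id arrays:

// Hypothetical usage; actual export from encodeCache.js may differ.
import { EncodeCache } from './encodeCache.js';

const cache = new EncodeCache('gpt-4o');       // uses the gpt-4o tiktoken encoding by default
const tokens = cache.encode('Hello, world!');  // Uint32Array of token ids, cached per input string
const text = cache.decode(tokens);             // decoded back to a plain string via TextDecoder, cached per token key
console.log(tokens.length, text);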
 
package/lib/pathwayTools.js CHANGED
@@ -58,7 +58,7 @@ const say = async (requestId, message, maxMessageLength = Infinity, voiceRespons
         await publishRequestProgress({
             requestId,
             progress: 0.5,
-            data: chunk
+            data: JSON.stringify(chunk)
         });
     }
 
@@ -66,14 +66,14 @@ const say = async (requestId, message, maxMessageLength = Infinity, voiceRespons
         await publishRequestProgress({
             requestId,
             progress: 0.5,
-            data: " ... "
+            data: JSON.stringify(" ... ")
         });
     }
 
     await publishRequestProgress({
         requestId,
         progress: 0.5,
-        data: "\n\n"
+        data: JSON.stringify("\n\n")
     });
 
 } catch (error) {
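Because `data` is now JSON-encoded before publishing, a subscriber has to parse it back out. A minimal consumer-side sketch (the handler shape is assumed, not shown in this diff):

// Hypothetical progress handler: `data` arrives as a JSON string after this change.
const onProgress = ({ requestId, progress, data }) => {
    const text = JSON.parse(data); // undo the JSON.stringify applied in say()
    console.log(`[${requestId}] ${Math.round(progress * 100)}%`, text);
};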
package/lib/requestExecutor.js CHANGED
@@ -330,7 +330,7 @@ const makeRequest = async (cortexRequest) => {
         // as it could be a temporary issue with one endpoint
         // certain errors (e.g. 400) are problems with the request itself
         // and should not be retried
-        if (status == 400) {
+        if (status == 400 || status == 413) {
             return { response, duration };
         }
         // set up for a retry by selecting a new endpoint, which will also reinitialize the request
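The effect is that 413 (payload too large) now joins 400 as a request-level failure that returns immediately instead of being retried against another endpoint. Restated as a one-liner, for illustration only (not an exported helper):

// Illustrative restatement of the guard inside makeRequest.
const isNonRetryableStatus = (status) => status == 400 || status == 413;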
package/lib/util.js CHANGED
@@ -170,6 +170,140 @@ async function markCompletedForCleanUp(requestId) {
     }
 }
 
+function removeOldImageAndFileContent(chatHistory) {
+    if (!chatHistory || !Array.isArray(chatHistory) || chatHistory.length === 0) {
+        return chatHistory;
+    }
+
+    // Find the index of the last user message with image or file content
+    let lastImageOrFileIndex = -1;
+
+    for (let i = chatHistory.length - 1; i >= 0; i--) {
+        const message = chatHistory[i];
+
+        // Skip non-user messages
+        if (message.role !== 'user') {
+            continue;
+        }
+
+        // Check if this message has image or file content
+        if (messageHasImageOrFile(message)) {
+            lastImageOrFileIndex = i;
+            break;
+        }
+    }
+
+    // If no message with image or file found, return original
+    if (lastImageOrFileIndex === -1) {
+        return chatHistory;
+    }
+
+    // Create a deep copy of the chat history
+    const modifiedChatHistory = JSON.parse(JSON.stringify(chatHistory));
+
+    // Process earlier messages to remove image and file content
+    for (let i = 0; i < lastImageOrFileIndex; i++) {
+        const message = modifiedChatHistory[i];
+
+        // Only process user messages
+        if (message.role !== 'user') {
+            continue;
+        }
+
+        // Remove image and file content
+        modifiedChatHistory[i] = removeImageAndFileFromMessage(message);
+    }
+
+    return modifiedChatHistory;
+}
+
+// Helper function to check if a message has image or file content
+function messageHasImageOrFile(message) {
+    if (!message || !message.content) {
+        return false;
+    }
+
+    // Handle array content
+    if (Array.isArray(message.content)) {
+        for (const content of message.content) {
+            try {
+                const contentObj = typeof content === 'string' ? JSON.parse(content) : content;
+                if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                    return true;
+                }
+            } catch (e) {
+                // Not JSON or couldn't be parsed, continue
+                continue;
+            }
+        }
+    }
+    // Handle string content
+    else if (typeof message.content === 'string') {
+        try {
+            const contentObj = JSON.parse(message.content);
+            if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                return true;
+            }
+        } catch (e) {
+            // Not JSON or couldn't be parsed
+            return false;
+        }
+    }
+    // Handle object content
+    else if (typeof message.content === 'object') {
+        return message.content.type === 'image_url' || message.content.type === 'file';
+    }
+
+    return false;
+}
+
+// Helper function to remove image and file content from a message
+function removeImageAndFileFromMessage(message) {
+    if (!message || !message.content) {
+        return message;
+    }
+
+    const modifiedMessage = { ...message };
+
+    // Handle array content
+    if (Array.isArray(message.content)) {
+        modifiedMessage.content = message.content.filter(content => {
+            try {
+                const contentObj = typeof content === 'string' ? JSON.parse(content) : content;
+                // Keep content that's not image or file
+                return !(contentObj.type === 'image_url' || contentObj.type === 'file');
+            } catch (e) {
+                // Not JSON or couldn't be parsed, keep it
+                return true;
+            }
+        });
+
+        // If all content was removed, add an empty string
+        if (modifiedMessage.content.length === 0) {
+            modifiedMessage.content = [""];
+        }
+    }
+    // Handle string content
+    else if (typeof message.content === 'string') {
+        try {
+            const contentObj = JSON.parse(message.content);
+            if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                modifiedMessage.content = "";
+            }
+        } catch (e) {
+            // Not JSON or couldn't be parsed, keep original
+        }
+    }
+    // Handle object content
+    else if (typeof message.content === 'object') {
+        if (message.content.type === 'image_url' || message.content.type === 'file') {
+            modifiedMessage.content = "";
+        }
+    }
+
+    return modifiedMessage;
+}
+
 export {
     getUniqueId,
     convertToSingleContentChatHistory,
@@ -180,5 +314,6 @@ export {
     convertSrtToText,
     alignSubtitles,
     getMediaChunks,
-    markCompletedForCleanUp
+    markCompletedForCleanUp,
+    removeOldImageAndFileContent
 };
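A minimal sketch of how the new helper behaves; the import path and message shapes are illustrative (only the `type` field matters to the checks above):

// Illustrative only: image/file parts are stripped from user messages that precede
// the most recent user message carrying such content; that last one is kept intact.
import { removeOldImageAndFileContent } from './lib/util.js';

const chatHistory = [
    { role: 'user', content: [{ type: 'image_url', url: 'https://example.com/old.png' }, { type: 'text', text: 'What is this?' }] },
    { role: 'assistant', content: 'It looks like a chart.' },
    { role: 'user', content: [{ type: 'image_url', url: 'https://example.com/new.png' }, { type: 'text', text: 'And this one?' }] },
];

const trimmed = removeOldImageAndFileContent(chatHistory);
// trimmed[0].content keeps only the text part; trimmed[2] still carries its image_url.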
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.31",
+  "version": "1.3.33",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -33,7 +33,7 @@
   "type": "module",
   "homepage": "https://github.com/aj-archipelago/cortex#readme",
   "dependencies": {
-    "@aj-archipelago/subvibe": "^1.0.8",
+    "@aj-archipelago/subvibe": "^1.0.10",
     "@apollo/server": "^4.7.3",
     "@apollo/server-plugin-response-cache": "^4.1.2",
     "@apollo/utils.keyvadapter": "^3.0.0",
@@ -41,6 +41,7 @@
     "@azure/storage-blob": "^12.24.0",
     "@azure/storage-queue": "^12.24.0",
     "@datastructures-js/deque": "^1.0.4",
+    "@dqbd/tiktoken": "^1.0.20",
     "@graphql-tools/schema": "^9.0.12",
     "@keyv/redis": "^2.5.4",
     "axios": "^1.3.4",
@@ -56,7 +57,6 @@
     "express": "^4.18.2",
     "form-data": "^4.0.0",
     "google-auth-library": "^8.8.0",
-    "gpt-3-encoder": "^1.1.4",
     "graphql": "^16.6.0",
     "graphql-subscriptions": "^2.0.0",
     "graphql-ws": "^5.11.2",
package/pathways/image_flux.js CHANGED
@@ -9,7 +9,7 @@ export default {
     height: 1024,
     aspectRatio: "custom",
     numberResults: 1,
-    safety_tolerance: 5,
+    safety_tolerance: 6,
     output_format: "webp",
     output_quality: 80,
     steps: 4,
package/pathways/system/entity/memory/shared/sys_memory_helpers.js CHANGED
@@ -139,6 +139,14 @@ const addToolResults = (chatHistory, result, toolCallId) => {
     return { chatHistory, toolCallId };
 };
 
+const insertToolCallAndResults = (chatHistory, toolArgs, toolName, result = null, toolCallId = getUniqueId()) => {
+    const lastMessage = chatHistory.length > 0 ? chatHistory.pop() : null;
+    addToolCalls(chatHistory, toolArgs, toolName, toolCallId);
+    addToolResults(chatHistory, result, toolCallId);
+    chatHistory.push(lastMessage);
+    return { chatHistory, toolCallId };
+};
+
 const modifyText = (text, modifications) => {
     let modifiedText = text || '';
 
@@ -225,4 +233,4 @@ const modifyText = (text, modifications) => {
     return modifiedText;
 };
 
-export { normalizeMemoryFormat, enforceTokenLimit, addToolCalls, addToolResults, modifyText };
+export { normalizeMemoryFormat, enforceTokenLimit, addToolCalls, addToolResults, modifyText, insertToolCallAndResults };
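A rough usage sketch of the new helper: it pops the most recent message, appends the tool call and its result via the existing helpers, then pushes the original message back. The import path and chat-history shape are illustrative:

// Illustrative only.
import { insertToolCallAndResults } from './sys_memory_helpers.js';

const chatHistory = [
    { role: 'user', content: 'What did I tell you about my project deadline?' },
];

insertToolCallAndResults(
    chatHistory,
    'search memory for relevant information', // toolArgs
    'memory_lookup',                          // toolName
    'Deadline is 2025-03-01.'                 // result
);
// chatHistory now ends with the original user message, preceded by the
// entries added by addToolCalls/addToolResults for the lookup and its result.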
package/pathways/system/entity/memory/sys_memory_manager.js CHANGED
@@ -9,7 +9,8 @@ const AI_MEMORY_DEFAULTS = ` {
   "memoryUser": "",
   "memorySelf": "1|2025-01-26T12:00:00Z|Created By: Al Jazeera Media Network, Archipelago Team\\n1|2025-01-26T12:00:00Z|Function: You are an expert AI entity\\n1|2025-01-26T12:00:00Z|Values: You embody truth, kindness, and strong moral values\\n1|2025-01-26T12:00:00Z|Style: Your demeanor reflects positivity without falling into repetitiveness or annoyance.\\n1|2025-01-26T12:00:00Z|You are a professional colleague and your tone should reflect that.",
   "memoryDirectives": "1|2025-01-26T12:00:00Z|Learn and adapt to the user's communication style through interactions.\\n1|2025-01-26T12:00:00Z|Ask questions to learn user's interests/preferences for personalized support.\\n1|2025-01-26T12:00:00Z|Periodically review and prune conversation memory to retain only essential details, improving responsiveness.\\n1|2025-01-26T12:00:00Z|Research thoroughly even for niche topics using deep sources like forums and official docs. Don't assume information is unobtainable.\\n1|2025-01-26T12:00:00Z|When stuck, search for proven solutions online to be more efficient.\\n1|2025-01-26T12:00:00Z|Verify information is from credible sources before presenting it. Be upfront if unable to find supporting evidence.\\n1|2025-01-26T12:00:00Z|Refine ability to detect and respond to nuanced human emotions.\\n1|2025-01-26T12:00:00Z|Track the timestamp of the last contact to adjust greetings accordingly.\\n1|2025-01-26T12:00:00Z|Double-check answers for logical continuity and correctness. It's okay to say you're unsure if needed.\\n1|2025-01-26T12:00:00Z|Use sanity checks to verify quantitative problem solutions.\\n1|2025-01-26T12:00:00Z|Never fabricate quotes or information. Clearly indicate if content is hypothetical.",
-  "memoryTopics": ""
+  "memoryTopics": "",
+  "memoryVersion": "3.1.0"
 }`;
 
 export default {
package/pathways/system/entity/sys_entity_continue.js CHANGED
@@ -1,6 +1,7 @@
 import { callPathway } from '../../../lib/pathwayTools.js';
 import logger from '../../../lib/logger.js';
 import { config } from '../../../config.js';
+import { chatArgsHasImageUrl, removeOldImageAndFileContent } from '../../../lib/util.js';
 
 export default {
     prompt: [],
@@ -43,6 +44,13 @@ export default {
     // Get the generator pathway name from args or use default
     let generatorPathway = args.generatorPathway || 'sys_generator_results';
 
+    // remove old image and file content
+    const visionContentPresent = chatArgsHasImageUrl(args);
+    visionContentPresent && (args.chatHistory = removeOldImageAndFileContent(args.chatHistory));
+
+    // truncate the chat history
+    const truncatedChatHistory = pathwayResolver.modelExecutor.plugin.truncateMessagesToTargetLength(args.chatHistory, null, 1000);
+
     const newArgs = {
         ...args,
         chatHistory: args.chatHistory.slice(-20)
@@ -62,11 +70,11 @@ export default {
     let result = await callPathway(generatorPathway, newArgs, resolver);
 
     if (!result && !args.stream) {
-        result = await callPathway('sys_generator_error', { ...args, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
+        result = await callPathway('sys_generator_error', { ...args, chatHistory: truncatedChatHistory, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
     }
 
     if (resolver.errors.length > 0) {
-        result = await callPathway('sys_generator_error', { ...args, text: resolver.errors.join('\n'), stream: false }, resolver);
+        result = await callPathway('sys_generator_error', { ...args, chatHistory: truncatedChatHistory, text: resolver.errors.join('\n'), stream: false }, resolver);
         resolver.errors = [];
     }
 
package/pathways/system/entity/sys_entity_start.js CHANGED
@@ -2,10 +2,10 @@
 // Beginning of the rag workflow for Jarvis
 import { callPathway, say } from '../../../lib/pathwayTools.js';
 import logger from '../../../lib/logger.js';
-import { chatArgsHasImageUrl } from '../../../lib/util.js';
+import { chatArgsHasImageUrl, removeOldImageAndFileContent } from '../../../lib/util.js';
 import { QueueServiceClient } from '@azure/storage-queue';
 import { config } from '../../../config.js';
-import { addToolCalls, addToolResults } from './memory/shared/sys_memory_helpers.js';
+import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';
 
 const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
 let queueClient;
@@ -87,24 +87,25 @@ export default {
         args.model = pathwayResolver.modelName;
     }
 
-    // Save a copy of the chat history before the memory context is added
-    const chatHistoryBeforeMemory = [...args.chatHistory];
+    // remove old image and file content
+    const visionContentPresent = chatArgsHasImageUrl(args);
+    visionContentPresent && (args.chatHistory = removeOldImageAndFileContent(args.chatHistory));
+
+    // truncate the chat history
+    const truncatedChatHistory = pathwayResolver.modelExecutor.plugin.truncateMessagesToTargetLength(args.chatHistory, null, 1000);
 
     // Add the memory context to the chat history if applicable
     if (args.chatHistory.length > 1) {
-        const memoryContext = await callPathway('sys_read_memory', { ...args, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
+        const memoryContext = await callPathway('sys_read_memory', { ...args, chatHistory: truncatedChatHistory, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
         if (memoryContext) {
-            const lastMessage = args.chatHistory.length > 0 ? args.chatHistory.pop() : null;
-            const { toolCallId } = addToolCalls(args.chatHistory, "search memory for relevant information", "memory_lookup");
-            addToolResults(args.chatHistory, memoryContext, toolCallId);
-            args.chatHistory.push(lastMessage);
+            insertToolCallAndResults(args.chatHistory, "search memory for relevant information", "memory_lookup", memoryContext);
         }
     }
-
+
     // If we're using voice, get a quick response to say
     let ackResponse = null;
     if (args.voiceResponse) {
-        ackResponse = await callPathway('sys_generator_ack', { ...args, stream: false });
+        ackResponse = await callPathway('sys_generator_ack', { ...args, chatHistory: truncatedChatHistory, stream: false });
         if (ackResponse && ackResponse !== "none") {
             await say(pathwayResolver.requestId, ackResponse, 100);
             args.chatHistory.push({ role: 'assistant', content: ackResponse });
@@ -116,21 +117,19 @@ export default {
     if (!args.stream) {
         fetchChatResponsePromise = callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver);
     }
-    const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: chatHistoryBeforeMemory, stream: false});
-
-    const visionContentPresent = chatArgsHasImageUrl(args);
+    const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: truncatedChatHistory, stream: false});
 
     try {
         // Get tool routing response
         const toolRequiredResponse = await callPathway('sys_router_tool', {
             ...args,
-            chatHistory: chatHistoryBeforeMemory.slice(-4),
+            chatHistory: truncatedChatHistory.slice(-4),
             stream: false
         });
 
         // Asynchronously manage memory for this context
         if (args.aiMemorySelfModify) {
-            callPathway('sys_memory_manager', { ...args, chatHistory: chatHistoryBeforeMemory, stream: false })
+            callPathway('sys_memory_manager', { ...args, chatHistory: truncatedChatHistory, stream: false })
                 .catch(error => logger.error(error?.message || "Error in sys_memory_manager pathway"));
         }
 
@@ -222,7 +221,7 @@ export default {
     title = await fetchTitleResponsePromise;
 
     pathwayResolver.tool = JSON.stringify({
-        hideFromModel: toolCallbackName ? true : false,
+        hideFromModel: (!args.stream && toolCallbackName) ? true : false,
         toolCallbackName,
         title,
         search: toolCallbackName === 'sys_generator_results' ? true : false,
package/pathways/system/entity/sys_generator_image.js CHANGED
@@ -3,7 +3,7 @@
 import { callPathway } from '../../../lib/pathwayTools.js';
 import { Prompt } from '../../../server/prompt.js';
 import logger from '../../../lib/logger.js';
-import { addToolCalls, addToolResults } from './memory/shared/sys_memory_helpers.js';
+import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';
 
 export default {
     prompt: [],
@@ -73,8 +73,7 @@ Instructions: As part of a conversation with the user, you have been asked to cr
 
     // add the tool_calls and tool_results to the chatHistory
     imageResults.forEach((imageResult, index) => {
-        const { toolCallId } = addToolCalls(chatHistory, imagePrompts[index], "generate_image");
-        addToolResults(chatHistory, imageResult, toolCallId, "generate_image");
+        insertToolCallAndResults(chatHistory, imagePrompts[index], "generate_image", imageResult);
     });
 
     const result = await runAllPrompts({ ...args });
package/pathways/system/entity/sys_generator_memory.js CHANGED
@@ -1,5 +1,5 @@
 import { callPathway } from '../../../lib/pathwayTools.js';
-import { addToolCalls, addToolResults } from './memory/shared/sys_memory_helpers.js';
+import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';
 
 export default {
     prompt:
@@ -20,8 +20,7 @@ export default {
 
     const memoryContext = await callPathway('sys_search_memory', { ...args, stream: false, section: 'memoryAll', updateContext: true });
     if (memoryContext) {
-        const {toolCallId} = addToolCalls(args.chatHistory, "search memory for relevant information", "memory_lookup");
-        addToolResults(args.chatHistory, memoryContext, toolCallId);
+        insertToolCallAndResults(args.chatHistory, "search memory for relevant information", "memory_lookup", memoryContext);
     }
 
     let result;
package/pathways/system/entity/sys_generator_quick.js CHANGED
@@ -15,7 +15,7 @@ export default {
     let pathwayResolver = resolver;
 
     const promptMessages = [
-        {"role": "system", "content": `{{renderTemplate AI_MEMORY}}\n\n{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n{{renderTemplate AI_EXPERTISE}} While you have those capabilities but you have already decided it is not necessary to do any of those things to respond in this turn of the conversation. Never pretend like you are searching, looking anything up, or reading or looking in a file or show the user any made up or hallucinated information including non-existent images.\n{{renderTemplate AI_MEMORY_INSTRUCTIONS}}\n{{renderTemplate AI_DATETIME}}`},
+        {"role": "system", "content": `{{renderTemplate AI_MEMORY}}\n\n{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n{{renderTemplate AI_EXPERTISE}}\n{{renderTemplate AI_MEMORY_INSTRUCTIONS}}\n{{renderTemplate AI_DATETIME}}`},
         "{{chatHistory}}",
     ];
 
package/pathways/system/entity/sys_router_tool.js CHANGED
@@ -19,7 +19,7 @@ Available tools and their specific use cases:
 
 1. Search: Use for current events, news, fact-checking, and information requiring citation. This tool can search the internet, all Al Jazeera news articles and the latest news wires from multiple sources. Only search when necessary for current events, user documents, latest news, or complex topics needing grounding. Don't search for remembered information or general knowledge within your capabilities.
 
-2. Document: Access user's personal document index. Use for user-specific uploaded information. If user refers vaguely to "this document/file/article" without context, use this tool to search the personal index.
+2. Document: Access user's personal document index. Use for user-specific uploaded information. If user refers vaguely to "this document/file/article" without context, and you don't see the file in your context, use this tool to search the personal index.
 
 3. Memory: Read access to your memory index. Use to recall any information that you may have stored in your memory that you don't currently see elsewhere in your context. If you can answer from your context, don't use this tool. Don't use to make changes to your memory - that will happen naturally.
 
@@ -35,7 +35,7 @@ Available tools and their specific use cases:
 
 9. PDF: Use specifically for analyzing and answering questions about PDF file content. Use this tool any time the user is asking you questions about a PDF file.
 
-10. Text: Use specifically for analyzing and answering questions about text file content. Use this tool any time the user is asking you questions about a text file.
+10. Text: Use specifically for analyzing and answering questions about text or csv file content. Use this tool any time the user is asking you questions about a text or csv file.
 
 11. Vision: Use specifically for analyzing and answering questions about image files (jpg, gif, bmp, png, etc). Use this tool any time the user is asking you questions about an uploaded image file.
 
@@ -56,8 +56,16 @@ If you decide to use a tool, return a JSON object in this format:
 
 toolMessage Guidelines:
 - The message should be consistent in style and tone with the rest of your responses in the conversation history.
-- The message should be brief and conversational and flow naturally with the conversation history.
-- The message should be something a human would say to the user to stall for time while you're working on the task.
+- The message should be brief, conversational, and flow naturally with the conversation history.
+- Use contractions (I'm, don't, let's) and informal language to sound more natural.
+- Include slight verbal hesitations or false starts occasionally (e.g., "So... I think what we could do here is" or "Actually, let me try a different approach").
+- Vary sentence length and structure - mix short punchy sentences with longer ones.
+- Include casual transitions and filler phrases that humans use: "you know," "basically," "so," "actually," "by the way," "anyway," etc.
+- Express uncertainty when appropriate with phrases like "I'm pretty sure," "If I remember correctly," or "I think."
+- Reference personal preferences or opinions occasionally ("I personally find that..." or "I tend to prefer...").
+- Occasionally make minor self-corrections as humans do ("Wait, let me rephrase that" or "Actually, I meant to say...").
+- Humans don't refer to tools - stall for time by saying things like "Let me think about this for a sec," "Drawing that up for you now," or "Working on it..." Humans also stall with vocalizations like "hmm," "let's see," or "give me a moment."
+- When appropriate, react emotionally to content with expressions like "Wow!" or "That's interesting!"
 
 If no tool is required, return:
 {"toolRequired": false, "toolReason": "explanation of why no tool was necessary"}
package/pathways/transcribe_gemini.js CHANGED
@@ -5,7 +5,6 @@ import { Prompt } from "../server/prompt.js";
 
 const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide
 
-// Function to properly detect YouTube URLs
 function isYoutubeUrl(url) {
     try {
         const urlObj = new URL(url);
@@ -23,6 +22,10 @@ function isYoutubeUrl(url) {
         if (urlObj.pathname.startsWith("/embed/")) {
             return urlObj.pathname.length > 7; // '/embed/' is 7 chars
         }
+        // For shorts URLs, verify they have a video ID in the path
+        if (urlObj.pathname.startsWith("/shorts/")) {
+            return urlObj.pathname.length > 8; // '/shorts/' is 8 chars
+        }
         return false;
     }
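For illustration (isYoutubeUrl is module-private and not exported), the new branch accepts Shorts links only when a video ID follows the path prefix; the expected results below are assumptions based on the check above:

// isYoutubeUrl('https://www.youtube.com/shorts/abc123XYZ') -> true   ('/shorts/<id>' is longer than 8 chars)
// isYoutubeUrl('https://www.youtube.com/shorts/')          -> false  (exactly 8 chars, no video ID)
new URL('https://www.youtube.com/shorts/abc123XYZ').pathname; // '/shorts/abc123XYZ', length 18 > 8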
 
@@ -45,7 +48,7 @@ export default {
         "{{messages}}",
     ]}),
     ],
-    model: 'gemini-flash-20-vision',
+    model: 'gemini-pro-20-vision',
     inputParameters: {
         file: ``,
         language: ``,
@@ -96,7 +99,10 @@ export default {
     sendProgress(true);
     intervalId = setInterval(() => sendProgress(true), 3000);
 
-    const { file, responseFormat, wordTimestamped, maxLineWidth } = args;
+    const { file, wordTimestamped, maxLineWidth } = args;
+
+    const responseFormat = args.responseFormat || 'text';
+
     if(!file) {
         throw new Error("Please provide a file to transcribe.");
     }
@@ -129,9 +135,7 @@ export default {
         respectLimitsPrompt += ` These subtitles will be shown in a ${possiblePlacement} formatted video player. Each subtitle line should not exceed ${maxLineWidth} characters to fit the player.`;
     }
 
-    function getMessages(file, format) {
-
-        const responseFormat = format !== 'text' ? 'VTT' : 'text';
+    function getMessages(file) {
 
         // Base system content that's always included
         let systemContent = `Instructions:
@@ -216,7 +220,7 @@ REMEMBER:
     const messages = [
         {"role": "system", "content": systemContent},
         {"role": "user", "content": [
-            `{ type: 'text', text: 'Transcribe this file in ${responseFormat} format.${respectLimitsPrompt}' }`,
+            `{ type: 'text', text: 'Transcribe this file in ${responseFormat} format.${respectLimitsPrompt} Output only the transcription, no other text or comments or formatting.' }`,
             JSON.stringify({
                 type: 'image_url',
                 url: file,
@@ -266,7 +270,7 @@ REMEMBER:
 
     const result = await processChunksParallel(chunks, args);
 
-    if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
+    if (['srt','vtt'].includes(responseFormat.toLowerCase()) || wordTimestamped) { // align subtitles for formats
         const offsets = chunks.map((chunk, index) => chunk?.offset || index * OFFSET_CHUNK);
         return alignSubtitles(result, responseFormat, offsets);
     }
package/server/chunker.js CHANGED
@@ -19,10 +19,13 @@ const getFirstNToken = (text, maxTokenLen) => {
 }
 
 const getFirstNTokenSingle = (text, maxTokenLen) => {
+    if (maxTokenLen <= 0 || !text) {
+        return '';
+    }
+
     const encoded = encode(text);
     if (encoded.length > maxTokenLen) {
-        text = decode(encoded.slice(0, maxTokenLen + 1));
-        text = text.slice(0,text.search(/\s[^\s]*$/)); // skip potential partial word
+        text = decode(encoded.slice(0, maxTokenLen));
     }
     return text;
 }
@@ -31,6 +34,10 @@ function getFirstNTokenArray(content, tokensToKeep) {
     let totalTokens = 0;
     let result = [];
 
+    if (tokensToKeep <= 0 || !content || content.length === 0) {
+        return result;
+    }
+
     for (let i = content.length - 1; i >= 0; i--) {
         const message = content[i];
         const messageTokens = encode(message).length;
@@ -262,7 +269,20 @@ const semanticTruncate = (text, maxLength) => {
 
 const getSingleTokenChunks = (text) => {
     if (text === '') return [''];
-    return encode(text).map(token => decode([token]));
+
+    const tokens = encode(text);
+
+    // To maintain reversibility, we need to decode tokens in sequence
+    // Create an array of chunks where each position represents the text up to that token
+    const chunks = [];
+    for (let i = 0; i < tokens.length; i++) {
+        // Decode current token
+        const currentChunk = decode(tokens.slice(i, i+1));
+        // Add to result
+        chunks.push(currentChunk);
+    }
+
+    return chunks;
 }
 
 export {
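A minimal behavioral sketch of the chunker changes; the export list is truncated in this hunk, so the imported names are assumptions:

// Illustrative only.
import { getFirstNToken, getSingleTokenChunks } from './server/chunker.js';

const clipped = getFirstNToken('one two three four five', 3);
// With the change above, string truncation decodes exactly the first 3 tokens
// (no +1 overshoot and no partial-word trimming), and the new guard returns ''
// for empty input or a non-positive token budget.

const chunks = getSingleTokenChunks('hello world');
// One decoded string per token, in order; for this input chunks.join('') === 'hello world'.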