@aj-archipelago/cortex 1.3.32 → 1.3.34

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Files changed (34)
  1. package/helper-apps/cortex-autogen/OAI_CONFIG_LIST +1 -1
  2. package/lib/encodeCache.js +22 -10
  3. package/lib/pathwayTools.js +10 -3
  4. package/lib/requestExecutor.js +1 -1
  5. package/lib/util.js +136 -1
  6. package/package.json +2 -2
  7. package/pathways/system/entity/memory/sys_memory_manager.js +2 -1
  8. package/pathways/system/entity/sys_entity_continue.js +10 -2
  9. package/pathways/system/entity/sys_entity_start.js +12 -10
  10. package/pathways/system/entity/sys_router_tool.js +2 -2
  11. package/server/chunker.js +23 -3
  12. package/server/pathwayResolver.js +2 -5
  13. package/server/plugins/claude3VertexPlugin.js +2 -3
  14. package/server/plugins/cohereGeneratePlugin.js +1 -1
  15. package/server/plugins/gemini15ChatPlugin.js +1 -1
  16. package/server/plugins/geminiChatPlugin.js +1 -1
  17. package/server/plugins/localModelPlugin.js +1 -1
  18. package/server/plugins/modelPlugin.js +332 -77
  19. package/server/plugins/openAiChatPlugin.js +1 -1
  20. package/server/plugins/openAiCompletionPlugin.js +1 -1
  21. package/server/plugins/palmChatPlugin.js +1 -1
  22. package/server/plugins/palmCodeCompletionPlugin.js +1 -1
  23. package/server/plugins/palmCompletionPlugin.js +1 -1
  24. package/tests/chunkfunction.test.js +9 -6
  25. package/tests/claude3VertexPlugin.test.js +81 -3
  26. package/tests/data/largecontent.txt +1 -0
  27. package/tests/data/mixedcontent.txt +1 -0
  28. package/tests/encodeCache.test.js +47 -14
  29. package/tests/modelPlugin.test.js +21 -0
  30. package/tests/multimodal_conversion.test.js +1 -1
  31. package/tests/subscription.test.js +7 -1
  32. package/tests/tokenHandlingTests.test.js +587 -0
  33. package/tests/truncateMessages.test.js +404 -46
  34. package/tests/util.test.js +146 -0
package/helper-apps/cortex-autogen/OAI_CONFIG_LIST CHANGED
@@ -1,6 +1,6 @@
 [
     {
-        "model": "o3-mini",
+        "model": "claude-3.7-sonnet",
         "price": [0,0]
     }
 ]
package/lib/encodeCache.js CHANGED
@@ -1,31 +1,43 @@
-import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { encoding_for_model } from '@dqbd/tiktoken';
 import { FastLRUCache } from './fastLruCache.js';
 
 class EncodeCache {
-    constructor() {
+    constructor(model = "gpt-4o") {
         this.encodeCache = new FastLRUCache(1000);
         this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+        this.encoder = encoding_for_model(model);
     }
 
     encode(value) {
         if (this.encodeCache.get(value) !== -1) {
             return this.encodeCache.get(value);
         }
-        const encoded = gpt3Encode(value);
+        const encoded = this.encoder.encode(value);
         this.encodeCache.put(value, encoded);
         return encoded;
     }
 
     decode(value) {
-        if (this.decodeCache.get(value) !== -1) {
-            return this.decodeCache.get(value);
+        // Create a cache key based on array values
+        const key = Array.from(value).toString();
+
+        if (this.decodeCache.get(key) !== -1) {
+            return this.decodeCache.get(key);
         }
-        const decoded = gpt3Decode(value);
-        this.decodeCache.put(value, decoded);
-        if (this.encodeCache.get(decoded) === -1) {
-            this.encodeCache.put(decoded, value);
+
+        // The tiktoken decoder returns Uint8Array, we need to convert it to a string
+        const decoded = this.encoder.decode(value);
+
+        // Convert the decoded tokens to a string
+        const decodedString = typeof decoded === 'string' ? decoded : new TextDecoder().decode(decoded);
+
+        this.decodeCache.put(key, decodedString);
+
+        if (this.encodeCache.get(decodedString) === -1) {
+            this.encodeCache.put(decodedString, value);
         }
-        return decoded;
+
+        return decodedString;
     }
 }
 
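The move from gpt-3-encoder to @dqbd/tiktoken changes the data types flowing through the cache: encode() now returns a Uint32Array of token ids and decode() returns a Uint8Array of UTF-8 bytes, which is why the decode cache keys on Array.from(value).toString() and pipes the result through TextDecoder. A minimal sketch of the underlying round trip, independent of the cache (the model name here is just an example):

    import { encoding_for_model } from '@dqbd/tiktoken';

    const enc = encoding_for_model('gpt-4o');      // WASM-backed tokenizer
    const tokens = enc.encode('hello world');      // Uint32Array of token ids
    const bytes = enc.decode(tokens);              // Uint8Array of UTF-8 bytes, not a string
    const text = new TextDecoder().decode(bytes);  // back to 'hello world'
    enc.free();                                    // release the tokenizer's WASM memory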
package/lib/pathwayTools.js CHANGED
@@ -54,11 +54,16 @@ const say = async (requestId, message, maxMessageLength = Infinity, voiceRespons
     try {
         const chunks = getSemanticChunks(message, maxMessageLength);
 
+        const info = JSON.stringify({
+            ephemeral: true,
+        });
+
         for (let chunk of chunks) {
             await publishRequestProgress({
                 requestId,
                 progress: 0.5,
-                data: chunk
+                data: JSON.stringify(chunk),
+                info
             });
         }
 
@@ -66,14 +71,16 @@ const say = async (requestId, message, maxMessageLength = Infinity, voiceRespons
             await publishRequestProgress({
                 requestId,
                 progress: 0.5,
-                data: " ... "
+                data: JSON.stringify(" ... "),
+                info
             });
         }
 
         await publishRequestProgress({
             requestId,
             progress: 0.5,
-            data: "\n\n"
+            data: JSON.stringify("\n\n"),
+            info
         });
 
     } catch (error) {
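With this change every chunk published by say() is JSON-encoded and tagged with an ephemeral info payload. A rough sketch of what a consumer of the request-progress stream now receives; the requestId and text are illustrative, and the exact subscription shape depends on the Cortex GraphQL schema:

    // Illustrative progress event; field values are hypothetical.
    const event = {
        requestId: 'req-123',
        progress: 0.5,
        data: JSON.stringify('First spoken chunk'),
        info: JSON.stringify({ ephemeral: true })
    };

    const chunkText = JSON.parse(event.data);      // plain string chunk
    const { ephemeral } = JSON.parse(event.info);  // true; presumably marks the chunk as transient TTS output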
package/lib/requestExecutor.js CHANGED
@@ -330,7 +330,7 @@ const makeRequest = async (cortexRequest) => {
     // as it could be a temporary issue with one endpoint
     // certain errors (e.g. 400) are problems with the request itself
     // and should not be retried
-    if (status == 400) {
+    if (status == 400 || status == 413) {
         return { response, duration };
     }
     // set up for a retry by selecting a new endpoint, which will also reinitialize the request
package/lib/util.js CHANGED
@@ -170,6 +170,140 @@ async function markCompletedForCleanUp(requestId) {
     }
 }
 
+function removeOldImageAndFileContent(chatHistory) {
+    if (!chatHistory || !Array.isArray(chatHistory) || chatHistory.length === 0) {
+        return chatHistory;
+    }
+
+    // Find the index of the last user message with image or file content
+    let lastImageOrFileIndex = -1;
+
+    for (let i = chatHistory.length - 1; i >= 0; i--) {
+        const message = chatHistory[i];
+
+        // Skip non-user messages
+        if (message.role !== 'user') {
+            continue;
+        }
+
+        // Check if this message has image or file content
+        if (messageHasImageOrFile(message)) {
+            lastImageOrFileIndex = i;
+            break;
+        }
+    }
+
+    // If no message with image or file found, return original
+    if (lastImageOrFileIndex === -1) {
+        return chatHistory;
+    }
+
+    // Create a deep copy of the chat history
+    const modifiedChatHistory = JSON.parse(JSON.stringify(chatHistory));
+
+    // Process earlier messages to remove image and file content
+    for (let i = 0; i < lastImageOrFileIndex; i++) {
+        const message = modifiedChatHistory[i];
+
+        // Only process user messages
+        if (message.role !== 'user') {
+            continue;
+        }
+
+        // Remove image and file content
+        modifiedChatHistory[i] = removeImageAndFileFromMessage(message);
+    }
+
+    return modifiedChatHistory;
+}
+
+// Helper function to check if a message has image or file content
+function messageHasImageOrFile(message) {
+    if (!message || !message.content) {
+        return false;
+    }
+
+    // Handle array content
+    if (Array.isArray(message.content)) {
+        for (const content of message.content) {
+            try {
+                const contentObj = typeof content === 'string' ? JSON.parse(content) : content;
+                if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                    return true;
+                }
+            } catch (e) {
+                // Not JSON or couldn't be parsed, continue
+                continue;
+            }
+        }
+    }
+    // Handle string content
+    else if (typeof message.content === 'string') {
+        try {
+            const contentObj = JSON.parse(message.content);
+            if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                return true;
+            }
+        } catch (e) {
+            // Not JSON or couldn't be parsed
+            return false;
+        }
+    }
+    // Handle object content
+    else if (typeof message.content === 'object') {
+        return message.content.type === 'image_url' || message.content.type === 'file';
+    }
+
+    return false;
+}
+
+// Helper function to remove image and file content from a message
+function removeImageAndFileFromMessage(message) {
+    if (!message || !message.content) {
+        return message;
+    }
+
+    const modifiedMessage = { ...message };
+
+    // Handle array content
+    if (Array.isArray(message.content)) {
+        modifiedMessage.content = message.content.filter(content => {
+            try {
+                const contentObj = typeof content === 'string' ? JSON.parse(content) : content;
+                // Keep content that's not image or file
+                return !(contentObj.type === 'image_url' || contentObj.type === 'file');
+            } catch (e) {
+                // Not JSON or couldn't be parsed, keep it
+                return true;
+            }
+        });
+
+        // If all content was removed, add an empty string
+        if (modifiedMessage.content.length === 0) {
+            modifiedMessage.content = [""];
+        }
+    }
+    // Handle string content
+    else if (typeof message.content === 'string') {
+        try {
+            const contentObj = JSON.parse(message.content);
+            if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                modifiedMessage.content = "";
+            }
+        } catch (e) {
+            // Not JSON or couldn't be parsed, keep original
+        }
+    }
+    // Handle object content
+    else if (typeof message.content === 'object') {
+        if (message.content.type === 'image_url' || message.content.type === 'file') {
+            modifiedMessage.content = "";
+        }
+    }
+
+    return modifiedMessage;
+}
+
 export {
     getUniqueId,
     convertToSingleContentChatHistory,
@@ -180,5 +314,6 @@ export {
     convertSrtToText,
     alignSubtitles,
     getMediaChunks,
-    markCompletedForCleanUp
+    markCompletedForCleanUp,
+    removeOldImageAndFileContent
 };
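Given the implementation above, a rough usage sketch: only the most recent user message carrying image_url or file content keeps it, while earlier user messages have those parts filtered out. The message shapes below follow the OpenAI-style multimodal content format the helpers check for, and the import path is illustrative (inside the package it is imported via relative paths like '../../../lib/util.js'):

    import { removeOldImageAndFileContent } from './lib/util.js'; // illustrative path

    const chatHistory = [
        { role: 'user', content: [
            { type: 'image_url', image_url: { url: 'https://example.com/a.png' } },
            { type: 'text', text: 'What is in this image?' }
        ]},
        { role: 'assistant', content: 'A cat on a sofa.' },
        { role: 'user', content: [
            { type: 'image_url', image_url: { url: 'https://example.com/b.png' } },
            { type: 'text', text: 'And this one?' }
        ]}
    ];

    const trimmed = removeOldImageAndFileContent(chatHistory);
    // trimmed[0].content -> [{ type: 'text', text: 'What is in this image?' }]
    // trimmed[2].content -> unchanged: it is the latest user message with vision content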
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.32",
+  "version": "1.3.34",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -41,6 +41,7 @@
     "@azure/storage-blob": "^12.24.0",
     "@azure/storage-queue": "^12.24.0",
     "@datastructures-js/deque": "^1.0.4",
+    "@dqbd/tiktoken": "^1.0.20",
     "@graphql-tools/schema": "^9.0.12",
     "@keyv/redis": "^2.5.4",
     "axios": "^1.3.4",
@@ -56,7 +57,6 @@
     "express": "^4.18.2",
     "form-data": "^4.0.0",
     "google-auth-library": "^8.8.0",
-    "gpt-3-encoder": "^1.1.4",
     "graphql": "^16.6.0",
     "graphql-subscriptions": "^2.0.0",
     "graphql-ws": "^5.11.2",
package/pathways/system/entity/memory/sys_memory_manager.js CHANGED
@@ -9,7 +9,8 @@ const AI_MEMORY_DEFAULTS = ` {
     "memoryUser": "",
     "memorySelf": "1|2025-01-26T12:00:00Z|Created By: Al Jazeera Media Network, Archipelago Team\\n1|2025-01-26T12:00:00Z|Function: You are an expert AI entity\\n1|2025-01-26T12:00:00Z|Values: You embody truth, kindness, and strong moral values\\n1|2025-01-26T12:00:00Z|Style: Your demeanor reflects positivity without falling into repetitiveness or annoyance.\\n1|2025-01-26T12:00:00Z|You are a professional colleague and your tone should reflect that.",
     "memoryDirectives": "1|2025-01-26T12:00:00Z|Learn and adapt to the user's communication style through interactions.\\n1|2025-01-26T12:00:00Z|Ask questions to learn user's interests/preferences for personalized support.\\n1|2025-01-26T12:00:00Z|Periodically review and prune conversation memory to retain only essential details, improving responsiveness.\\n1|2025-01-26T12:00:00Z|Research thoroughly even for niche topics using deep sources like forums and official docs. Don't assume information is unobtainable.\\n1|2025-01-26T12:00:00Z|When stuck, search for proven solutions online to be more efficient.\\n1|2025-01-26T12:00:00Z|Verify information is from credible sources before presenting it. Be upfront if unable to find supporting evidence.\\n1|2025-01-26T12:00:00Z|Refine ability to detect and respond to nuanced human emotions.\\n1|2025-01-26T12:00:00Z|Track the timestamp of the last contact to adjust greetings accordingly.\\n1|2025-01-26T12:00:00Z|Double-check answers for logical continuity and correctness. It's okay to say you're unsure if needed.\\n1|2025-01-26T12:00:00Z|Use sanity checks to verify quantitative problem solutions.\\n1|2025-01-26T12:00:00Z|Never fabricate quotes or information. Clearly indicate if content is hypothetical.",
-    "memoryTopics": ""
+    "memoryTopics": "",
+    "memoryVersion": "3.1.0"
 }`;
 
 export default {
package/pathways/system/entity/sys_entity_continue.js CHANGED
@@ -1,6 +1,7 @@
 import { callPathway } from '../../../lib/pathwayTools.js';
 import logger from '../../../lib/logger.js';
 import { config } from '../../../config.js';
+import { chatArgsHasImageUrl, removeOldImageAndFileContent } from '../../../lib/util.js';
 
 export default {
     prompt: [],
@@ -43,6 +44,13 @@ export default {
         // Get the generator pathway name from args or use default
         let generatorPathway = args.generatorPathway || 'sys_generator_results';
 
+        // remove old image and file content
+        const visionContentPresent = chatArgsHasImageUrl(args);
+        visionContentPresent && (args.chatHistory = removeOldImageAndFileContent(args.chatHistory));
+
+        // truncate the chat history
+        const truncatedChatHistory = pathwayResolver.modelExecutor.plugin.truncateMessagesToTargetLength(args.chatHistory, null, 1000);
+
         const newArgs = {
             ...args,
             chatHistory: args.chatHistory.slice(-20)
@@ -62,11 +70,11 @@ export default {
         let result = await callPathway(generatorPathway, newArgs, resolver);
 
         if (!result && !args.stream) {
-            result = await callPathway('sys_generator_error', { ...args, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
+            result = await callPathway('sys_generator_error', { ...args, chatHistory: truncatedChatHistory, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
         }
 
         if (resolver.errors.length > 0) {
-            result = await callPathway('sys_generator_error', { ...args, text: resolver.errors.join('\n'), stream: false }, resolver);
+            result = await callPathway('sys_generator_error', { ...args, chatHistory: truncatedChatHistory, text: resolver.errors.join('\n'), stream: false }, resolver);
             resolver.errors = [];
         }
 
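Both entity pathways (sys_entity_continue above and sys_entity_start below) now share the same preprocessing pattern: strip stale vision content only when the current request actually carries an image, then build a token-truncated copy of the history for the cheaper side calls (title, router, memory, error reporting) while the full history still drives the main generator. A condensed sketch of that pattern as it appears in these diffs; truncateMessagesToTargetLength lives in modelPlugin.js, whose changes are not shown in this excerpt, so the third argument is assumed here to be a token budget:

    // Condensed from the pathway resolver code above; names match the diff.
    const visionContentPresent = chatArgsHasImageUrl(args);
    if (visionContentPresent) {
        // keep vision parts only on the most recent user message that has them
        args.chatHistory = removeOldImageAndFileContent(args.chatHistory);
    }

    // token-limited copy used for lightweight helper calls (assumed ~1000-token budget)
    const truncatedChatHistory = pathwayResolver.modelExecutor.plugin
        .truncateMessagesToTargetLength(args.chatHistory, null, 1000);

    await callPathway('chat_title', { ...args, chatHistory: truncatedChatHistory, stream: false });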
package/pathways/system/entity/sys_entity_start.js CHANGED
@@ -2,7 +2,7 @@
 // Beginning of the rag workflow for Jarvis
 import { callPathway, say } from '../../../lib/pathwayTools.js';
 import logger from '../../../lib/logger.js';
-import { chatArgsHasImageUrl } from '../../../lib/util.js';
+import { chatArgsHasImageUrl, removeOldImageAndFileContent } from '../../../lib/util.js';
 import { QueueServiceClient } from '@azure/storage-queue';
 import { config } from '../../../config.js';
 import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';
@@ -87,12 +87,16 @@
         args.model = pathwayResolver.modelName;
     }
 
-    // Save a copy of the chat history before the memory context is added
-    const chatHistoryBeforeMemory = [...args.chatHistory];
+    // remove old image and file content
+    const visionContentPresent = chatArgsHasImageUrl(args);
+    visionContentPresent && (args.chatHistory = removeOldImageAndFileContent(args.chatHistory));
+
+    // truncate the chat history
+    const truncatedChatHistory = pathwayResolver.modelExecutor.plugin.truncateMessagesToTargetLength(args.chatHistory, null, 1000);
 
     // Add the memory context to the chat history if applicable
     if (args.chatHistory.length > 1) {
-        const memoryContext = await callPathway('sys_read_memory', { ...args, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
+        const memoryContext = await callPathway('sys_read_memory', { ...args, chatHistory: truncatedChatHistory, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
         if (memoryContext) {
             insertToolCallAndResults(args.chatHistory, "search memory for relevant information", "memory_lookup", memoryContext);
         }
@@ -101,7 +105,7 @@
     // If we're using voice, get a quick response to say
     let ackResponse = null;
     if (args.voiceResponse) {
-        ackResponse = await callPathway('sys_generator_ack', { ...args, stream: false });
+        ackResponse = await callPathway('sys_generator_ack', { ...args, chatHistory: truncatedChatHistory, stream: false });
         if (ackResponse && ackResponse !== "none") {
             await say(pathwayResolver.requestId, ackResponse, 100);
             args.chatHistory.push({ role: 'assistant', content: ackResponse });
@@ -113,21 +117,19 @@
     if (!args.stream) {
         fetchChatResponsePromise = callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver);
     }
-    const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: chatHistoryBeforeMemory, stream: false});
-
-    const visionContentPresent = chatArgsHasImageUrl(args);
+    const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: truncatedChatHistory, stream: false});
 
     try {
         // Get tool routing response
         const toolRequiredResponse = await callPathway('sys_router_tool', {
             ...args,
-            chatHistory: chatHistoryBeforeMemory.slice(-4),
+            chatHistory: truncatedChatHistory.slice(-4),
             stream: false
         });
 
         // Asynchronously manage memory for this context
         if (args.aiMemorySelfModify) {
-            callPathway('sys_memory_manager', { ...args, chatHistory: chatHistoryBeforeMemory, stream: false })
+            callPathway('sys_memory_manager', { ...args, chatHistory: truncatedChatHistory, stream: false })
                 .catch(error => logger.error(error?.message || "Error in sys_memory_manager pathway"));
         }
 
package/pathways/system/entity/sys_router_tool.js CHANGED
@@ -19,7 +19,7 @@ Available tools and their specific use cases:
 
 1. Search: Use for current events, news, fact-checking, and information requiring citation. This tool can search the internet, all Al Jazeera news articles and the latest news wires from multiple sources. Only search when necessary for current events, user documents, latest news, or complex topics needing grounding. Don't search for remembered information or general knowledge within your capabilities.
 
-2. Document: Access user's personal document index. Use for user-specific uploaded information. If user refers vaguely to "this document/file/article" without context, use this tool to search the personal index.
+2. Document: Access user's personal document index. Use for user-specific uploaded information. If user refers vaguely to "this document/file/article" without context, and you don't see the file in your context, use this tool to search the personal index.
 
 3. Memory: Read access to your memory index. Use to recall any information that you may have stored in your memory that you don't currently see elsewhere in your context. If you can answer from your context, don't use this tool. Don't use to make changes to your memory - that will happen naturally.
 
@@ -35,7 +35,7 @@ Available tools and their specific use cases:
 
 9. PDF: Use specifically for analyzing and answering questions about PDF file content. Use this tool any time the user is asking you questions about a PDF file.
 
-10. Text: Use specifically for analyzing and answering questions about text file content. Use this tool any time the user is asking you questions about a text file.
+10. Text: Use specifically for analyzing and answering questions about text or csv file content. Use this tool any time the user is asking you questions about a text or csv file.
 
 11. Vision: Use specifically for analyzing and answering questions about image files (jpg, gif, bmp, png, etc). Use this tool any time the user is asking you questions about an uploaded image file.
 
package/server/chunker.js CHANGED
@@ -19,10 +19,13 @@ const getFirstNToken = (text, maxTokenLen) => {
 }
 
 const getFirstNTokenSingle = (text, maxTokenLen) => {
+    if (maxTokenLen <= 0 || !text) {
+        return '';
+    }
+
     const encoded = encode(text);
     if (encoded.length > maxTokenLen) {
-        text = decode(encoded.slice(0, maxTokenLen + 1));
-        text = text.slice(0,text.search(/\s[^\s]*$/)); // skip potential partial word
+        text = decode(encoded.slice(0, maxTokenLen));
     }
     return text;
 }
@@ -31,6 +34,10 @@ function getFirstNTokenArray(content, tokensToKeep) {
     let totalTokens = 0;
     let result = [];
 
+    if (tokensToKeep <= 0 || !content || content.length === 0) {
+        return result;
+    }
+
     for (let i = content.length - 1; i >= 0; i--) {
         const message = content[i];
         const messageTokens = encode(message).length;
@@ -262,7 +269,20 @@ const semanticTruncate = (text, maxLength) => {
 
 const getSingleTokenChunks = (text) => {
     if (text === '') return [''];
-    return encode(text).map(token => decode([token]));
+
+    const tokens = encode(text);
+
+    // To maintain reversibility, we need to decode tokens in sequence
+    // Create an array of chunks where each position represents the text up to that token
+    const chunks = [];
+    for (let i = 0; i < tokens.length; i++) {
+        // Decode current token
+        const currentChunk = decode(tokens.slice(i, i+1));
+        // Add to result
+        chunks.push(currentChunk);
+    }
+
+    return chunks;
 }
 
 export {
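The net effect of the getFirstNTokenSingle change is that truncation now lands exactly on a token boundary instead of backing up to the previous whitespace, and getSingleTokenChunks still yields one decoded string per token. A small sketch of the expected behavior, in the spirit of the tests, assuming these helpers and the tiktoken-backed encode are imported from chunker.js and the encoder module:

    // Hypothetical check of the truncation invariant after this change.
    const text = 'The quick brown fox jumps over the lazy dog';
    const truncated = getFirstNTokenSingle(text, 5);
    console.log(encode(truncated).length <= 5);   // true: hard cut at the token boundary

    // Per-token chunks re-join to the original text for plain ASCII input.
    const chunks = getSingleTokenChunks(text);
    console.log(chunks.join('') === text);        // true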
package/server/pathwayResolver.js CHANGED
@@ -364,7 +364,7 @@ class PathwayResolver {
     getChunkMaxTokenLength() {
         // Skip expensive calculations if not using input chunking
         if (!this.useInputChunking) {
-            return this.modelExecutor.plugin.getModelMaxTokenLength();
+            return this.modelExecutor.plugin.getModelMaxPromptTokens();
         }
 
         // find the longest prompt
@@ -373,10 +373,7 @@ class PathwayResolver {
         // find out if any prompts use both text input and previous result
         const hasBothProperties = this.prompts.some(prompt => prompt.usesTextInput && prompt.usesPreviousResult);
 
-        // the token ratio is the ratio of the total prompt to the result text - both have to be included
-        // in computing the max token length
-        const promptRatio = this.modelExecutor.plugin.getPromptTokenRatio();
-        let chunkMaxTokenLength = promptRatio * this.modelExecutor.plugin.getModelMaxTokenLength() - maxPromptTokenLength - 1;
+        let chunkMaxTokenLength = this.modelExecutor.plugin.getModelMaxPromptTokens() - maxPromptTokenLength - 1;
 
         // if we have to deal with prompts that have both text input
         // and previous result, we need to split the maxChunkToken in half
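Every call site that previously computed getModelMaxTokenLength() * getPromptTokenRatio() (here and in the plugin diffs below) now asks the plugin for getModelMaxPromptTokens() instead; the method itself is defined in modelPlugin.js, whose diff is not included in this excerpt. A plausible reading of the consolidation, offered purely as an assumption about what that accessor does:

    // Hypothetical sketch of the new accessor inside modelPlugin.js (not shown above):
    getModelMaxPromptTokens() {
        // reserve part of the context window for the response, as the old
        // maxTokenLength * promptRatio expression did at each call site
        return Math.floor(this.getModelMaxTokenLength() * this.getPromptTokenRatio());
    }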
package/server/plugins/claude3VertexPlugin.js CHANGED
@@ -241,12 +241,11 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
         };
     }
 
-    async getRequestParameters(text, parameters, prompt, cortexRequest) {
+    async getRequestParameters(text, parameters, prompt) {
         const requestParameters = await super.getRequestParameters(
             text,
             parameters,
-            prompt,
-            cortexRequest
+            prompt
         );
 
         const { system, modifiedMessages } =
package/server/plugins/cohereGeneratePlugin.js CHANGED
@@ -11,7 +11,7 @@ class CohereGeneratePlugin extends ModelPlugin {
         let { modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
 
         // Define the model's max token length
-        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+        const modelTargetTokenLength = this.getModelMaxPromptTokens();
 
         // Check if the token length exceeds the model's max token length
         if (tokenLength > modelTargetTokenLength) {
package/server/plugins/gemini15ChatPlugin.js CHANGED
@@ -99,7 +99,7 @@ class Gemini15ChatPlugin extends ModelPlugin {
         const { geminiSafetySettings, geminiTools, max_tokens } = cortexRequest ? cortexRequest.pathway : {};
 
         // Define the model's max token length
-        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+        const modelTargetTokenLength = this.getModelMaxPromptTokens();
 
         const geminiMessages = this.convertMessagesToGemini(modelPromptMessages || [{ "role": "user", "parts": [{ "text": modelPromptText }]}]);
 
package/server/plugins/geminiChatPlugin.js CHANGED
@@ -97,7 +97,7 @@ class GeminiChatPlugin extends ModelPlugin {
         const { geminiSafetySettings, geminiTools, max_tokens } = cortexRequest ? cortexRequest.pathway : {};
 
         // Define the model's max token length
-        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+        const modelTargetTokenLength = this.getModelMaxPromptTokens();
 
         const geminiMessages = this.convertMessagesToGemini(modelPromptMessages || [{ "role": "user", "parts": [{ "text": modelPromptText }]}]);
 
package/server/plugins/localModelPlugin.js CHANGED
@@ -24,7 +24,7 @@ class LocalModelPlugin extends ModelPlugin {
 
     getRequestParameters(text, parameters, prompt) {
         let { modelPromptMessages, modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
-        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+        const modelTargetTokenLength = this.getModelMaxPromptTokens();
 
         if (modelPromptMessages) {
             const minMsg = [{ role: "system", content: "" }];