@aj-archipelago/cortex 1.3.32 → 1.3.34

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Files changed (34)
  1. package/helper-apps/cortex-autogen/OAI_CONFIG_LIST +1 -1
  2. package/lib/encodeCache.js +22 -10
  3. package/lib/pathwayTools.js +10 -3
  4. package/lib/requestExecutor.js +1 -1
  5. package/lib/util.js +136 -1
  6. package/package.json +2 -2
  7. package/pathways/system/entity/memory/sys_memory_manager.js +2 -1
  8. package/pathways/system/entity/sys_entity_continue.js +10 -2
  9. package/pathways/system/entity/sys_entity_start.js +12 -10
  10. package/pathways/system/entity/sys_router_tool.js +2 -2
  11. package/server/chunker.js +23 -3
  12. package/server/pathwayResolver.js +2 -5
  13. package/server/plugins/claude3VertexPlugin.js +2 -3
  14. package/server/plugins/cohereGeneratePlugin.js +1 -1
  15. package/server/plugins/gemini15ChatPlugin.js +1 -1
  16. package/server/plugins/geminiChatPlugin.js +1 -1
  17. package/server/plugins/localModelPlugin.js +1 -1
  18. package/server/plugins/modelPlugin.js +332 -77
  19. package/server/plugins/openAiChatPlugin.js +1 -1
  20. package/server/plugins/openAiCompletionPlugin.js +1 -1
  21. package/server/plugins/palmChatPlugin.js +1 -1
  22. package/server/plugins/palmCodeCompletionPlugin.js +1 -1
  23. package/server/plugins/palmCompletionPlugin.js +1 -1
  24. package/tests/chunkfunction.test.js +9 -6
  25. package/tests/claude3VertexPlugin.test.js +81 -3
  26. package/tests/data/largecontent.txt +1 -0
  27. package/tests/data/mixedcontent.txt +1 -0
  28. package/tests/encodeCache.test.js +47 -14
  29. package/tests/modelPlugin.test.js +21 -0
  30. package/tests/multimodal_conversion.test.js +1 -1
  31. package/tests/subscription.test.js +7 -1
  32. package/tests/tokenHandlingTests.test.js +587 -0
  33. package/tests/truncateMessages.test.js +404 -46
  34. package/tests/util.test.js +146 -0
package/helper-apps/cortex-autogen/OAI_CONFIG_LIST CHANGED
@@ -1,6 +1,6 @@
 [
     {
-        "model": "o3-mini",
+        "model": "claude-3.7-sonnet",
         "price": [0,0]
     }
 ]
package/lib/encodeCache.js CHANGED
@@ -1,31 +1,43 @@
-import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { encoding_for_model } from '@dqbd/tiktoken';
 import { FastLRUCache } from './fastLruCache.js';
 
 class EncodeCache {
-    constructor() {
+    constructor(model = "gpt-4o") {
         this.encodeCache = new FastLRUCache(1000);
         this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+        this.encoder = encoding_for_model(model);
     }
 
     encode(value) {
         if (this.encodeCache.get(value) !== -1) {
             return this.encodeCache.get(value);
         }
-        const encoded = gpt3Encode(value);
+        const encoded = this.encoder.encode(value);
         this.encodeCache.put(value, encoded);
         return encoded;
     }
 
     decode(value) {
-        if (this.decodeCache.get(value) !== -1) {
-            return this.decodeCache.get(value);
+        // Create a cache key based on array values
+        const key = Array.from(value).toString();
+
+        if (this.decodeCache.get(key) !== -1) {
+            return this.decodeCache.get(key);
         }
-        const decoded = gpt3Decode(value);
-        this.decodeCache.put(value, decoded);
-        if (this.encodeCache.get(decoded) === -1) {
-            this.encodeCache.put(decoded, value);
+
+        // The tiktoken decoder returns Uint8Array, we need to convert it to a string
+        const decoded = this.encoder.decode(value);
+
+        // Convert the decoded tokens to a string
+        const decodedString = typeof decoded === 'string' ? decoded : new TextDecoder().decode(decoded);
+
+        this.decodeCache.put(key, decodedString);
+
+        if (this.encodeCache.get(decodedString) === -1) {
+            this.encodeCache.put(decodedString, value);
         }
-        return decoded;
+
+        return decodedString;
     }
 }
 
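The move from gpt-3-encoder to @dqbd/tiktoken changes the data types flowing through the cache: encode() now returns a Uint32Array of token ids and decode() returns a Uint8Array of UTF-8 bytes, which is why the decode cache keys on Array.from(value).toString() and pipes the result through TextDecoder. A minimal sketch of the underlying round trip, independent of the cache (the model name here is just an example):

    import { encoding_for_model } from '@dqbd/tiktoken';

    const enc = encoding_for_model('gpt-4o');      // WASM-backed tokenizer
    const tokens = enc.encode('hello world');      // Uint32Array of token ids
    const bytes = enc.decode(tokens);              // Uint8Array of UTF-8 bytes, not a string
    const text = new TextDecoder().decode(bytes);  // back to 'hello world'
    enc.free();                                    // release the tokenizer's WASM memory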
package/lib/pathwayTools.js CHANGED
@@ -54,11 +54,16 @@ const say = async (requestId, message, maxMessageLength = Infinity, voiceRespons
     try {
         const chunks = getSemanticChunks(message, maxMessageLength);
 
+        const info = JSON.stringify({
+            ephemeral: true,
+        });
+
         for (let chunk of chunks) {
             await publishRequestProgress({
                 requestId,
                 progress: 0.5,
-                data: chunk
+                data: JSON.stringify(chunk),
+                info
             });
         }
 
@@ -66,14 +71,16 @@ const say = async (requestId, message, maxMessageLength = Infinity, voiceRespons
             await publishRequestProgress({
                 requestId,
                 progress: 0.5,
-                data: " ... "
+                data: JSON.stringify(" ... "),
+                info
             });
         }
 
         await publishRequestProgress({
             requestId,
             progress: 0.5,
-            data: "\n\n"
+            data: JSON.stringify("\n\n"),
+            info
         });
 
     } catch (error) {
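With this change every chunk published by say() is JSON-encoded and tagged with an ephemeral info payload. A rough sketch of what a consumer of the request-progress stream now receives; the requestId and text are illustrative, and the exact subscription shape depends on the Cortex GraphQL schema:

    // Illustrative progress event; field values are hypothetical.
    const event = {
        requestId: 'req-123',
        progress: 0.5,
        data: JSON.stringify('First spoken chunk'),
        info: JSON.stringify({ ephemeral: true })
    };

    const chunkText = JSON.parse(event.data);      // plain string chunk
    const { ephemeral } = JSON.parse(event.info);  // true; presumably marks the chunk as transient TTS output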
package/lib/requestExecutor.js CHANGED
@@ -330,7 +330,7 @@ const makeRequest = async (cortexRequest) => {
     // as it could be a temporary issue with one endpoint
     // certain errors (e.g. 400) are problems with the request itself
     // and should not be retried
-    if (status == 400) {
+    if (status == 400 || status == 413) {
         return { response, duration };
     }
     // set up for a retry by selecting a new endpoint, which will also reinitialize the request
package/lib/util.js CHANGED
@@ -170,6 +170,140 @@ async function markCompletedForCleanUp(requestId) {
     }
 }
 
+function removeOldImageAndFileContent(chatHistory) {
+    if (!chatHistory || !Array.isArray(chatHistory) || chatHistory.length === 0) {
+        return chatHistory;
+    }
+
+    // Find the index of the last user message with image or file content
+    let lastImageOrFileIndex = -1;
+
+    for (let i = chatHistory.length - 1; i >= 0; i--) {
+        const message = chatHistory[i];
+
+        // Skip non-user messages
+        if (message.role !== 'user') {
+            continue;
+        }
+
+        // Check if this message has image or file content
+        if (messageHasImageOrFile(message)) {
+            lastImageOrFileIndex = i;
+            break;
+        }
+    }
+
+    // If no message with image or file found, return original
+    if (lastImageOrFileIndex === -1) {
+        return chatHistory;
+    }
+
+    // Create a deep copy of the chat history
+    const modifiedChatHistory = JSON.parse(JSON.stringify(chatHistory));
+
+    // Process earlier messages to remove image and file content
+    for (let i = 0; i < lastImageOrFileIndex; i++) {
+        const message = modifiedChatHistory[i];
+
+        // Only process user messages
+        if (message.role !== 'user') {
+            continue;
+        }
+
+        // Remove image and file content
+        modifiedChatHistory[i] = removeImageAndFileFromMessage(message);
+    }
+
+    return modifiedChatHistory;
+}
+
+// Helper function to check if a message has image or file content
+function messageHasImageOrFile(message) {
+    if (!message || !message.content) {
+        return false;
+    }
+
+    // Handle array content
+    if (Array.isArray(message.content)) {
+        for (const content of message.content) {
+            try {
+                const contentObj = typeof content === 'string' ? JSON.parse(content) : content;
+                if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                    return true;
+                }
+            } catch (e) {
+                // Not JSON or couldn't be parsed, continue
+                continue;
+            }
+        }
+    }
+    // Handle string content
+    else if (typeof message.content === 'string') {
+        try {
+            const contentObj = JSON.parse(message.content);
+            if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                return true;
+            }
+        } catch (e) {
+            // Not JSON or couldn't be parsed
+            return false;
+        }
+    }
+    // Handle object content
+    else if (typeof message.content === 'object') {
+        return message.content.type === 'image_url' || message.content.type === 'file';
+    }
+
+    return false;
+}
+
+// Helper function to remove image and file content from a message
+function removeImageAndFileFromMessage(message) {
+    if (!message || !message.content) {
+        return message;
+    }
+
+    const modifiedMessage = { ...message };
+
+    // Handle array content
+    if (Array.isArray(message.content)) {
+        modifiedMessage.content = message.content.filter(content => {
+            try {
+                const contentObj = typeof content === 'string' ? JSON.parse(content) : content;
+                // Keep content that's not image or file
+                return !(contentObj.type === 'image_url' || contentObj.type === 'file');
+            } catch (e) {
+                // Not JSON or couldn't be parsed, keep it
+                return true;
+            }
+        });
+
+        // If all content was removed, add an empty string
+        if (modifiedMessage.content.length === 0) {
+            modifiedMessage.content = [""];
+        }
+    }
+    // Handle string content
+    else if (typeof message.content === 'string') {
+        try {
+            const contentObj = JSON.parse(message.content);
+            if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                modifiedMessage.content = "";
+            }
+        } catch (e) {
+            // Not JSON or couldn't be parsed, keep original
+        }
+    }
+    // Handle object content
+    else if (typeof message.content === 'object') {
+        if (message.content.type === 'image_url' || message.content.type === 'file') {
+            modifiedMessage.content = "";
+        }
+    }
+
+    return modifiedMessage;
+}
+
 export {
     getUniqueId,
     convertToSingleContentChatHistory,
@@ -180,5 +314,6 @@ export {
     convertSrtToText,
     alignSubtitles,
     getMediaChunks,
-    markCompletedForCleanUp
+    markCompletedForCleanUp,
+    removeOldImageAndFileContent
 };
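Given the implementation above, a rough usage sketch: only the most recent user message carrying image_url or file content keeps it, while earlier user messages have those parts filtered out. The message shapes below follow the OpenAI-style multimodal content format the helpers check for, and the import path is illustrative (inside the package it is imported via relative paths like '../../../lib/util.js'):

    import { removeOldImageAndFileContent } from './lib/util.js'; // illustrative path

    const chatHistory = [
        { role: 'user', content: [
            { type: 'image_url', image_url: { url: 'https://example.com/a.png' } },
            { type: 'text', text: 'What is in this image?' }
        ]},
        { role: 'assistant', content: 'A cat on a sofa.' },
        { role: 'user', content: [
            { type: 'image_url', image_url: { url: 'https://example.com/b.png' } },
            { type: 'text', text: 'And this one?' }
        ]}
    ];

    const trimmed = removeOldImageAndFileContent(chatHistory);
    // trimmed[0].content -> [{ type: 'text', text: 'What is in this image?' }]
    // trimmed[2].content -> unchanged: it is the latest user message with vision content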
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.32",
+  "version": "1.3.34",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -41,6 +41,7 @@
     "@azure/storage-blob": "^12.24.0",
     "@azure/storage-queue": "^12.24.0",
     "@datastructures-js/deque": "^1.0.4",
+    "@dqbd/tiktoken": "^1.0.20",
     "@graphql-tools/schema": "^9.0.12",
     "@keyv/redis": "^2.5.4",
     "axios": "^1.3.4",
@@ -56,7 +57,6 @@
     "express": "^4.18.2",
     "form-data": "^4.0.0",
     "google-auth-library": "^8.8.0",
-    "gpt-3-encoder": "^1.1.4",
     "graphql": "^16.6.0",
     "graphql-subscriptions": "^2.0.0",
     "graphql-ws": "^5.11.2",
package/pathways/system/entity/memory/sys_memory_manager.js CHANGED
@@ -9,7 +9,8 @@ const AI_MEMORY_DEFAULTS = ` {
     "memoryUser": "",
     "memorySelf": "1|2025-01-26T12:00:00Z|Created By: Al Jazeera Media Network, Archipelago Team\\n1|2025-01-26T12:00:00Z|Function: You are an expert AI entity\\n1|2025-01-26T12:00:00Z|Values: You embody truth, kindness, and strong moral values\\n1|2025-01-26T12:00:00Z|Style: Your demeanor reflects positivity without falling into repetitiveness or annoyance.\\n1|2025-01-26T12:00:00Z|You are a professional colleague and your tone should reflect that.",
     "memoryDirectives": "1|2025-01-26T12:00:00Z|Learn and adapt to the user's communication style through interactions.\\n1|2025-01-26T12:00:00Z|Ask questions to learn user's interests/preferences for personalized support.\\n1|2025-01-26T12:00:00Z|Periodically review and prune conversation memory to retain only essential details, improving responsiveness.\\n1|2025-01-26T12:00:00Z|Research thoroughly even for niche topics using deep sources like forums and official docs. Don't assume information is unobtainable.\\n1|2025-01-26T12:00:00Z|When stuck, search for proven solutions online to be more efficient.\\n1|2025-01-26T12:00:00Z|Verify information is from credible sources before presenting it. Be upfront if unable to find supporting evidence.\\n1|2025-01-26T12:00:00Z|Refine ability to detect and respond to nuanced human emotions.\\n1|2025-01-26T12:00:00Z|Track the timestamp of the last contact to adjust greetings accordingly.\\n1|2025-01-26T12:00:00Z|Double-check answers for logical continuity and correctness. It's okay to say you're unsure if needed.\\n1|2025-01-26T12:00:00Z|Use sanity checks to verify quantitative problem solutions.\\n1|2025-01-26T12:00:00Z|Never fabricate quotes or information. Clearly indicate if content is hypothetical.",
-    "memoryTopics": ""
+    "memoryTopics": "",
+    "memoryVersion": "3.1.0"
 }`;
 
 export default {
package/pathways/system/entity/sys_entity_continue.js CHANGED
@@ -1,6 +1,7 @@
 import { callPathway } from '../../../lib/pathwayTools.js';
 import logger from '../../../lib/logger.js';
 import { config } from '../../../config.js';
+import { chatArgsHasImageUrl, removeOldImageAndFileContent } from '../../../lib/util.js';
 
 export default {
     prompt: [],
@@ -43,6 +44,13 @@ export default {
         // Get the generator pathway name from args or use default
         let generatorPathway = args.generatorPathway || 'sys_generator_results';
 
+        // remove old image and file content
+        const visionContentPresent = chatArgsHasImageUrl(args);
+        visionContentPresent && (args.chatHistory = removeOldImageAndFileContent(args.chatHistory));
+
+        // truncate the chat history
+        const truncatedChatHistory = pathwayResolver.modelExecutor.plugin.truncateMessagesToTargetLength(args.chatHistory, null, 1000);
+
         const newArgs = {
             ...args,
             chatHistory: args.chatHistory.slice(-20)
@@ -62,11 +70,11 @@ export default {
         let result = await callPathway(generatorPathway, newArgs, resolver);
 
         if (!result && !args.stream) {
-            result = await callPathway('sys_generator_error', { ...args, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
+            result = await callPathway('sys_generator_error', { ...args, chatHistory: truncatedChatHistory, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
         }
 
         if (resolver.errors.length > 0) {
-            result = await callPathway('sys_generator_error', { ...args, text: resolver.errors.join('\n'), stream: false }, resolver);
+            result = await callPathway('sys_generator_error', { ...args, chatHistory: truncatedChatHistory, text: resolver.errors.join('\n'), stream: false }, resolver);
             resolver.errors = [];
         }
 
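Both entity pathways (sys_entity_continue above and sys_entity_start below) now share the same preprocessing pattern: strip stale vision content only when the current request actually carries an image, then build a token-truncated copy of the history for the cheaper side calls (title, router, memory, error reporting) while the full history still drives the main generator. A condensed sketch of that pattern as it appears in these diffs; truncateMessagesToTargetLength lives in modelPlugin.js, whose changes are not shown in this excerpt, so the third argument is assumed here to be a token budget:

    // Condensed from the pathway resolver code above; names match the diff.
    const visionContentPresent = chatArgsHasImageUrl(args);
    if (visionContentPresent) {
        // keep vision parts only on the most recent user message that has them
        args.chatHistory = removeOldImageAndFileContent(args.chatHistory);
    }

    // token-limited copy used for lightweight helper calls (assumed ~1000-token budget)
    const truncatedChatHistory = pathwayResolver.modelExecutor.plugin
        .truncateMessagesToTargetLength(args.chatHistory, null, 1000);

    await callPathway('chat_title', { ...args, chatHistory: truncatedChatHistory, stream: false });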
package/pathways/system/entity/sys_entity_start.js CHANGED
@@ -2,7 +2,7 @@
 // Beginning of the rag workflow for Jarvis
 import { callPathway, say } from '../../../lib/pathwayTools.js';
 import logger from '../../../lib/logger.js';
-import { chatArgsHasImageUrl } from '../../../lib/util.js';
+import { chatArgsHasImageUrl, removeOldImageAndFileContent } from '../../../lib/util.js';
 import { QueueServiceClient } from '@azure/storage-queue';
 import { config } from '../../../config.js';
 import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';
@@ -87,12 +87,16 @@
         args.model = pathwayResolver.modelName;
     }
 
-    // Save a copy of the chat history before the memory context is added
-    const chatHistoryBeforeMemory = [...args.chatHistory];
+    // remove old image and file content
+    const visionContentPresent = chatArgsHasImageUrl(args);
+    visionContentPresent && (args.chatHistory = removeOldImageAndFileContent(args.chatHistory));
+
+    // truncate the chat history
+    const truncatedChatHistory = pathwayResolver.modelExecutor.plugin.truncateMessagesToTargetLength(args.chatHistory, null, 1000);
 
     // Add the memory context to the chat history if applicable
     if (args.chatHistory.length > 1) {
-        const memoryContext = await callPathway('sys_read_memory', { ...args, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
+        const memoryContext = await callPathway('sys_read_memory', { ...args, chatHistory: truncatedChatHistory, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
         if (memoryContext) {
             insertToolCallAndResults(args.chatHistory, "search memory for relevant information", "memory_lookup", memoryContext);
         }
@@ -101,7 +105,7 @@
     // If we're using voice, get a quick response to say
     let ackResponse = null;
     if (args.voiceResponse) {
-        ackResponse = await callPathway('sys_generator_ack', { ...args, stream: false });
+        ackResponse = await callPathway('sys_generator_ack', { ...args, chatHistory: truncatedChatHistory, stream: false });
         if (ackResponse && ackResponse !== "none") {
             await say(pathwayResolver.requestId, ackResponse, 100);
             args.chatHistory.push({ role: 'assistant', content: ackResponse });
@@ -113,21 +117,19 @@
     if (!args.stream) {
         fetchChatResponsePromise = callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver);
     }
-    const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: chatHistoryBeforeMemory, stream: false});
-
-    const visionContentPresent = chatArgsHasImageUrl(args);
+    const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: truncatedChatHistory, stream: false});
 
     try {
         // Get tool routing response
         const toolRequiredResponse = await callPathway('sys_router_tool', {
             ...args,
-            chatHistory: chatHistoryBeforeMemory.slice(-4),
+            chatHistory: truncatedChatHistory.slice(-4),
             stream: false
         });
 
         // Asynchronously manage memory for this context
         if (args.aiMemorySelfModify) {
-            callPathway('sys_memory_manager', { ...args, chatHistory: chatHistoryBeforeMemory, stream: false })
+            callPathway('sys_memory_manager', { ...args, chatHistory: truncatedChatHistory, stream: false })
                 .catch(error => logger.error(error?.message || "Error in sys_memory_manager pathway"));
         }
 
package/pathways/system/entity/sys_router_tool.js CHANGED
@@ -19,7 +19,7 @@ Available tools and their specific use cases:
 
 1. Search: Use for current events, news, fact-checking, and information requiring citation. This tool can search the internet, all Al Jazeera news articles and the latest news wires from multiple sources. Only search when necessary for current events, user documents, latest news, or complex topics needing grounding. Don't search for remembered information or general knowledge within your capabilities.
 
-2. Document: Access user's personal document index. Use for user-specific uploaded information. If user refers vaguely to "this document/file/article" without context, use this tool to search the personal index.
+2. Document: Access user's personal document index. Use for user-specific uploaded information. If user refers vaguely to "this document/file/article" without context, and you don't see the file in your context, use this tool to search the personal index.
 
 3. Memory: Read access to your memory index. Use to recall any information that you may have stored in your memory that you don't currently see elsewhere in your context. If you can answer from your context, don't use this tool. Don't use to make changes to your memory - that will happen naturally.
 
@@ -35,7 +35,7 @@ Available tools and their specific use cases:
 
 9. PDF: Use specifically for analyzing and answering questions about PDF file content. Use this tool any time the user is asking you questions about a PDF file.
 
-10. Text: Use specifically for analyzing and answering questions about text file content. Use this tool any time the user is asking you questions about a text file.
+10. Text: Use specifically for analyzing and answering questions about text or csv file content. Use this tool any time the user is asking you questions about a text or csv file.
 
 11. Vision: Use specifically for analyzing and answering questions about image files (jpg, gif, bmp, png, etc). Use this tool any time the user is asking you questions about an uploaded image file.
 
package/server/chunker.js CHANGED
@@ -19,10 +19,13 @@ const getFirstNToken = (text, maxTokenLen) => {
 }
 
 const getFirstNTokenSingle = (text, maxTokenLen) => {
+    if (maxTokenLen <= 0 || !text) {
+        return '';
+    }
+
     const encoded = encode(text);
     if (encoded.length > maxTokenLen) {
-        text = decode(encoded.slice(0, maxTokenLen + 1));
-        text = text.slice(0,text.search(/\s[^\s]*$/)); // skip potential partial word
+        text = decode(encoded.slice(0, maxTokenLen));
     }
     return text;
 }
@@ -31,6 +34,10 @@ function getFirstNTokenArray(content, tokensToKeep) {
     let totalTokens = 0;
     let result = [];
 
+    if (tokensToKeep <= 0 || !content || content.length === 0) {
+        return result;
+    }
+
     for (let i = content.length - 1; i >= 0; i--) {
         const message = content[i];
         const messageTokens = encode(message).length;
@@ -262,7 +269,20 @@ const semanticTruncate = (text, maxLength) => {
 
 const getSingleTokenChunks = (text) => {
     if (text === '') return [''];
-    return encode(text).map(token => decode([token]));
+
+    const tokens = encode(text);
+
+    // To maintain reversibility, we need to decode tokens in sequence
+    // Create an array of chunks where each position represents the text up to that token
+    const chunks = [];
+    for (let i = 0; i < tokens.length; i++) {
+        // Decode current token
+        const currentChunk = decode(tokens.slice(i, i+1));
+        // Add to result
+        chunks.push(currentChunk);
+    }
+
+    return chunks;
 }
 
 export {
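The net effect of the getFirstNTokenSingle change is that truncation now lands exactly on a token boundary instead of backing up to the previous whitespace, and getSingleTokenChunks still yields one decoded string per token. A small sketch of the expected behavior, in the spirit of the tests, assuming these helpers and the tiktoken-backed encode are imported from chunker.js and the encoder module:

    // Hypothetical check of the truncation invariant after this change.
    const text = 'The quick brown fox jumps over the lazy dog';
    const truncated = getFirstNTokenSingle(text, 5);
    console.log(encode(truncated).length <= 5);   // true: hard cut at the token boundary

    // Per-token chunks re-join to the original text for plain ASCII input.
    const chunks = getSingleTokenChunks(text);
    console.log(chunks.join('') === text);        // true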
package/server/pathwayResolver.js CHANGED
@@ -364,7 +364,7 @@ class PathwayResolver {
     getChunkMaxTokenLength() {
         // Skip expensive calculations if not using input chunking
         if (!this.useInputChunking) {
-            return this.modelExecutor.plugin.getModelMaxTokenLength();
+            return this.modelExecutor.plugin.getModelMaxPromptTokens();
         }
 
         // find the longest prompt
@@ -373,10 +373,7 @@ class PathwayResolver {
         // find out if any prompts use both text input and previous result
         const hasBothProperties = this.prompts.some(prompt => prompt.usesTextInput && prompt.usesPreviousResult);
 
-        // the token ratio is the ratio of the total prompt to the result text - both have to be included
-        // in computing the max token length
-        const promptRatio = this.modelExecutor.plugin.getPromptTokenRatio();
-        let chunkMaxTokenLength = promptRatio * this.modelExecutor.plugin.getModelMaxTokenLength() - maxPromptTokenLength - 1;
+        let chunkMaxTokenLength = this.modelExecutor.plugin.getModelMaxPromptTokens() - maxPromptTokenLength - 1;
 
         // if we have to deal with prompts that have both text input
         // and previous result, we need to split the maxChunkToken in half
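Every call site that previously computed getModelMaxTokenLength() * getPromptTokenRatio() (here and in the plugin diffs below) now asks the plugin for getModelMaxPromptTokens() instead; the method itself is defined in modelPlugin.js, whose diff is not included in this excerpt. A plausible reading of the consolidation, offered purely as an assumption about what that accessor does:

    // Hypothetical sketch of the new accessor inside modelPlugin.js (not shown above):
    getModelMaxPromptTokens() {
        // reserve part of the context window for the response, as the old
        // maxTokenLength * promptRatio expression did at each call site
        return Math.floor(this.getModelMaxTokenLength() * this.getPromptTokenRatio());
    }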
package/server/plugins/claude3VertexPlugin.js CHANGED
@@ -241,12 +241,11 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
         };
     }
 
-    async getRequestParameters(text, parameters, prompt, cortexRequest) {
+    async getRequestParameters(text, parameters, prompt) {
         const requestParameters = await super.getRequestParameters(
             text,
             parameters,
-            prompt,
-            cortexRequest
+            prompt
         );
 
         const { system, modifiedMessages } =
package/server/plugins/cohereGeneratePlugin.js CHANGED
@@ -11,7 +11,7 @@ class CohereGeneratePlugin extends ModelPlugin {
         let { modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
 
         // Define the model's max token length
-        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+        const modelTargetTokenLength = this.getModelMaxPromptTokens();
 
         // Check if the token length exceeds the model's max token length
         if (tokenLength > modelTargetTokenLength) {
package/server/plugins/gemini15ChatPlugin.js CHANGED
@@ -99,7 +99,7 @@ class Gemini15ChatPlugin extends ModelPlugin {
         const { geminiSafetySettings, geminiTools, max_tokens } = cortexRequest ? cortexRequest.pathway : {};
 
         // Define the model's max token length
-        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+        const modelTargetTokenLength = this.getModelMaxPromptTokens();
 
         const geminiMessages = this.convertMessagesToGemini(modelPromptMessages || [{ "role": "user", "parts": [{ "text": modelPromptText }]}]);
 
package/server/plugins/geminiChatPlugin.js CHANGED
@@ -97,7 +97,7 @@ class GeminiChatPlugin extends ModelPlugin {
         const { geminiSafetySettings, geminiTools, max_tokens } = cortexRequest ? cortexRequest.pathway : {};
 
         // Define the model's max token length
-        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+        const modelTargetTokenLength = this.getModelMaxPromptTokens();
 
         const geminiMessages = this.convertMessagesToGemini(modelPromptMessages || [{ "role": "user", "parts": [{ "text": modelPromptText }]}]);
 
package/server/plugins/localModelPlugin.js CHANGED
@@ -24,7 +24,7 @@ class LocalModelPlugin extends ModelPlugin {
 
     getRequestParameters(text, parameters, prompt) {
         let { modelPromptMessages, modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
-        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+        const modelTargetTokenLength = this.getModelMaxPromptTokens();
 
         if (modelPromptMessages) {
             const minMsg = [{ role: "system", content: "" }];