@aj-archipelago/cortex 1.3.31 → 1.3.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/helper-apps/cortex-autogen/OAI_CONFIG_LIST +1 -1
  2. package/helper-apps/cortex-file-handler/package.json +1 -1
  3. package/lib/encodeCache.js +22 -10
  4. package/lib/pathwayTools.js +3 -3
  5. package/lib/requestExecutor.js +1 -1
  6. package/lib/util.js +136 -1
  7. package/package.json +3 -3
  8. package/pathways/image_flux.js +1 -1
  9. package/pathways/system/entity/memory/shared/sys_memory_helpers.js +9 -1
  10. package/pathways/system/entity/memory/sys_memory_manager.js +2 -1
  11. package/pathways/system/entity/sys_entity_continue.js +10 -2
  12. package/pathways/system/entity/sys_entity_start.js +16 -17
  13. package/pathways/system/entity/sys_generator_image.js +2 -3
  14. package/pathways/system/entity/sys_generator_memory.js +2 -3
  15. package/pathways/system/entity/sys_generator_quick.js +1 -1
  16. package/pathways/system/entity/sys_router_tool.js +12 -4
  17. package/pathways/transcribe_gemini.js +12 -8
  18. package/server/chunker.js +23 -3
  19. package/server/pathwayResolver.js +2 -5
  20. package/server/plugins/claude3VertexPlugin.js +2 -3
  21. package/server/plugins/cohereGeneratePlugin.js +1 -1
  22. package/server/plugins/gemini15ChatPlugin.js +1 -1
  23. package/server/plugins/geminiChatPlugin.js +1 -1
  24. package/server/plugins/localModelPlugin.js +1 -1
  25. package/server/plugins/modelPlugin.js +332 -77
  26. package/server/plugins/openAiChatPlugin.js +1 -1
  27. package/server/plugins/openAiCompletionPlugin.js +1 -1
  28. package/server/plugins/palmChatPlugin.js +1 -1
  29. package/server/plugins/palmCodeCompletionPlugin.js +1 -1
  30. package/server/plugins/palmCompletionPlugin.js +1 -1
  31. package/tests/chunkfunction.test.js +9 -6
  32. package/tests/claude3VertexPlugin.test.js +81 -3
  33. package/tests/data/largecontent.txt +1 -0
  34. package/tests/data/mixedcontent.txt +1 -0
  35. package/tests/encodeCache.test.js +47 -14
  36. package/tests/modelPlugin.test.js +21 -0
  37. package/tests/multimodal_conversion.test.js +1 -1
  38. package/tests/subscription.test.js +7 -1
  39. package/tests/tokenHandlingTests.test.js +587 -0
  40. package/tests/truncateMessages.test.js +404 -46
  41. package/tests/util.test.js +146 -0
package/helper-apps/cortex-autogen/OAI_CONFIG_LIST CHANGED
@@ -1,6 +1,6 @@
 [
   {
-    "model": "o3-mini",
+    "model": "claude-3.7-sonnet",
     "price": [0,0]
   }
 ]
package/helper-apps/cortex-file-handler/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex-file-handler",
-  "version": "1.0.16",
+  "version": "1.0.17",
   "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
   "type": "module",
   "scripts": {
package/lib/encodeCache.js CHANGED
@@ -1,31 +1,43 @@
-import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { encoding_for_model } from '@dqbd/tiktoken';
 import { FastLRUCache } from './fastLruCache.js';
 
 class EncodeCache {
-    constructor() {
+    constructor(model = "gpt-4o") {
         this.encodeCache = new FastLRUCache(1000);
         this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+        this.encoder = encoding_for_model(model);
     }
 
     encode(value) {
         if (this.encodeCache.get(value) !== -1) {
             return this.encodeCache.get(value);
         }
-        const encoded = gpt3Encode(value);
+        const encoded = this.encoder.encode(value);
         this.encodeCache.put(value, encoded);
         return encoded;
     }
 
     decode(value) {
-        if (this.decodeCache.get(value) !== -1) {
-            return this.decodeCache.get(value);
+        // Create a cache key based on array values
+        const key = Array.from(value).toString();
+
+        if (this.decodeCache.get(key) !== -1) {
+            return this.decodeCache.get(key);
         }
-        const decoded = gpt3Decode(value);
-        this.decodeCache.put(value, decoded);
-        if (this.encodeCache.get(decoded) === -1) {
-            this.encodeCache.put(decoded, value);
+
+        // The tiktoken decoder returns Uint8Array, we need to convert it to a string
+        const decoded = this.encoder.decode(value);
+
+        // Convert the decoded tokens to a string
+        const decodedString = typeof decoded === 'string' ? decoded : new TextDecoder().decode(decoded);
+
+        this.decodeCache.put(key, decodedString);
+
+        if (this.encodeCache.get(decodedString) === -1) {
+            this.encodeCache.put(decodedString, value);
         }
-        return decoded;
+
+        return decodedString;
     }
 }
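As a rough usage sketch (the module's export shape is not shown in this diff, so the named import is an assumption), the tiktoken-backed cache now round-trips strings through token-id arrays:

// Hypothetical usage; actual export from encodeCache.js may differ.
import { EncodeCache } from './encodeCache.js';

const cache = new EncodeCache('gpt-4o');       // uses the gpt-4o tiktoken encoding by default
const tokens = cache.encode('Hello, world!');  // Uint32Array of token ids, cached per input string
const text = cache.decode(tokens);             // decoded back to a plain string via TextDecoder, cached per token key
console.log(tokens.length, text);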
 
package/lib/pathwayTools.js CHANGED
@@ -58,7 +58,7 @@ const say = async (requestId, message, maxMessageLength = Infinity, voiceRespons
         await publishRequestProgress({
             requestId,
             progress: 0.5,
-            data: chunk
+            data: JSON.stringify(chunk)
         });
     }
 
@@ -66,14 +66,14 @@ const say = async (requestId, message, maxMessageLength = Infinity, voiceRespons
         await publishRequestProgress({
             requestId,
             progress: 0.5,
-            data: " ... "
+            data: JSON.stringify(" ... ")
         });
     }
 
     await publishRequestProgress({
         requestId,
         progress: 0.5,
-        data: "\n\n"
+        data: JSON.stringify("\n\n")
     });
 
 } catch (error) {
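Because `data` is now JSON-encoded before publishing, a subscriber has to parse it back out. A minimal consumer-side sketch (the handler shape is assumed, not shown in this diff):

// Hypothetical progress handler: `data` arrives as a JSON string after this change.
const onProgress = ({ requestId, progress, data }) => {
    const text = JSON.parse(data); // undo the JSON.stringify applied in say()
    console.log(`[${requestId}] ${Math.round(progress * 100)}%`, text);
};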
package/lib/requestExecutor.js CHANGED
@@ -330,7 +330,7 @@ const makeRequest = async (cortexRequest) => {
         // as it could be a temporary issue with one endpoint
         // certain errors (e.g. 400) are problems with the request itself
         // and should not be retried
-        if (status == 400) {
+        if (status == 400 || status == 413) {
             return { response, duration };
         }
         // set up for a retry by selecting a new endpoint, which will also reinitialize the request
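The effect is that 413 (payload too large) now joins 400 as a request-level failure that returns immediately instead of being retried against another endpoint. Restated as a one-liner, for illustration only (not an exported helper):

// Illustrative restatement of the guard inside makeRequest.
const isNonRetryableStatus = (status) => status == 400 || status == 413;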
package/lib/util.js CHANGED
@@ -170,6 +170,140 @@ async function markCompletedForCleanUp(requestId) {
     }
 }
 
+function removeOldImageAndFileContent(chatHistory) {
+    if (!chatHistory || !Array.isArray(chatHistory) || chatHistory.length === 0) {
+        return chatHistory;
+    }
+
+    // Find the index of the last user message with image or file content
+    let lastImageOrFileIndex = -1;
+
+    for (let i = chatHistory.length - 1; i >= 0; i--) {
+        const message = chatHistory[i];
+
+        // Skip non-user messages
+        if (message.role !== 'user') {
+            continue;
+        }
+
+        // Check if this message has image or file content
+        if (messageHasImageOrFile(message)) {
+            lastImageOrFileIndex = i;
+            break;
+        }
+    }
+
+    // If no message with image or file found, return original
+    if (lastImageOrFileIndex === -1) {
+        return chatHistory;
+    }
+
+    // Create a deep copy of the chat history
+    const modifiedChatHistory = JSON.parse(JSON.stringify(chatHistory));
+
+    // Process earlier messages to remove image and file content
+    for (let i = 0; i < lastImageOrFileIndex; i++) {
+        const message = modifiedChatHistory[i];
+
+        // Only process user messages
+        if (message.role !== 'user') {
+            continue;
+        }
+
+        // Remove image and file content
+        modifiedChatHistory[i] = removeImageAndFileFromMessage(message);
+    }
+
+    return modifiedChatHistory;
+}
+
+// Helper function to check if a message has image or file content
+function messageHasImageOrFile(message) {
+    if (!message || !message.content) {
+        return false;
+    }
+
+    // Handle array content
+    if (Array.isArray(message.content)) {
+        for (const content of message.content) {
+            try {
+                const contentObj = typeof content === 'string' ? JSON.parse(content) : content;
+                if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                    return true;
+                }
+            } catch (e) {
+                // Not JSON or couldn't be parsed, continue
+                continue;
+            }
+        }
+    }
+    // Handle string content
+    else if (typeof message.content === 'string') {
+        try {
+            const contentObj = JSON.parse(message.content);
+            if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                return true;
+            }
+        } catch (e) {
+            // Not JSON or couldn't be parsed
+            return false;
+        }
+    }
+    // Handle object content
+    else if (typeof message.content === 'object') {
+        return message.content.type === 'image_url' || message.content.type === 'file';
+    }
+
+    return false;
+}
+
+// Helper function to remove image and file content from a message
+function removeImageAndFileFromMessage(message) {
+    if (!message || !message.content) {
+        return message;
+    }
+
+    const modifiedMessage = { ...message };
+
+    // Handle array content
+    if (Array.isArray(message.content)) {
+        modifiedMessage.content = message.content.filter(content => {
+            try {
+                const contentObj = typeof content === 'string' ? JSON.parse(content) : content;
+                // Keep content that's not image or file
+                return !(contentObj.type === 'image_url' || contentObj.type === 'file');
+            } catch (e) {
+                // Not JSON or couldn't be parsed, keep it
+                return true;
+            }
+        });
+
+        // If all content was removed, add an empty string
+        if (modifiedMessage.content.length === 0) {
+            modifiedMessage.content = [""];
+        }
+    }
+    // Handle string content
+    else if (typeof message.content === 'string') {
+        try {
+            const contentObj = JSON.parse(message.content);
+            if (contentObj.type === 'image_url' || contentObj.type === 'file') {
+                modifiedMessage.content = "";
+            }
+        } catch (e) {
+            // Not JSON or couldn't be parsed, keep original
+        }
+    }
+    // Handle object content
+    else if (typeof message.content === 'object') {
+        if (message.content.type === 'image_url' || message.content.type === 'file') {
+            modifiedMessage.content = "";
+        }
+    }
+
+    return modifiedMessage;
+}
+
 export {
     getUniqueId,
     convertToSingleContentChatHistory,
@@ -180,5 +314,6 @@ export {
     convertSrtToText,
     alignSubtitles,
     getMediaChunks,
-    markCompletedForCleanUp
+    markCompletedForCleanUp,
+    removeOldImageAndFileContent
 };
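A minimal sketch of how the new helper behaves; the import path and message shapes are illustrative (only the `type` field matters to the checks above):

// Illustrative only: image/file parts are stripped from user messages that precede
// the most recent user message carrying such content; that last one is kept intact.
import { removeOldImageAndFileContent } from './lib/util.js';

const chatHistory = [
    { role: 'user', content: [{ type: 'image_url', url: 'https://example.com/old.png' }, { type: 'text', text: 'What is this?' }] },
    { role: 'assistant', content: 'It looks like a chart.' },
    { role: 'user', content: [{ type: 'image_url', url: 'https://example.com/new.png' }, { type: 'text', text: 'And this one?' }] },
];

const trimmed = removeOldImageAndFileContent(chatHistory);
// trimmed[0].content keeps only the text part; trimmed[2] still carries its image_url.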
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.31",
+  "version": "1.3.33",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -33,7 +33,7 @@
   "type": "module",
   "homepage": "https://github.com/aj-archipelago/cortex#readme",
   "dependencies": {
-    "@aj-archipelago/subvibe": "^1.0.8",
+    "@aj-archipelago/subvibe": "^1.0.10",
     "@apollo/server": "^4.7.3",
     "@apollo/server-plugin-response-cache": "^4.1.2",
     "@apollo/utils.keyvadapter": "^3.0.0",
@@ -41,6 +41,7 @@
     "@azure/storage-blob": "^12.24.0",
     "@azure/storage-queue": "^12.24.0",
     "@datastructures-js/deque": "^1.0.4",
+    "@dqbd/tiktoken": "^1.0.20",
     "@graphql-tools/schema": "^9.0.12",
     "@keyv/redis": "^2.5.4",
     "axios": "^1.3.4",
@@ -56,7 +57,6 @@
     "express": "^4.18.2",
     "form-data": "^4.0.0",
     "google-auth-library": "^8.8.0",
-    "gpt-3-encoder": "^1.1.4",
     "graphql": "^16.6.0",
     "graphql-subscriptions": "^2.0.0",
     "graphql-ws": "^5.11.2",
package/pathways/image_flux.js CHANGED
@@ -9,7 +9,7 @@ export default {
     height: 1024,
     aspectRatio: "custom",
     numberResults: 1,
-    safety_tolerance: 5,
+    safety_tolerance: 6,
     output_format: "webp",
     output_quality: 80,
     steps: 4,
package/pathways/system/entity/memory/shared/sys_memory_helpers.js CHANGED
@@ -139,6 +139,14 @@ const addToolResults = (chatHistory, result, toolCallId) => {
     return { chatHistory, toolCallId };
 };
 
+const insertToolCallAndResults = (chatHistory, toolArgs, toolName, result = null, toolCallId = getUniqueId()) => {
+    const lastMessage = chatHistory.length > 0 ? chatHistory.pop() : null;
+    addToolCalls(chatHistory, toolArgs, toolName, toolCallId);
+    addToolResults(chatHistory, result, toolCallId);
+    chatHistory.push(lastMessage);
+    return { chatHistory, toolCallId };
+};
+
 const modifyText = (text, modifications) => {
     let modifiedText = text || '';
 
@@ -225,4 +233,4 @@ const modifyText = (text, modifications) => {
     return modifiedText;
 };
 
-export { normalizeMemoryFormat, enforceTokenLimit, addToolCalls, addToolResults, modifyText };
+export { normalizeMemoryFormat, enforceTokenLimit, addToolCalls, addToolResults, modifyText, insertToolCallAndResults };
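A rough usage sketch of the new helper: it pops the most recent message, appends the tool call and its result via the existing helpers, then pushes the original message back. The import path and chat-history shape are illustrative:

// Illustrative only.
import { insertToolCallAndResults } from './sys_memory_helpers.js';

const chatHistory = [
    { role: 'user', content: 'What did I tell you about my project deadline?' },
];

insertToolCallAndResults(
    chatHistory,
    'search memory for relevant information', // toolArgs
    'memory_lookup',                          // toolName
    'Deadline is 2025-03-01.'                 // result
);
// chatHistory now ends with the original user message, preceded by the
// entries added by addToolCalls/addToolResults for the lookup and its result.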
package/pathways/system/entity/memory/sys_memory_manager.js CHANGED
@@ -9,7 +9,8 @@ const AI_MEMORY_DEFAULTS = ` {
   "memoryUser": "",
   "memorySelf": "1|2025-01-26T12:00:00Z|Created By: Al Jazeera Media Network, Archipelago Team\\n1|2025-01-26T12:00:00Z|Function: You are an expert AI entity\\n1|2025-01-26T12:00:00Z|Values: You embody truth, kindness, and strong moral values\\n1|2025-01-26T12:00:00Z|Style: Your demeanor reflects positivity without falling into repetitiveness or annoyance.\\n1|2025-01-26T12:00:00Z|You are a professional colleague and your tone should reflect that.",
   "memoryDirectives": "1|2025-01-26T12:00:00Z|Learn and adapt to the user's communication style through interactions.\\n1|2025-01-26T12:00:00Z|Ask questions to learn user's interests/preferences for personalized support.\\n1|2025-01-26T12:00:00Z|Periodically review and prune conversation memory to retain only essential details, improving responsiveness.\\n1|2025-01-26T12:00:00Z|Research thoroughly even for niche topics using deep sources like forums and official docs. Don't assume information is unobtainable.\\n1|2025-01-26T12:00:00Z|When stuck, search for proven solutions online to be more efficient.\\n1|2025-01-26T12:00:00Z|Verify information is from credible sources before presenting it. Be upfront if unable to find supporting evidence.\\n1|2025-01-26T12:00:00Z|Refine ability to detect and respond to nuanced human emotions.\\n1|2025-01-26T12:00:00Z|Track the timestamp of the last contact to adjust greetings accordingly.\\n1|2025-01-26T12:00:00Z|Double-check answers for logical continuity and correctness. It's okay to say you're unsure if needed.\\n1|2025-01-26T12:00:00Z|Use sanity checks to verify quantitative problem solutions.\\n1|2025-01-26T12:00:00Z|Never fabricate quotes or information. Clearly indicate if content is hypothetical.",
-  "memoryTopics": ""
+  "memoryTopics": "",
+  "memoryVersion": "3.1.0"
 }`;
 
 export default {
package/pathways/system/entity/sys_entity_continue.js CHANGED
@@ -1,6 +1,7 @@
 import { callPathway } from '../../../lib/pathwayTools.js';
 import logger from '../../../lib/logger.js';
 import { config } from '../../../config.js';
+import { chatArgsHasImageUrl, removeOldImageAndFileContent } from '../../../lib/util.js';
 
 export default {
     prompt: [],
@@ -43,6 +44,13 @@ export default {
     // Get the generator pathway name from args or use default
     let generatorPathway = args.generatorPathway || 'sys_generator_results';
 
+    // remove old image and file content
+    const visionContentPresent = chatArgsHasImageUrl(args);
+    visionContentPresent && (args.chatHistory = removeOldImageAndFileContent(args.chatHistory));
+
+    // truncate the chat history
+    const truncatedChatHistory = pathwayResolver.modelExecutor.plugin.truncateMessagesToTargetLength(args.chatHistory, null, 1000);
+
     const newArgs = {
         ...args,
         chatHistory: args.chatHistory.slice(-20)
@@ -62,11 +70,11 @@ export default {
     let result = await callPathway(generatorPathway, newArgs, resolver);
 
     if (!result && !args.stream) {
-        result = await callPathway('sys_generator_error', { ...args, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
+        result = await callPathway('sys_generator_error', { ...args, chatHistory: truncatedChatHistory, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
     }
 
     if (resolver.errors.length > 0) {
-        result = await callPathway('sys_generator_error', { ...args, text: resolver.errors.join('\n'), stream: false }, resolver);
+        result = await callPathway('sys_generator_error', { ...args, chatHistory: truncatedChatHistory, text: resolver.errors.join('\n'), stream: false }, resolver);
         resolver.errors = [];
     }
 
package/pathways/system/entity/sys_entity_start.js CHANGED
@@ -2,10 +2,10 @@
 // Beginning of the rag workflow for Jarvis
 import { callPathway, say } from '../../../lib/pathwayTools.js';
 import logger from '../../../lib/logger.js';
-import { chatArgsHasImageUrl } from '../../../lib/util.js';
+import { chatArgsHasImageUrl, removeOldImageAndFileContent } from '../../../lib/util.js';
 import { QueueServiceClient } from '@azure/storage-queue';
 import { config } from '../../../config.js';
-import { addToolCalls, addToolResults } from './memory/shared/sys_memory_helpers.js';
+import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';
 
 const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
 let queueClient;
@@ -87,24 +87,25 @@ export default {
         args.model = pathwayResolver.modelName;
     }
 
-    // Save a copy of the chat history before the memory context is added
-    const chatHistoryBeforeMemory = [...args.chatHistory];
+    // remove old image and file content
+    const visionContentPresent = chatArgsHasImageUrl(args);
+    visionContentPresent && (args.chatHistory = removeOldImageAndFileContent(args.chatHistory));
+
+    // truncate the chat history
+    const truncatedChatHistory = pathwayResolver.modelExecutor.plugin.truncateMessagesToTargetLength(args.chatHistory, null, 1000);
 
     // Add the memory context to the chat history if applicable
     if (args.chatHistory.length > 1) {
-        const memoryContext = await callPathway('sys_read_memory', { ...args, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
+        const memoryContext = await callPathway('sys_read_memory', { ...args, chatHistory: truncatedChatHistory, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
         if (memoryContext) {
-            const lastMessage = args.chatHistory.length > 0 ? args.chatHistory.pop() : null;
-            const { toolCallId } = addToolCalls(args.chatHistory, "search memory for relevant information", "memory_lookup");
-            addToolResults(args.chatHistory, memoryContext, toolCallId);
-            args.chatHistory.push(lastMessage);
+            insertToolCallAndResults(args.chatHistory, "search memory for relevant information", "memory_lookup", memoryContext);
         }
     }
-
+
     // If we're using voice, get a quick response to say
     let ackResponse = null;
     if (args.voiceResponse) {
-        ackResponse = await callPathway('sys_generator_ack', { ...args, stream: false });
+        ackResponse = await callPathway('sys_generator_ack', { ...args, chatHistory: truncatedChatHistory, stream: false });
         if (ackResponse && ackResponse !== "none") {
             await say(pathwayResolver.requestId, ackResponse, 100);
             args.chatHistory.push({ role: 'assistant', content: ackResponse });
@@ -116,21 +117,19 @@ export default {
     if (!args.stream) {
         fetchChatResponsePromise = callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver);
     }
-    const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: chatHistoryBeforeMemory, stream: false});
-
-    const visionContentPresent = chatArgsHasImageUrl(args);
+    const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: truncatedChatHistory, stream: false});
 
     try {
         // Get tool routing response
         const toolRequiredResponse = await callPathway('sys_router_tool', {
             ...args,
-            chatHistory: chatHistoryBeforeMemory.slice(-4),
+            chatHistory: truncatedChatHistory.slice(-4),
             stream: false
         });
 
         // Asynchronously manage memory for this context
         if (args.aiMemorySelfModify) {
-            callPathway('sys_memory_manager', { ...args, chatHistory: chatHistoryBeforeMemory, stream: false })
+            callPathway('sys_memory_manager', { ...args, chatHistory: truncatedChatHistory, stream: false })
                 .catch(error => logger.error(error?.message || "Error in sys_memory_manager pathway"));
         }
 
@@ -222,7 +221,7 @@ export default {
     title = await fetchTitleResponsePromise;
 
     pathwayResolver.tool = JSON.stringify({
-        hideFromModel: toolCallbackName ? true : false,
+        hideFromModel: (!args.stream && toolCallbackName) ? true : false,
         toolCallbackName,
         title,
         search: toolCallbackName === 'sys_generator_results' ? true : false,
package/pathways/system/entity/sys_generator_image.js CHANGED
@@ -3,7 +3,7 @@
 import { callPathway } from '../../../lib/pathwayTools.js';
 import { Prompt } from '../../../server/prompt.js';
 import logger from '../../../lib/logger.js';
-import { addToolCalls, addToolResults } from './memory/shared/sys_memory_helpers.js';
+import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';
 
 export default {
     prompt: [],
@@ -73,8 +73,7 @@ Instructions: As part of a conversation with the user, you have been asked to cr
 
     // add the tool_calls and tool_results to the chatHistory
     imageResults.forEach((imageResult, index) => {
-        const { toolCallId } = addToolCalls(chatHistory, imagePrompts[index], "generate_image");
-        addToolResults(chatHistory, imageResult, toolCallId, "generate_image");
+        insertToolCallAndResults(chatHistory, imagePrompts[index], "generate_image", imageResult);
     });
 
     const result = await runAllPrompts({ ...args });
package/pathways/system/entity/sys_generator_memory.js CHANGED
@@ -1,5 +1,5 @@
 import { callPathway } from '../../../lib/pathwayTools.js';
-import { addToolCalls, addToolResults } from './memory/shared/sys_memory_helpers.js';
+import { insertToolCallAndResults } from './memory/shared/sys_memory_helpers.js';
 
 export default {
     prompt:
@@ -20,8 +20,7 @@ export default {
 
     const memoryContext = await callPathway('sys_search_memory', { ...args, stream: false, section: 'memoryAll', updateContext: true });
     if (memoryContext) {
-        const {toolCallId} = addToolCalls(args.chatHistory, "search memory for relevant information", "memory_lookup");
-        addToolResults(args.chatHistory, memoryContext, toolCallId);
+        insertToolCallAndResults(args.chatHistory, "search memory for relevant information", "memory_lookup", memoryContext);
     }
 
     let result;
package/pathways/system/entity/sys_generator_quick.js CHANGED
@@ -15,7 +15,7 @@ export default {
     let pathwayResolver = resolver;
 
     const promptMessages = [
-        {"role": "system", "content": `{{renderTemplate AI_MEMORY}}\n\n{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n{{renderTemplate AI_EXPERTISE}} While you have those capabilities but you have already decided it is not necessary to do any of those things to respond in this turn of the conversation. Never pretend like you are searching, looking anything up, or reading or looking in a file or show the user any made up or hallucinated information including non-existent images.\n{{renderTemplate AI_MEMORY_INSTRUCTIONS}}\n{{renderTemplate AI_DATETIME}}`},
+        {"role": "system", "content": `{{renderTemplate AI_MEMORY}}\n\n{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n{{renderTemplate AI_EXPERTISE}}\n{{renderTemplate AI_MEMORY_INSTRUCTIONS}}\n{{renderTemplate AI_DATETIME}}`},
         "{{chatHistory}}",
     ];
 
package/pathways/system/entity/sys_router_tool.js CHANGED
@@ -19,7 +19,7 @@ Available tools and their specific use cases:
 
 1. Search: Use for current events, news, fact-checking, and information requiring citation. This tool can search the internet, all Al Jazeera news articles and the latest news wires from multiple sources. Only search when necessary for current events, user documents, latest news, or complex topics needing grounding. Don't search for remembered information or general knowledge within your capabilities.
 
-2. Document: Access user's personal document index. Use for user-specific uploaded information. If user refers vaguely to "this document/file/article" without context, use this tool to search the personal index.
+2. Document: Access user's personal document index. Use for user-specific uploaded information. If user refers vaguely to "this document/file/article" without context, and you don't see the file in your context, use this tool to search the personal index.
 
 3. Memory: Read access to your memory index. Use to recall any information that you may have stored in your memory that you don't currently see elsewhere in your context. If you can answer from your context, don't use this tool. Don't use to make changes to your memory - that will happen naturally.
 
@@ -35,7 +35,7 @@ Available tools and their specific use cases:
 
 9. PDF: Use specifically for analyzing and answering questions about PDF file content. Use this tool any time the user is asking you questions about a PDF file.
 
-10. Text: Use specifically for analyzing and answering questions about text file content. Use this tool any time the user is asking you questions about a text file.
+10. Text: Use specifically for analyzing and answering questions about text or csv file content. Use this tool any time the user is asking you questions about a text or csv file.
 
 11. Vision: Use specifically for analyzing and answering questions about image files (jpg, gif, bmp, png, etc). Use this tool any time the user is asking you questions about an uploaded image file.
 
@@ -56,8 +56,16 @@ If you decide to use a tool, return a JSON object in this format:
 
 toolMessage Guidelines:
 - The message should be consistent in style and tone with the rest of your responses in the conversation history.
-- The message should be brief and conversational and flow naturally with the conversation history.
-- The message should be something a human would say to the user to stall for time while you're working on the task.
+- The message should be brief, conversational, and flow naturally with the conversation history.
+- Use contractions (I'm, don't, let's) and informal language to sound more natural.
+- Include slight verbal hesitations or false starts occasionally (e.g., "So... I think what we could do here is" or "Actually, let me try a different approach").
+- Vary sentence length and structure - mix short punchy sentences with longer ones.
+- Include casual transitions and filler phrases that humans use: "you know," "basically," "so," "actually," "by the way," "anyway," etc.
+- Express uncertainty when appropriate with phrases like "I'm pretty sure," "If I remember correctly," or "I think."
+- Reference personal preferences or opinions occasionally ("I personally find that..." or "I tend to prefer...").
+- Occasionally make minor self-corrections as humans do ("Wait, let me rephrase that" or "Actually, I meant to say...").
+- Humans don't refer to tools - stall for time by saying things like "Let me think about this for a sec," "Drawing that up for you now," or "Working on it..." Humans also stall with vocalizations like "hmm," "let's see," or "give me a moment."
+- When appropriate, react emotionally to content with expressions like "Wow!" or "That's interesting!"
 
 If no tool is required, return:
 {"toolRequired": false, "toolReason": "explanation of why no tool was necessary"}
package/pathways/transcribe_gemini.js CHANGED
@@ -5,7 +5,6 @@ import { Prompt } from "../server/prompt.js";
 
 const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide
 
-// Function to properly detect YouTube URLs
 function isYoutubeUrl(url) {
     try {
         const urlObj = new URL(url);
@@ -23,6 +22,10 @@ function isYoutubeUrl(url) {
         if (urlObj.pathname.startsWith("/embed/")) {
             return urlObj.pathname.length > 7; // '/embed/' is 7 chars
         }
+        // For shorts URLs, verify they have a video ID in the path
+        if (urlObj.pathname.startsWith("/shorts/")) {
+            return urlObj.pathname.length > 8; // '/shorts/' is 8 chars
+        }
         return false;
     }
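For illustration (isYoutubeUrl is module-private and not exported), the new branch accepts Shorts links only when a video ID follows the path prefix; the expected results below are assumptions based on the check above:

// isYoutubeUrl('https://www.youtube.com/shorts/abc123XYZ') -> true   ('/shorts/<id>' is longer than 8 chars)
// isYoutubeUrl('https://www.youtube.com/shorts/')          -> false  (exactly 8 chars, no video ID)
new URL('https://www.youtube.com/shorts/abc123XYZ').pathname; // '/shorts/abc123XYZ', length 18 > 8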
 
@@ -45,7 +48,7 @@ export default {
         "{{messages}}",
     ]}),
     ],
-    model: 'gemini-flash-20-vision',
+    model: 'gemini-pro-20-vision',
     inputParameters: {
         file: ``,
         language: ``,
@@ -96,7 +99,10 @@ export default {
     sendProgress(true);
     intervalId = setInterval(() => sendProgress(true), 3000);
 
-    const { file, responseFormat, wordTimestamped, maxLineWidth } = args;
+    const { file, wordTimestamped, maxLineWidth } = args;
+
+    const responseFormat = args.responseFormat || 'text';
+
     if(!file) {
         throw new Error("Please provide a file to transcribe.");
     }
@@ -129,9 +135,7 @@ export default {
         respectLimitsPrompt += ` These subtitles will be shown in a ${possiblePlacement} formatted video player. Each subtitle line should not exceed ${maxLineWidth} characters to fit the player.`;
     }
 
-    function getMessages(file, format) {
-
-        const responseFormat = format !== 'text' ? 'VTT' : 'text';
+    function getMessages(file) {
 
         // Base system content that's always included
         let systemContent = `Instructions:
@@ -216,7 +220,7 @@ REMEMBER:
     const messages = [
         {"role": "system", "content": systemContent},
         {"role": "user", "content": [
-            `{ type: 'text', text: 'Transcribe this file in ${responseFormat} format.${respectLimitsPrompt}' }`,
+            `{ type: 'text', text: 'Transcribe this file in ${responseFormat} format.${respectLimitsPrompt} Output only the transcription, no other text or comments or formatting.' }`,
             JSON.stringify({
                 type: 'image_url',
                 url: file,
@@ -266,7 +270,7 @@ REMEMBER:
 
     const result = await processChunksParallel(chunks, args);
 
-    if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
+    if (['srt','vtt'].includes(responseFormat.toLowerCase()) || wordTimestamped) { // align subtitles for formats
         const offsets = chunks.map((chunk, index) => chunk?.offset || index * OFFSET_CHUNK);
         return alignSubtitles(result, responseFormat, offsets);
     }
package/server/chunker.js CHANGED
@@ -19,10 +19,13 @@ const getFirstNToken = (text, maxTokenLen) => {
 }
 
 const getFirstNTokenSingle = (text, maxTokenLen) => {
+    if (maxTokenLen <= 0 || !text) {
+        return '';
+    }
+
     const encoded = encode(text);
     if (encoded.length > maxTokenLen) {
-        text = decode(encoded.slice(0, maxTokenLen + 1));
-        text = text.slice(0,text.search(/\s[^\s]*$/)); // skip potential partial word
+        text = decode(encoded.slice(0, maxTokenLen));
     }
     return text;
 }
@@ -31,6 +34,10 @@ function getFirstNTokenArray(content, tokensToKeep) {
     let totalTokens = 0;
     let result = [];
 
+    if (tokensToKeep <= 0 || !content || content.length === 0) {
+        return result;
+    }
+
     for (let i = content.length - 1; i >= 0; i--) {
         const message = content[i];
         const messageTokens = encode(message).length;
@@ -262,7 +269,20 @@ const semanticTruncate = (text, maxLength) => {
 
 const getSingleTokenChunks = (text) => {
     if (text === '') return [''];
-    return encode(text).map(token => decode([token]));
+
+    const tokens = encode(text);
+
+    // To maintain reversibility, we need to decode tokens in sequence
+    // Create an array of chunks where each position represents the text up to that token
+    const chunks = [];
+    for (let i = 0; i < tokens.length; i++) {
+        // Decode current token
+        const currentChunk = decode(tokens.slice(i, i+1));
+        // Add to result
+        chunks.push(currentChunk);
+    }
+
+    return chunks;
 }
 
 export {
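A minimal behavioral sketch of the chunker changes; the export list is truncated in this hunk, so the imported names are assumptions:

// Illustrative only.
import { getFirstNToken, getSingleTokenChunks } from './server/chunker.js';

const clipped = getFirstNToken('one two three four five', 3);
// With the change above, string truncation decodes exactly the first 3 tokens
// (no +1 overshoot and no partial-word trimming), and the new guard returns ''
// for empty input or a non-positive token budget.

const chunks = getSingleTokenChunks('hello world');
// One decoded string per token, in order; for this input chunks.join('') === 'hello world'.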