@aj-archipelago/cortex 1.3.21 → 1.3.22

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (32)
  1. package/helper-apps/cortex-realtime-voice-server/src/cortex/memory.ts +2 -2
  2. package/lib/util.js +1 -1
  3. package/package.json +1 -1
  4. package/pathways/system/entity/memory/shared/sys_memory_helpers.js +228 -0
  5. package/pathways/system/entity/memory/sys_memory_format.js +30 -0
  6. package/pathways/system/entity/memory/sys_memory_manager.js +85 -27
  7. package/pathways/system/entity/memory/sys_memory_process.js +154 -0
  8. package/pathways/system/entity/memory/sys_memory_required.js +4 -2
  9. package/pathways/system/entity/memory/sys_memory_topic.js +22 -0
  10. package/pathways/system/entity/memory/sys_memory_update.js +50 -150
  11. package/pathways/system/entity/memory/sys_read_memory.js +67 -69
  12. package/pathways/system/entity/memory/sys_save_memory.js +1 -1
  13. package/pathways/system/entity/memory/sys_search_memory.js +1 -1
  14. package/pathways/system/entity/sys_entity_start.js +9 -6
  15. package/pathways/system/entity/sys_generator_image.js +5 -41
  16. package/pathways/system/entity/sys_generator_memory.js +3 -1
  17. package/pathways/system/entity/sys_generator_reasoning.js +1 -1
  18. package/pathways/system/entity/sys_router_tool.js +3 -4
  19. package/pathways/system/rest_streaming/sys_claude_35_sonnet.js +1 -1
  20. package/pathways/system/rest_streaming/sys_claude_3_haiku.js +1 -1
  21. package/pathways/system/rest_streaming/sys_google_gemini_chat.js +1 -1
  22. package/pathways/system/rest_streaming/sys_openai_chat_o1.js +1 -1
  23. package/pathways/system/rest_streaming/sys_openai_chat_o3_mini.js +1 -1
  24. package/pathways/transcribe_gemini.js +397 -0
  25. package/server/pathwayResolver.js +7 -7
  26. package/server/plugins/claude3VertexPlugin.js +109 -3
  27. package/server/plugins/gemini15VisionPlugin.js +7 -0
  28. package/server/plugins/modelPlugin.js +1 -1
  29. package/server/rest.js +24 -3
  30. package/tests/claude3VertexToolConversion.test.js +411 -0
  31. package/tests/memoryfunction.test.js +560 -46
  32. package/tests/openai_api.test.js +332 -0

package/pathways/system/rest_streaming/sys_claude_35_sonnet.js
@@ -11,7 +11,7 @@ export default {
         ]}),
     ],
     inputParameters: {
-        messages: [],
+        messages: [{role: '', content: []}],
     },
     model: 'claude-35-sonnet-vertex',
     useInputChunking: false,

package/pathways/system/rest_streaming/sys_claude_3_haiku.js
@@ -11,7 +11,7 @@ export default {
         ]}),
     ],
     inputParameters: {
-        messages: [],
+        messages: [{role: '', content: []}],
     },
     model: 'claude-3-haiku-vertex',
     useInputChunking: false,

package/pathways/system/rest_streaming/sys_google_gemini_chat.js
@@ -11,7 +11,7 @@ export default {
         ]}),
     ],
     inputParameters: {
-        messages: [],
+        messages: [{role: '', content: []}],
     },
     model: 'gemini-pro-chat',
     useInputChunking: false,

package/pathways/system/rest_streaming/sys_openai_chat_o1.js
@@ -10,7 +10,7 @@ export default {
         ]}),
     ],
     inputParameters: {
-        messages: [],
+        messages: [{role: '', content: []}],
     },
     model: 'oai-o1',
     useInputChunking: false,

package/pathways/system/rest_streaming/sys_openai_chat_o3_mini.js
@@ -10,7 +10,7 @@ export default {
         ]}),
     ],
     inputParameters: {
-        messages: [],
+        messages: [{role: '', content: []}],
    },
     model: 'oai-o3-mini',
     useInputChunking: false,
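
All five streaming pathway hunks above make the same change: the default value of the messages input parameter goes from an untyped empty array to a seeded {role: '', content: []} object. Cortex appears to derive a pathway's input type from each parameter's default value, so the seeded shape lets callers submit structured role/content messages, including multi-part content arrays, rather than bare strings. A minimal sketch of variables a caller might now pass (values invented for illustration):

    // Hypothetical variables enabled by the typed messages default
    const variables = {
        messages: [
            { role: "system", content: ["You are a helpful assistant."] },
            { role: "user", content: ["Summarize the attached document."] },
        ],
    };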

package/pathways/transcribe_gemini.js
@@ -0,0 +1,397 @@
+import logger from "../lib/logger.js";
+import { publishRequestProgress } from "../lib/redisSubscription.js";
+import { alignSubtitles, getMediaChunks } from "../lib/util.js";
+import { Prompt } from "../server/prompt.js";
+
+const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide
+
+
+
+export function convertSrtToVtt(data) {
+    if (!data || !data.trim()) {
+        return "WEBVTT\n\n";
+    }
+    // remove dos newlines
+    var srt = data.replace(/\r+/g, "");
+    // trim white space start and end
+    srt = srt.replace(/^\s+|\s+$/g, "");
+
+    // Convert all timestamps from comma to dot format
+    srt = srt.replace(/(\d{2}:\d{2}:\d{2}),(\d{3})/g, "$1.$2");
+
+    // Add blank lines before sequence numbers that are followed by timecodes
+    srt = srt.replace(/(\n)(\d+)\n(\d{2}:\d{2}:\d{2}[,.])/g, "$1\n$2\n$3");
+
+    // get cues
+    var cuelist = srt.split("\n\n");
+    var result = "";
+    if (cuelist.length > 0) {
+        result += "WEBVTT\n\n";
+        for (var i = 0; i < cuelist.length; i = i + 1) {
+            const cue = convertSrtCue(cuelist[i]);
+            // Only add non-empty cues
+            if (cue) {
+                result += cue;
+            }
+        }
+    }
+    return result;
+}
+
+function convertSrtCue(caption) {
+    if (!caption || !caption.trim()) {
+        return "";
+    }
+    // remove all html tags for security reasons
+    //srt = srt.replace(/<[a-zA-Z\/][^>]*>/g, '');
+    var cue = "";
+    var s = caption.split(/\n/);
+    // concatenate muilt-line string separated in array into one
+    while (s.length > 3) {
+        for (var i = 3; i < s.length; i++) {
+            s[2] += "\n" + s[i];
+        }
+        s.splice(3, s.length - 3);
+    }
+    var line = 0;
+    // detect identifier
+    if (
+        s[0] &&
+        s[1] &&
+        !s[0].match(/\d+:\d+:\d+/) &&
+        s[1].match(/\d+:\d+:\d+/)
+    ) {
+        const match = s[0].match(/^\d+$/); // Only match if the entire line is a number
+        if (match) {
+            cue += match[0] + "\n";
+            line += 1;
+        }
+    }
+    // get time strings
+    if (s[line] && s[line].match(/\d+:\d+:\d+/)) {
+        // convert time string
+        var m = s[1].match(
+            /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*--?>\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
+        );
+        if (m) {
+            cue +=
+                m[1] +
+                ":" +
+                m[2] +
+                ":" +
+                m[3] +
+                "." +
+                m[4] +
+                " --> " +
+                m[5] +
+                ":" +
+                m[6] +
+                ":" +
+                m[7] +
+                "." +
+                m[8] +
+                "\n";
+            line += 1;
+        } else {
+            // Unrecognized timestring
+            return "";
+        }
+    } else {
+        // file format error or comment lines
+        return "";
+    }
+    // get cue text
+    if (s[line]) {
+        cue += s[line] + "\n\n";
+    }
+    return cue;
+}
+
+export function detectSubtitleFormat(text) {
+    // Remove DOS newlines and trim whitespace
+    const cleanText = text.replace(/\r+/g, "").trim();
+    const lines = cleanText.split("\n");
+
+    // Check if it's VTT format
+    if (lines[0]?.trim() === "WEBVTT") {
+        return "vtt";
+    }
+
+    // Check if it's SRT format
+    // SRT files have a specific pattern:
+    // 1. Numeric index
+    // 2. Timestamp in format: 00:00:00,000 --> 00:00:00,000
+    // 3. Subtitle text
+    // 4. Blank line
+    const timeRegex =
+        /(\d{2}:\d{2}:\d{2})[,.](\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2})[,.](\d{3})/;
+
+    let hasValidStructure = false;
+    let index = 1;
+
+    // Check first few entries to confirm SRT structure
+    for (let i = 0; i < Math.min(lines.length, 12); i++) {
+        const line = lines[i]?.trim();
+        if (!line) continue;
+
+        // Check if line is a number matching our expected index
+        if (line === index.toString()) {
+            // Look ahead for timestamp
+            const nextLine = lines[i + 1]?.trim();
+            if (nextLine && timeRegex.test(nextLine)) {
+                hasValidStructure = true;
+                index++;
+                i++; // Skip timestamp line since we've verified it
+            }
+        }
+    }
+
+    if (hasValidStructure) {
+        return "srt";
+    }
+
+    return null;
+}
+
+
+export default {
+    prompt:
+        [
+            new Prompt({ messages: [
+                "{{messages}}",
+            ]}),
+        ],
+    model: 'gemini-flash-20-vision',
+    inputParameters: {
+        file: ``,
+        language: ``,
+        responseFormat: `text`,
+        wordTimestamped: false,
+        highlightWords: false,
+        maxLineWidth: 0,
+        maxLineCount: 0,
+        maxWordsPerLine: 0,
+        contextId: ``,
+    },
+    timeout: 3600, // in seconds
+    enableDuplicateRequests: false,
+
+    executePathway: async ({args, runAllPrompts, resolver}) => {
+        let intervalId;
+        const { requestId } = resolver;
+
+        try{
+            let totalCount = 11; //init max chunk value
+            let completedCount = 0;
+            let partialCount = 0;
+            let partialRatio = 0;
+
+            const sendProgress = (partial=false, resetCount=false) => {
+                partialCount = resetCount ? 0 : partialCount;
+
+                if(partial){
+                    partialCount++;
+                    const increment = 0.02 / Math.log2(partialCount + 1); // logarithmic diminishing increment
+                    partialRatio = Math.min(partialRatio + increment, 0.99); // limit to 0.99
+                }else{
+                    partialCount = 0;
+                    partialRatio = 0;
+                    completedCount++;
+                }
+                if(completedCount >= totalCount) return;
+
+                const progress = (completedCount + partialRatio) / totalCount;
+                logger.info(`Progress for ${requestId}: ${progress}`);
+
+                console.log(`Progress for ${requestId}: ${progress}`);
+                publishRequestProgress({
+                    requestId,
+                    progress,
+                    data: null,
+                });
+            }
+            sendProgress(true);
+            intervalId = setInterval(() => sendProgress(true), 3000);
+
+            const { file, responseFormat, wordTimestamped, maxLineWidth } = args;
+            if(!file) {
+                throw new Error("Please provide a file to transcribe.");
+            }
+
+
+            //check if fils is a gcs file or youtube
+            const isGcs = file.startsWith('gs://');
+            const isYoutube = file.match(/^(http(s)?:\/\/)?((w){3}.)?youtu(be|.be)?(\.com)?\/.+/);
+
+            let chunks = [{
+                url: file,
+                gcs: file,
+                offset: 0,
+            }];
+            if(!isGcs && !isYoutube) {
+                //get chunks from helper api if not gcs or youtube
+                chunks = await getMediaChunks(file, requestId);
+            }
+            totalCount = chunks.length+1;
+            logger.info(`Processing chunks: ${JSON.stringify(chunks)}`);
+
+            sendProgress(true);
+
+            let respectLimitsPrompt = " ";
+            if (maxLineWidth) {
+
+                const possiblePlacement = maxLineWidth <= 25
+                    ? "vertical" : maxLineWidth <= 35 ? "horizontal" : "";
+
+                respectLimitsPrompt += `The output lines must not exceed ${maxLineWidth} characters, so make sure your transcription lines and timestamps are perfectly aligned. `;
+
+                if(possiblePlacement){
+                    respectLimitsPrompt+= `This limit a must as user will be using the output for ${possiblePlacement} display.`
+                }
+            }
+
+            const transcriptionLevel = wordTimestamped ? "word" : "phrase";
+
+            function getMessages(file, format) {
+
+                const responseFormat = format!== 'text' ? 'SRT' : 'text';
+
+                const messages = [
+                    {"role": "system", "content": `Instructions:\nYou are an AI entity with expertise of transcription. Your response only contains the transcription, no comments or additonal stuff.
+
+Your output must be in the format asked, and must be strictly following the formats and parseble by auto parsers.
+
+Word-level transcriptions must be per word timestamped, and phrase-level transcriptions are per phrase.
+
+Each transcription timestamp must precisely match the corresponding audio/video segment.
+Each timestamp must correspond to actual spoken content.
+End time cannot exceed total media duration. Especially when transcribing word-level double check your timestamps, never exceed the total duration.
+
+You must follow 1, 2, 3, ... numbering for each transcription segment without any missing numbers.
+Never put newlines or spaces in the middle of a timestamp.
+Never put multiple lines for a single timestamp.
+
+Example responses:
+
+- If asked SRT format, e.g.:
+1
+00:00:00,498 --> 00:00:02,827
+Hello World!
+
+2
+00:00:02,827 --> 00:00:06,383
+Being AI is fun!
+
+- If asked VTT format, e.g.:
+WEBVTT
+
+1
+00:00:00.000 --> 00:00:02.944
+Hello World2!
+
+2
+00:05.344 --> 00:00:08.809
+Being AI is also great!
+
+- If asked text format, e.g.:
+Hello World!!! Being AI is being great yet again!
+
+Word-level output e.g.:
+
+WEBVTT
+
+1
+00:00:00.000 --> 00:00:01.944
+Hello
+
+2
+00:00:01.964 --> 00:00:02.383
+World!
+
+
+You must follow spacing, punctuation, and timestamps as shown in the examples otherwise your response will not be accepted.
+Never output multiple lines for a single timestamp.
+Even a single newline or space can cause the response to be rejected. You must follow the format strictly. You must place newlines and timestamps exactly as shown in the examples.
+
+`},
+                    {"role": "user", "content": [
+                        `{ type: 'text', text: 'Transcribe the media ${transcriptionLevel}-level in ${responseFormat} format.${respectLimitsPrompt}' }`,
+                        JSON.stringify({
+                            type: 'image_url',
+                            url: file,
+                            gcs: file
+                        })
+                    ]},
+                ]
+
+                return messages;
+            }
+
+            const processChunksParallel = async (chunks, args) => {
+                try {
+                    const chunkPromises = chunks.map(async (chunk, index) => ({
+                        index,
+                        result: await runAllPrompts({
+                            ...args,
+                            messages: getMessages(chunk.gcs || chunk.uri, responseFormat),
+                            requestId: `${requestId}-${index}`
+                        })
+                    }));
+
+                    // const results = await Promise.all(chunkPromises);
+
+                    const results = await Promise.all(
+                        chunkPromises.map(promise =>
+                            promise.then(result => {
+                                sendProgress();
+                                return result;
+                            })
+                        ));
+
+                    return results
+                        .sort((a, b) => a.index - b.index)
+                        .map(item => item.result);
+                } catch (error) {
+                    logger.error('Error processing chunks:', error);
+                    throw error;
+                }
+            };
+
+            // serial processing of chunks
+            // const result = [];
+            // for(const chunk of chunks) {
+            //     const chunkResult = await runAllPrompts({ ...args, messages: getMessages(chunk.gcs || chunk.uri) });
+            //     result.push(chunkResult);
+            // }
+
+            const result = await processChunksParallel(chunks, args);
+
+            // publishRequestProgress({
+            //     requestId: this.rootRequestId || this.requestId,
+            //     progress: 1,
+            //     data: "a",
+            // });
+
+            if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
+
+                // convert as gemini output is unstable
+                for(let i = 0; i < result.length; i++) {
+                    try{
+                        result[i] = convertSrtToVtt(result[i]);
+                    }catch(error){
+                        logger.error(`Error converting to vtt: ${error}`);
+                    }
+                }
+
+                const offsets = chunks.map((chunk, index) => chunk?.offset || index * OFFSET_CHUNK);
+                return alignSubtitles(result, responseFormat, offsets);
+            }
+            return result.join(` `);
+        }catch(error){
+            logger.error(`Error in transcribing: ${error}`);
+            throw error;
+        }finally{
+            intervalId && clearInterval(intervalId);
+        }
+    }
+};
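
Both subtitle helpers in the new pathway are exported, so they can be exercised on their own. A small usage sketch with an invented SRT snippet:

    import { convertSrtToVtt, detectSubtitleFormat } from "./pathways/transcribe_gemini.js";

    const srt = "1\n00:00:00,498 --> 00:00:02,827\nHello World!\n";
    console.log(detectSubtitleFormat(srt)); // "srt"
    console.log(convertSrtToVtt(srt));
    // WEBVTT
    //
    // 1
    // 00:00:00.498 --> 00:00:02.827
    // Hello World!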

package/server/pathwayResolver.js
@@ -102,7 +102,7 @@ class PathwayResolver {
         if (!modelTypesExcludedFromProgressUpdates.includes(this.model.type)) {
             await publishRequestProgress({
                 requestId: this.rootRequestId || this.requestId,
-                progress: completedCount / totalCount,
+                progress: Math.min(completedCount,totalCount) / totalCount,
                 data: JSON.stringify(responseData),
             });
         }
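
The Math.min clamp keeps reported progress from overshooting 1.0 when more responses complete than were anticipated. With assumed counts:

    // totalCount = 4: a surplus fifth completion now reports 1, not 1.25
    const totalCount = 4;
    console.log([3, 4, 5].map(c => Math.min(c, totalCount) / totalCount)); // [0.75, 1, 1]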

package/server/pathwayResolver.js
@@ -227,10 +227,10 @@ class PathwayResolver {
         // Load saved context and core memory if it exists
         const [savedContext, memorySelf, memoryDirectives, memoryTopics, memoryUser, memoryContext] = await Promise.all([
             (getv && await getv(this.savedContextId)) || {},
-            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memorySelf', priority: 1}),
-            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryDirectives', priority: 1 }),
+            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memorySelf', priority: 1, stripMetadata: true }),
+            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryDirectives', priority: 1, stripMetadata: true }),
             callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryTopics', priority: 0, numResults: 10 }),
-            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryUser', priority: 1 }),
+            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryUser', priority: 1, stripMetadata: true }),
             callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryContext', priority: 0 }),
         ]).catch(error => {
             this.logError(`Failed to load memory: ${error.message}`);
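
The new stripMetadata flag is passed only for the prose-like sections (memorySelf, memoryDirectives, memoryUser), not for memoryTopics or memoryContext. Presumably it asks sys_read_memory to return the section text without per-entry metadata markup so it can be spliced directly into prompts; the authoritative behavior is in sys_read_memory.js (+67 -69 in the file list). A hedged sketch of a standalone call, with an invented context id:

    const memorySelf = await callPathway('sys_read_memory', {
        contextId: 'ctx-123', // hypothetical
        section: 'memorySelf',
        priority: 1,
        stripMetadata: true, // new in 1.3.22; semantics inferred from its usage here
    });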

package/server/pathwayResolver.js
@@ -315,12 +315,12 @@ class PathwayResolver {
    processInputText(text) {
        let chunkTokenLength = 0;
        if (this.pathway.inputChunkSize) {
-            chunkTokenLength = Math.min(this.pathway.inputChunkSize, this.chunkMaxTokenLength);
+            chunkTokenLength = this.pathway.inputChunkSize;
        } else {
            chunkTokenLength = this.chunkMaxTokenLength;
        }
        const encoded = text ? encode(text) : [];
-        if (!this.useInputChunking || encoded.length <= chunkTokenLength) { // no chunking, return as is
+        if (!this.useInputChunking) { // no chunking, return as is
            if (encoded.length > 0 && encoded.length >= chunkTokenLength) {
                const warnText = `Truncating long input text. Text length: ${text.length}`;
                this.logWarning(warnText);
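
Two behavior changes land in processInputText: an explicit pathway inputChunkSize now wins even when it exceeds the model-derived chunkMaxTokenLength (the old Math.min clamp is gone), and the early return is taken only when chunking is disabled, so input that happens to fit in one chunk still flows through the normal chunking path. Illustrating the first change with assumed numbers:

    const inputChunkSize = 50000;      // pathway setting
    const chunkMaxTokenLength = 16000; // model-derived limit
    // 1.3.21: chunkTokenLength === Math.min(inputChunkSize, chunkMaxTokenLength) === 16000
    // 1.3.22: chunkTokenLength === inputChunkSize === 50000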

package/server/pathwayResolver.js
@@ -375,7 +375,7 @@ class PathwayResolver {
    // Process the request and return the result
    async processRequest({ text, ...parameters }) {
        text = await this.summarizeIfEnabled({ text, ...parameters }); // summarize if flag enabled
-        const chunks = this.processInputText(text);
+        const chunks = text && this.processInputText(text) || [text];

        let anticipatedRequestCount = chunks.length * this.prompts.length

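
The new guard keeps falsy text out of the chunker: chunks collapses to a single pass-through element, so message-only pathways still anticipate exactly one request per prompt. A stub illustration:

    const processInputText = (t) => [t.slice(0, 10), t.slice(10)]; // stand-in chunker
    const chunksFor = (text) => (text && processInputText(text)) || [text];
    console.log(chunksFor(undefined));         // [undefined] -> length 1
    console.log(chunksFor("hello world, hi")); // ["hello worl", "d, hi"]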

package/server/plugins/claude3VertexPlugin.js
@@ -15,6 +15,21 @@ async function convertContentItem(item, maxImageSize, plugin) {
        case "text":
            return item.text ? { type: "text", text: item.text } : null;

+        case "tool_use":
+            return {
+                type: "tool_use",
+                id: item.id,
+                name: item.name,
+                input: typeof item.input === 'string' ? { query: item.input } : item.input
+            };
+
+        case "tool_result":
+            return {
+                type: "tool_result",
+                tool_use_id: item.tool_use_id,
+                content: item.content
+            };
+
        case "image_url":
            imageUrl = item.url || item.image_url?.url || item.image_url;


package/server/plugins/claude3VertexPlugin.js
@@ -126,9 +141,42 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {

        // Filter out system messages and empty messages
        let modifiedMessages = messagesCopy
-            .filter(message => message.role !== "system" && message.content)
-            .map(message => ({ ...message }));
-
+            .filter(message => message.role !== "system")
+            .map(message => {
+                // Handle OpenAI tool calls format conversion to Claude format
+                if (message.tool_calls) {
+                    return {
+                        role: message.role,
+                        content: message.tool_calls.map(toolCall => ({
+                            type: "tool_use",
+                            id: toolCall.id,
+                            name: toolCall.function.name,
+                            input: JSON.parse(toolCall.function.arguments)
+                        }))
+                    };
+                }
+
+                // Handle OpenAI tool response format conversion to Claude format
+                if (message.role === "tool") {
+                    return {
+                        role: "user",
+                        content: [{
+                            type: "tool_result",
+                            tool_use_id: message.tool_call_id,
+                            content: message.content
+                        }]
+                    };
+                }
+
+                return { ...message };
+            })
+            .filter(message => {
+                // Filter out messages with empty content
+                if (!message.content) return false;
+                if (Array.isArray(message.content) && message.content.length === 0) return false;
+                return true;
+            });
+
        // Combine consecutive messages from the same author
        const combinedMessages = modifiedMessages.reduce((acc, message) => {
            if (acc.length === 0 || message.role !== acc[acc.length - 1].role) {
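
Together with the new convertContentItem cases above, this converts OpenAI-style tool traffic into Claude's tool_use/tool_result blocks. An illustrative pair, with invented message values:

    // OpenAI-format input:
    const messages = [
        { role: "assistant", tool_calls: [{ id: "call_1", function: { name: "search", arguments: '{"query":"cortex"}' } }] },
        { role: "tool", tool_call_id: "call_1", content: "top result ..." },
    ];
    // After conversion for Claude on Vertex:
    // [{ role: "assistant", content: [{ type: "tool_use", id: "call_1", name: "search", input: { query: "cortex" } }] },
    //  { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1", content: "top result ..." }] }]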

package/server/plugins/claude3VertexPlugin.js
@@ -191,10 +239,68 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
            prompt,
            cortexRequest
        );
+
        const { system, modifiedMessages } =
            await this.convertMessagesToClaudeVertex(requestParameters.messages);
        requestParameters.system = system;
        requestParameters.messages = modifiedMessages;
+
+        // Convert OpenAI tools format to Claude format if present
+        if (parameters.tools) {
+            requestParameters.tools = parameters.tools.map(tool => {
+                if (tool.type === 'function') {
+                    return {
+                        name: tool.function.name,
+                        description: tool.function.description,
+                        input_schema: {
+                            type: "object",
+                            properties: tool.function.parameters.properties,
+                            required: tool.function.parameters.required || []
+                        }
+                    };
+                }
+                return tool;
+            });
+        }
+
+        // If there are function calls in messages, generate tools block
+        if (modifiedMessages?.some(msg =>
+            Array.isArray(msg.content) && msg.content.some(item => item.type === 'tool_use')
+        )) {
+            const toolsMap = new Map();
+
+            // Collect all unique tool uses from messages
+            modifiedMessages.forEach(msg => {
+                if (Array.isArray(msg.content)) {
+                    msg.content.forEach(item => {
+                        if (item.type === 'tool_use') {
+                            toolsMap.set(item.name, {
+                                name: item.name,
+                                description: `Tool for ${item.name}`,
+                                input_schema: {
+                                    type: "object",
+                                    properties: item.input ? Object.keys(item.input).reduce((acc, key) => {
+                                        acc[key] = {
+                                            type: typeof item.input[key] === 'string' ? 'string' : 'object',
+                                            description: `Parameter ${key} for ${item.name}`
+                                        };
+                                        return acc;
+                                    }, {}) : {},
+                                    required: item.input ? Object.keys(item.input) : []
+                                }
+                            });
+                        }
+                    });
+                }
+            });
+
+            if (requestParameters.tools) {
+                requestParameters.tools.push(...Array.from(toolsMap.values()));
+            } else {
+                requestParameters.tools = Array.from(toolsMap.values());
+            }
+        }
+
        requestParameters.max_tokens = this.getModelMaxReturnTokens();
        requestParameters.anthropic_version = "vertex-2023-10-16";
        return requestParameters;
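
The first branch maps explicitly supplied OpenAI function tools into Claude's schema; the second synthesizes minimal input_schema entries (string or object property types, every key required) for tools that only appear as tool_use blocks in history, since tool blocks are rejected when they arrive without a matching tools array. An example of the explicit mapping, with an invented tool:

    const openAiTool = {
        type: "function",
        function: {
            name: "get_weather",
            description: "Get the weather for a city",
            parameters: { type: "object", properties: { city: { type: "string" } }, required: ["city"] },
        },
    };
    // Becomes:
    // { name: "get_weather", description: "Get the weather for a city",
    //   input_schema: { type: "object", properties: { city: { type: "string" } }, required: ["city"] } }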

package/server/plugins/gemini15VisionPlugin.js
@@ -66,6 +66,13 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
                    data: base64Data
                }
            };
+        } else if (fileUrl.includes('youtube.com/') || fileUrl.includes('youtu.be/')) {
+            return {
+                fileData: {
+                    mimeType: 'video/youtube',
+                    fileUri: fileUrl
+                }
+            };
        }
        return null;
    }
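
YouTube links are now forwarded to Gemini as a fileData reference, matching Gemini's native video/youtube ingestion, instead of going through the base64 inlining used for other media; before this branch such URLs appear to have fallen through to return null. The returned part (URL invented):

    const fileUrl = "https://www.youtube.com/watch?v=abc123"; // hypothetical input
    const part = { fileData: { mimeType: "video/youtube", fileUri: fileUrl } };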

package/server/plugins/modelPlugin.js
@@ -210,7 +210,7 @@ class ModelPlugin {

        // First run handlebars compile on the pathway messages
        const compiledMessages = modelPrompt.messages.map((message) => {
-            if (message.content) {
+            if (message.content && typeof message.content === 'string') {
                const compileText = HandleBars.compile(message.content);
                return {
                    ...message,
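
The typeof guard matters because multimodal messages carry arrays as content; HandleBars.compile expects a string, so array content is now passed through uncompiled instead of being handed to the template engine. Illustration with invented messages:

    const messages = [
        { role: "system", content: "Hello {{name}}" },                             // still compiled
        { role: "user", content: [{ type: "image_url", url: "gs://b/img.png" }] }, // now left as-is
    ];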