@aj-archipelago/cortex 1.3.21 → 1.3.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-realtime-voice-server/src/cortex/memory.ts +2 -2
- package/lib/util.js +1 -1
- package/package.json +1 -1
- package/pathways/system/entity/memory/shared/sys_memory_helpers.js +228 -0
- package/pathways/system/entity/memory/sys_memory_format.js +30 -0
- package/pathways/system/entity/memory/sys_memory_manager.js +85 -27
- package/pathways/system/entity/memory/sys_memory_process.js +154 -0
- package/pathways/system/entity/memory/sys_memory_required.js +4 -2
- package/pathways/system/entity/memory/sys_memory_topic.js +22 -0
- package/pathways/system/entity/memory/sys_memory_update.js +50 -150
- package/pathways/system/entity/memory/sys_read_memory.js +67 -69
- package/pathways/system/entity/memory/sys_save_memory.js +1 -1
- package/pathways/system/entity/memory/sys_search_memory.js +1 -1
- package/pathways/system/entity/sys_entity_start.js +9 -6
- package/pathways/system/entity/sys_generator_image.js +5 -41
- package/pathways/system/entity/sys_generator_memory.js +3 -1
- package/pathways/system/entity/sys_generator_reasoning.js +1 -1
- package/pathways/system/entity/sys_router_tool.js +3 -4
- package/pathways/system/rest_streaming/sys_claude_35_sonnet.js +1 -1
- package/pathways/system/rest_streaming/sys_claude_3_haiku.js +1 -1
- package/pathways/system/rest_streaming/sys_google_gemini_chat.js +1 -1
- package/pathways/system/rest_streaming/sys_openai_chat_o1.js +1 -1
- package/pathways/system/rest_streaming/sys_openai_chat_o3_mini.js +1 -1
- package/pathways/transcribe_gemini.js +397 -0
- package/server/pathwayResolver.js +7 -7
- package/server/plugins/claude3VertexPlugin.js +109 -3
- package/server/plugins/gemini15VisionPlugin.js +7 -0
- package/server/plugins/modelPlugin.js +1 -1
- package/server/rest.js +24 -3
- package/tests/claude3VertexToolConversion.test.js +411 -0
- package/tests/memoryfunction.test.js +560 -46
- package/tests/openai_api.test.js +332 -0
package/pathways/transcribe_gemini.js
@@ -0,0 +1,397 @@
+import logger from "../lib/logger.js";
+import { publishRequestProgress } from "../lib/redisSubscription.js";
+import { alignSubtitles, getMediaChunks } from "../lib/util.js";
+import { Prompt } from "../server/prompt.js";
+
+const OFFSET_CHUNK = 500; // seconds of offset per chunk; only used if the helper does not provide one
+
+
+
+export function convertSrtToVtt(data) {
+    if (!data || !data.trim()) {
+        return "WEBVTT\n\n";
+    }
+    // remove DOS newlines
+    var srt = data.replace(/\r+/g, "");
+    // trim whitespace at start and end
+    srt = srt.replace(/^\s+|\s+$/g, "");
+
+    // Convert all timestamps from comma to dot format
+    srt = srt.replace(/(\d{2}:\d{2}:\d{2}),(\d{3})/g, "$1.$2");
+
+    // Add blank lines before sequence numbers that are followed by timecodes
+    srt = srt.replace(/(\n)(\d+)\n(\d{2}:\d{2}:\d{2}[,.])/g, "$1\n$2\n$3");
+
+    // get cues
+    var cuelist = srt.split("\n\n");
+    var result = "";
+    if (cuelist.length > 0) {
+        result += "WEBVTT\n\n";
+        for (var i = 0; i < cuelist.length; i = i + 1) {
+            const cue = convertSrtCue(cuelist[i]);
+            // Only add non-empty cues
+            if (cue) {
+                result += cue;
+            }
+        }
+    }
+    return result;
+}
+
+function convertSrtCue(caption) {
+    if (!caption || !caption.trim()) {
+        return "";
+    }
+    // remove all html tags for security reasons
+    //srt = srt.replace(/<[a-zA-Z\/][^>]*>/g, '');
+    var cue = "";
+    var s = caption.split(/\n/);
+    // concatenate multi-line cue text split across array entries into one
+    while (s.length > 3) {
+        for (var i = 3; i < s.length; i++) {
+            s[2] += "\n" + s[i];
+        }
+        s.splice(3, s.length - 3);
+    }
+    var line = 0;
+    // detect identifier
+    if (
+        s[0] &&
+        s[1] &&
+        !s[0].match(/\d+:\d+:\d+/) &&
+        s[1].match(/\d+:\d+:\d+/)
+    ) {
+        const match = s[0].match(/^\d+$/); // Only match if the entire line is a number
+        if (match) {
+            cue += match[0] + "\n";
+            line += 1;
+        }
+    }
+    // get time strings
+    if (s[line] && s[line].match(/\d+:\d+:\d+/)) {
+        // convert time string (match s[line], which the guard above checked)
+        var m = s[line].match(
+            /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*--?>\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
+        );
+        if (m) {
+            cue +=
+                m[1] +
+                ":" +
+                m[2] +
+                ":" +
+                m[3] +
+                "." +
+                m[4] +
+                " --> " +
+                m[5] +
+                ":" +
+                m[6] +
+                ":" +
+                m[7] +
+                "." +
+                m[8] +
+                "\n";
+            line += 1;
+        } else {
+            // Unrecognized time string
+            return "";
+        }
+    } else {
+        // file format error or comment lines
+        return "";
+    }
+    // get cue text
+    if (s[line]) {
+        cue += s[line] + "\n\n";
+    }
+    return cue;
+}
+
+export function detectSubtitleFormat(text) {
+    // Remove DOS newlines and trim whitespace
+    const cleanText = text.replace(/\r+/g, "").trim();
+    const lines = cleanText.split("\n");
+
+    // Check if it's VTT format
+    if (lines[0]?.trim() === "WEBVTT") {
+        return "vtt";
+    }
+
+    // Check if it's SRT format
+    // SRT files have a specific pattern:
+    // 1. Numeric index
+    // 2. Timestamp in format: 00:00:00,000 --> 00:00:00,000
+    // 3. Subtitle text
+    // 4. Blank line
+    const timeRegex =
+        /(\d{2}:\d{2}:\d{2})[,.](\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2})[,.](\d{3})/;
+
+    let hasValidStructure = false;
+    let index = 1;
+
+    // Check first few entries to confirm SRT structure
+    for (let i = 0; i < Math.min(lines.length, 12); i++) {
+        const line = lines[i]?.trim();
+        if (!line) continue;
+
+        // Check if line is a number matching our expected index
+        if (line === index.toString()) {
+            // Look ahead for timestamp
+            const nextLine = lines[i + 1]?.trim();
+            if (nextLine && timeRegex.test(nextLine)) {
+                hasValidStructure = true;
+                index++;
+                i++; // Skip timestamp line since we've verified it
+            }
+        }
+    }
+
+    if (hasValidStructure) {
+        return "srt";
+    }
+
+    return null;
+}
+
+
+export default {
+    prompt:
+        [
+            new Prompt({ messages: [
+                "{{messages}}",
+            ]}),
+        ],
+    model: 'gemini-flash-20-vision',
+    inputParameters: {
+        file: ``,
+        language: ``,
+        responseFormat: `text`,
+        wordTimestamped: false,
+        highlightWords: false,
+        maxLineWidth: 0,
+        maxLineCount: 0,
+        maxWordsPerLine: 0,
+        contextId: ``,
+    },
+    timeout: 3600, // in seconds
+    enableDuplicateRequests: false,
+
+    executePathway: async ({args, runAllPrompts, resolver}) => {
+        let intervalId;
+        const { requestId } = resolver;
+
+        try {
+            let totalCount = 11; // initial max chunk value
+            let completedCount = 0;
+            let partialCount = 0;
+            let partialRatio = 0;
+
+            const sendProgress = (partial = false, resetCount = false) => {
+                partialCount = resetCount ? 0 : partialCount;
+
+                if (partial) {
+                    partialCount++;
+                    const increment = 0.02 / Math.log2(partialCount + 1); // logarithmic diminishing increment
+                    partialRatio = Math.min(partialRatio + increment, 0.99); // limit to 0.99
+                } else {
+                    partialCount = 0;
+                    partialRatio = 0;
+                    completedCount++;
+                }
+                if (completedCount >= totalCount) return;
+
+                const progress = (completedCount + partialRatio) / totalCount;
+                logger.info(`Progress for ${requestId}: ${progress}`);
+
+                console.log(`Progress for ${requestId}: ${progress}`);
+                publishRequestProgress({
+                    requestId,
+                    progress,
+                    data: null,
+                });
+            }
+            sendProgress(true);
+            intervalId = setInterval(() => sendProgress(true), 3000);
+
+            const { file, responseFormat, wordTimestamped, maxLineWidth } = args;
+            if (!file) {
+                throw new Error("Please provide a file to transcribe.");
+            }
+
+
+            // check if file is a GCS file or a YouTube link
+            const isGcs = file.startsWith('gs://');
+            const isYoutube = file.match(/^(http(s)?:\/\/)?((w){3}.)?youtu(be|.be)?(\.com)?\/.+/);
+
+            let chunks = [{
+                url: file,
+                gcs: file,
+                offset: 0,
+            }];
+            if (!isGcs && !isYoutube) {
+                // get chunks from helper API if not GCS or YouTube
+                chunks = await getMediaChunks(file, requestId);
+            }
+            totalCount = chunks.length + 1;
+            logger.info(`Processing chunks: ${JSON.stringify(chunks)}`);
+
+            sendProgress(true);
+
+            let respectLimitsPrompt = " ";
+            if (maxLineWidth) {
+
+                const possiblePlacement = maxLineWidth <= 25
+                    ? "vertical" : maxLineWidth <= 35 ? "horizontal" : "";
+
+                respectLimitsPrompt += `The output lines must not exceed ${maxLineWidth} characters, so make sure your transcription lines and timestamps are perfectly aligned. `;
+
+                if (possiblePlacement) {
+                    respectLimitsPrompt += `This limit is a must, as the user will be using the output for ${possiblePlacement} display.`
+                }
+            }
+
+            const transcriptionLevel = wordTimestamped ? "word" : "phrase";
+
+            function getMessages(file, format) {
+
+                const responseFormat = format !== 'text' ? 'SRT' : 'text';
+
+                const messages = [
+                    {"role": "system", "content": `Instructions:\nYou are an AI entity with expertise in transcription. Your response contains only the transcription, with no comments or additional content.
+
+Your output must be in the format asked, must strictly follow that format, and must be parseable by automatic parsers.
+
+Word-level transcriptions must be timestamped per word, and phrase-level transcriptions per phrase.
+
+Each transcription timestamp must precisely match the corresponding audio/video segment.
+Each timestamp must correspond to actual spoken content.
+End time cannot exceed total media duration. Especially when transcribing word-level, double-check your timestamps and never exceed the total duration.
+
+You must follow 1, 2, 3, ... numbering for each transcription segment without any missing numbers.
+Never put newlines or spaces in the middle of a timestamp.
+Never put multiple lines for a single timestamp.
+
+Example responses:
+
+- If asked SRT format, e.g.:
+1
+00:00:00,498 --> 00:00:02,827
+Hello World!
+
+2
+00:00:02,827 --> 00:00:06,383
+Being AI is fun!
+
+- If asked VTT format, e.g.:
+WEBVTT
+
+1
+00:00:00.000 --> 00:00:02.944
+Hello World2!
+
+2
+00:00:05.344 --> 00:00:08.809
+Being AI is also great!
+
+- If asked text format, e.g.:
+Hello World!!! Being AI is being great yet again!
+
+Word-level output e.g.:
+
+WEBVTT
+
+1
+00:00:00.000 --> 00:00:01.944
+Hello
+
+2
+00:00:01.964 --> 00:00:02.383
+World!
+
+
+You must follow spacing, punctuation, and timestamps as shown in the examples; otherwise your response will not be accepted.
+Never output multiple lines for a single timestamp.
+Even a single newline or space can cause the response to be rejected. You must follow the format strictly. You must place newlines and timestamps exactly as shown in the examples.
+
+`},
+                    {"role": "user", "content": [
+                        `{ type: 'text', text: 'Transcribe the media ${transcriptionLevel}-level in ${responseFormat} format.${respectLimitsPrompt}' }`,
+                        JSON.stringify({
+                            type: 'image_url',
+                            url: file,
+                            gcs: file
+                        })
+                    ]},
+                ]
+
+                return messages;
+            }
+
+            const processChunksParallel = async (chunks, args) => {
+                try {
+                    const chunkPromises = chunks.map(async (chunk, index) => ({
+                        index,
+                        result: await runAllPrompts({
+                            ...args,
+                            messages: getMessages(chunk.gcs || chunk.uri, responseFormat),
+                            requestId: `${requestId}-${index}`
+                        })
+                    }));
+
+                    // const results = await Promise.all(chunkPromises);
+
+                    const results = await Promise.all(
+                        chunkPromises.map(promise =>
+                            promise.then(result => {
+                                sendProgress();
+                                return result;
+                            })
+                        ));
+
+                    return results
+                        .sort((a, b) => a.index - b.index)
+                        .map(item => item.result);
+                } catch (error) {
+                    logger.error('Error processing chunks:', error);
+                    throw error;
+                }
+            };
+
+            // serial processing of chunks
+            // const result = [];
+            // for(const chunk of chunks) {
+            //     const chunkResult = await runAllPrompts({ ...args, messages: getMessages(chunk.gcs || chunk.uri) });
+            //     result.push(chunkResult);
+            // }
+
+            const result = await processChunksParallel(chunks, args);
+
+            // publishRequestProgress({
+            //     requestId: this.rootRequestId || this.requestId,
+            //     progress: 1,
+            //     data: "a",
+            // });
+
+            if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for timestamped formats
+
+                // convert, as Gemini output is unstable
+                for (let i = 0; i < result.length; i++) {
+                    try {
+                        result[i] = convertSrtToVtt(result[i]);
+                    } catch (error) {
+                        logger.error(`Error converting to vtt: ${error}`);
+                    }
+                }
+
+                const offsets = chunks.map((chunk, index) => chunk?.offset || index * OFFSET_CHUNK);
+                return alignSubtitles(result, responseFormat, offsets);
+            }
+            return result.join(` `);
+        } catch (error) {
+            logger.error(`Error in transcribing: ${error}`);
+            throw error;
+        } finally {
+            intervalId && clearInterval(intervalId);
+        }
+    }
+};
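A minimal usage sketch of the two exported subtitle helpers above; the SRT sample string and the import path are illustrative assumptions, not part of the package:

    import { convertSrtToVtt, detectSubtitleFormat } from "./pathways/transcribe_gemini.js";

    const srt = "1\n00:00:00,498 --> 00:00:02,827\nHello World!\n\n2\n00:00:02,827 --> 00:00:06,383\nBeing AI is fun!";

    console.log(detectSubtitleFormat(srt)); // "srt"
    console.log(convertSrtToVtt(srt));      // "WEBVTT\n\n1\n00:00:00.498 --> 00:00:02.827\nHello World!\n\n..."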
package/server/pathwayResolver.js
@@ -102,7 +102,7 @@ class PathwayResolver {
         if (!modelTypesExcludedFromProgressUpdates.includes(this.model.type)) {
             await publishRequestProgress({
                 requestId: this.rootRequestId || this.requestId,
-                progress: completedCount / totalCount,
+                progress: Math.min(completedCount, totalCount) / totalCount,
                 data: JSON.stringify(responseData),
             });
         }
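The clamp guards against completedCount overshooting totalCount when more requests finish than were anticipated; a quick sketch with assumed counts:

    // hypothetical counts: 7 completions against an anticipated total of 5
    const completedCount = 7, totalCount = 5;
    completedCount / totalCount;                       // 1.4 — progress past 100%
    Math.min(completedCount, totalCount) / totalCount; // 1   — capped at 100%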
@@ -227,10 +227,10 @@ class PathwayResolver {
         // Load saved context and core memory if it exists
         const [savedContext, memorySelf, memoryDirectives, memoryTopics, memoryUser, memoryContext] = await Promise.all([
             (getv && await getv(this.savedContextId)) || {},
-            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memorySelf', priority: 1}),
-            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryDirectives', priority: 1 }),
+            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memorySelf', priority: 1, stripMetadata: true }),
+            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryDirectives', priority: 1, stripMetadata: true }),
             callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryTopics', priority: 0, numResults: 10 }),
-            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryUser', priority: 1 }),
+            callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryUser', priority: 1, stripMetadata: true }),
             callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryContext', priority: 0 }),
         ]).catch(error => {
             this.logError(`Failed to load memory: ${error.message}`);
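A sketch of reading one memory section with the new flag; the contextId value is illustrative, and the exact stripMetadata semantics live in sys_read_memory, so the inline comment is an assumption:

    const memorySelf = await callPathway('sys_read_memory', {
        contextId: 'ctx-123',   // illustrative id
        section: 'memorySelf',
        priority: 1,
        stripMetadata: true     // presumably returns the memory text without per-entry metadata
    });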
@@ -315,12 +315,12 @@ class PathwayResolver {
     processInputText(text) {
         let chunkTokenLength = 0;
         if (this.pathway.inputChunkSize) {
-            chunkTokenLength =
+            chunkTokenLength = this.pathway.inputChunkSize;
         } else {
             chunkTokenLength = this.chunkMaxTokenLength;
         }
         const encoded = text ? encode(text) : [];
-        if (!this.useInputChunking) {
+        if (!this.useInputChunking) { // no chunking, return as is
             if (encoded.length > 0 && encoded.length >= chunkTokenLength) {
                 const warnText = `Truncating long input text. Text length: ${text.length}`;
                 this.logWarning(warnText);
@@ -375,7 +375,7 @@ class PathwayResolver {
     // Process the request and return the result
     async processRequest({ text, ...parameters }) {
         text = await this.summarizeIfEnabled({ text, ...parameters }); // summarize if flag enabled
-        const chunks = this.processInputText(text);
+        const chunks = text && this.processInputText(text) || [text];
 
         let anticipatedRequestCount = chunks.length * this.prompts.length
 
package/server/plugins/claude3VertexPlugin.js
@@ -15,6 +15,21 @@ async function convertContentItem(item, maxImageSize, plugin) {
         case "text":
             return item.text ? { type: "text", text: item.text } : null;
 
+        case "tool_use":
+            return {
+                type: "tool_use",
+                id: item.id,
+                name: item.name,
+                input: typeof item.input === 'string' ? { query: item.input } : item.input
+            };
+
+        case "tool_result":
+            return {
+                type: "tool_result",
+                tool_use_id: item.tool_use_id,
+                content: item.content
+            };
+
         case "image_url":
             imageUrl = item.url || item.image_url?.url || item.image_url;
 
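Roughly what the two new cases produce, with assumed sample items (ids and values are illustrative):

    // a string tool input is wrapped into an object; object inputs pass through unchanged
    const toolUse = { type: "tool_use", id: "toolu_01", name: "search", input: "cortex release" };
    // → { type: "tool_use", id: "toolu_01", name: "search", input: { query: "cortex release" } }

    // tool results pass through with the linking id preserved
    const toolResult = { type: "tool_result", tool_use_id: "toolu_01", content: "3 results" };
    // → { type: "tool_result", tool_use_id: "toolu_01", content: "3 results" }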
@@ -126,9 +141,42 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
 
         // Filter out system messages and empty messages
         let modifiedMessages = messagesCopy
-            .filter(message => message.role !== "system"
-            .map(message =>
-
+            .filter(message => message.role !== "system")
+            .map(message => {
+                // Handle OpenAI tool calls format conversion to Claude format
+                if (message.tool_calls) {
+                    return {
+                        role: message.role,
+                        content: message.tool_calls.map(toolCall => ({
+                            type: "tool_use",
+                            id: toolCall.id,
+                            name: toolCall.function.name,
+                            input: JSON.parse(toolCall.function.arguments)
+                        }))
+                    };
+                }
+
+                // Handle OpenAI tool response format conversion to Claude format
+                if (message.role === "tool") {
+                    return {
+                        role: "user",
+                        content: [{
+                            type: "tool_result",
+                            tool_use_id: message.tool_call_id,
+                            content: message.content
+                        }]
+                    };
+                }
+
+                return { ...message };
+            })
+            .filter(message => {
+                // Filter out messages with empty content
+                if (!message.content) return false;
+                if (Array.isArray(message.content) && message.content.length === 0) return false;
+                return true;
+            });
+
         // Combine consecutive messages from the same author
         const combinedMessages = modifiedMessages.reduce((acc, message) => {
             if (acc.length === 0 || message.role !== acc[acc.length - 1].role) {
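For orientation, an assumed OpenAI-style history and the Claude-format messages the map step above produces:

    const openAiMessages = [
        { role: "assistant", tool_calls: [{ id: "call_1", function: { name: "search", arguments: '{"query":"cortex"}' } }] },
        { role: "tool", tool_call_id: "call_1", content: "3 results found" },
    ];
    // after conversion:
    // { role: "assistant", content: [{ type: "tool_use", id: "call_1", name: "search", input: { query: "cortex" } }] }
    // { role: "user",      content: [{ type: "tool_result", tool_use_id: "call_1", content: "3 results found" }] }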
@@ -191,10 +239,68 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
             prompt,
             cortexRequest
         );
+
         const { system, modifiedMessages } =
             await this.convertMessagesToClaudeVertex(requestParameters.messages);
         requestParameters.system = system;
         requestParameters.messages = modifiedMessages;
+
+        // Convert OpenAI tools format to Claude format if present
+        if (parameters.tools) {
+            requestParameters.tools = parameters.tools.map(tool => {
+                if (tool.type === 'function') {
+                    return {
+                        name: tool.function.name,
+                        description: tool.function.description,
+                        input_schema: {
+                            type: "object",
+                            properties: tool.function.parameters.properties,
+                            required: tool.function.parameters.required || []
+                        }
+                    };
+                }
+                return tool;
+            });
+        }
+
+        // If there are function calls in messages, generate tools block
+        if (modifiedMessages?.some(msg =>
+            Array.isArray(msg.content) && msg.content.some(item => item.type === 'tool_use')
+        )) {
+            const toolsMap = new Map();
+
+            // Collect all unique tool uses from messages
+            modifiedMessages.forEach(msg => {
+                if (Array.isArray(msg.content)) {
+                    msg.content.forEach(item => {
+                        if (item.type === 'tool_use') {
+                            toolsMap.set(item.name, {
+                                name: item.name,
+                                description: `Tool for ${item.name}`,
+                                input_schema: {
+                                    type: "object",
+                                    properties: item.input ? Object.keys(item.input).reduce((acc, key) => {
+                                        acc[key] = {
+                                            type: typeof item.input[key] === 'string' ? 'string' : 'object',
+                                            description: `Parameter ${key} for ${item.name}`
+                                        };
+                                        return acc;
+                                    }, {}) : {},
+                                    required: item.input ? Object.keys(item.input) : []
+                                }
+                            });
+                        }
+                    });
+                }
+            });
+
+            if (requestParameters.tools) {
+                requestParameters.tools.push(...Array.from(toolsMap.values()));
+            } else {
+                requestParameters.tools = Array.from(toolsMap.values());
+            }
+        }
+
         requestParameters.max_tokens = this.getModelMaxReturnTokens();
         requestParameters.anthropic_version = "vertex-2023-10-16";
         return requestParameters;
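An assumed OpenAI-format tool definition and the Claude/Vertex shape the mapping above yields:

    const openAiTool = {
        type: "function",
        function: {
            name: "get_weather",
            description: "Get current weather for a city",
            parameters: { type: "object", properties: { city: { type: "string" } }, required: ["city"] }
        }
    };
    // mapped result:
    // {
    //     name: "get_weather",
    //     description: "Get current weather for a city",
    //     input_schema: { type: "object", properties: { city: { type: "string" } }, required: ["city"] }
    // }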
package/server/plugins/gemini15VisionPlugin.js
@@ -66,6 +66,13 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
                     data: base64Data
                 }
             };
+        } else if (fileUrl.includes('youtube.com/') || fileUrl.includes('youtu.be/')) {
+            return {
+                fileData: {
+                    mimeType: 'video/youtube',
+                    fileUri: fileUrl
+                }
+            };
         }
         return null;
     }
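With this branch, a YouTube link is handed to Gemini by reference instead of being downloaded and inlined as base64; for an illustrative URL the returned part looks like:

    // fileUrl = "https://www.youtube.com/watch?v=abc123" (illustrative)
    // → { fileData: { mimeType: "video/youtube", fileUri: "https://www.youtube.com/watch?v=abc123" } }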
package/server/plugins/modelPlugin.js
@@ -210,7 +210,7 @@ class ModelPlugin {
 
         // First run handlebars compile on the pathway messages
         const compiledMessages = modelPrompt.messages.map((message) => {
-            if (message.content) {
+            if (message.content && typeof message.content === 'string') {
                 const compileText = HandleBars.compile(message.content);
                 return {
                     ...message,
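The added typeof guard matters because multimodal messages can carry array content, which Handlebars cannot compile as a template; a sketch of the two shapes (the message values are assumptions):

    // string content — still compiled through Handlebars
    const textMessage = { role: "user", content: "Hello {{name}}" };

    // array content (vision/tool messages) — now passed through uncompiled
    const visionMessage = { role: "user", content: [
        { type: "text", text: "Describe this image" },
        { type: "image_url", url: "gs://bucket/img.png" }
    ] };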
|