@aj-archipelago/cortex 1.3.24 → 1.3.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,23 +21,25 @@ const callPathway = async (pathwayName, inArgs, pathwayResolver) => {
21
21
  let rootRequestId = pathwayResolver?.rootRequestId || pathwayResolver?.requestId;
22
22
 
23
23
  let data = await pathway.rootResolver(parent, {...args, rootRequestId}, { config, pathway, requestState } );
24
+ pathwayResolver && pathwayResolver.mergeResults(data);
25
+
26
+ let returnValue = data?.result || null;
24
27
 
25
28
  if (args.async || args.stream) {
26
29
  const { result: requestId } = data;
27
30
 
28
31
  // Fire the resolver for the async requestProgress
29
- logger.info(`Callpathway starting async requestProgress, requestId: ${requestId}`);
32
+ logger.info(`Callpathway starting async requestProgress, pathway: ${pathwayName}, requestId: ${requestId}`);
30
33
  const { resolver, args } = requestState[requestId];
31
34
  requestState[requestId].useRedis = false;
32
35
  requestState[requestId].started = true;
33
36
 
34
- data = resolver && await resolver(args);
37
+ resolver && await resolver(args);
38
+
39
+ returnValue = null;
35
40
  }
36
41
 
37
- // Update pathwayResolver with new data if available
38
- pathwayResolver?.mergeResults(data);
39
-
40
- return data?.result;
42
+ return returnValue;
41
43
  };
42
44
 
43
45
  const gpt3Encode = (text) => {
@@ -48,7 +50,7 @@ const gpt3Decode = (text) => {
48
50
  return decode(text);
49
51
  }
50
52
 
51
- const say = async (requestId, message, maxMessageLength = Infinity) => {
53
+ const say = async (requestId, message, maxMessageLength = Infinity, voiceResponse = true) => {
52
54
  try {
53
55
  const chunks = getSemanticChunks(message, maxMessageLength);
54
56
 
@@ -60,11 +62,13 @@ const say = async (requestId, message, maxMessageLength = Infinity) => {
60
62
  });
61
63
  }
62
64
 
63
- await publishRequestProgress({
64
- requestId,
65
- progress: 0.5,
66
- data: " ... "
67
- });
65
+ if (voiceResponse) {
66
+ await publishRequestProgress({
67
+ requestId,
68
+ progress: 0.5,
69
+ data: " ... "
70
+ });
71
+ }
68
72
 
69
73
  await publishRequestProgress({
70
74
  requestId,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.3.24",
3
+ "version": "1.3.26",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "private": false,
6
6
  "repository": {
@@ -33,7 +33,7 @@
33
33
  "type": "module",
34
34
  "homepage": "https://github.com/aj-archipelago/cortex#readme",
35
35
  "dependencies": {
36
- "@aj-archipelago/subvibe": "^1.0.5",
36
+ "@aj-archipelago/subvibe": "^1.0.8",
37
37
  "@apollo/server": "^4.7.3",
38
38
  "@apollo/server-plugin-response-cache": "^4.1.2",
39
39
  "@apollo/utils.keyvadapter": "^3.0.0",
@@ -48,6 +48,10 @@ export default {
48
48
  chatHistory: args.chatHistory.slice(-20)
49
49
  };
50
50
 
51
+ if (generatorPathway === 'coding') {
52
+ return;
53
+ }
54
+
51
55
  if (generatorPathway === 'sys_generator_document') {
52
56
  generatorPathway = 'sys_generator_results';
53
57
  newArgs.dataSources = ["mydata"];
@@ -57,11 +61,7 @@ export default {
57
61
 
58
62
  const result = await callPathway(generatorPathway, newArgs, resolver);
59
63
 
60
- if (args.stream) {
61
- return "";
62
- }
63
-
64
- if (!result) {
64
+ if (!result && !args.stream) {
65
65
  result = await callPathway('sys_generator_error', { ...args, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
66
66
  }
67
67
 
@@ -105,22 +105,12 @@ export default {
105
105
  }
106
106
  }
107
107
 
108
- const fetchChatResponse = async (args, pathwayResolver) => {
109
- const [chatResponse, chatTitleResponse] = await Promise.all([
110
- callPathway('sys_generator_quick', {...args, model: styleModel}, pathwayResolver),
111
- callPathway('chat_title', { ...args, chatHistory: chatHistoryBeforeMemory, stream: false}),
112
- ]);
113
-
114
- title = chatTitleResponse;
115
-
116
- return chatResponse;
117
- };
118
-
119
- // start fetching the default response - we may need it later
108
+ // start fetching responses in parallel if not streaming
120
109
  let fetchChatResponsePromise;
121
110
  if (!args.stream) {
122
- fetchChatResponsePromise = fetchChatResponse({ ...args, ackResponse }, pathwayResolver);
111
+ fetchChatResponsePromise = callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver);
123
112
  }
113
+ const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: chatHistoryBeforeMemory, stream: false});
124
114
 
125
115
  const visionContentPresent = chatArgsHasImageUrl(args);
126
116
 
@@ -223,42 +213,43 @@ export default {
223
213
  }
224
214
  }
225
215
 
216
+ title = await fetchTitleResponsePromise;
217
+
218
+ pathwayResolver.tool = JSON.stringify({
219
+ hideFromModel: toolCallbackName ? true : false,
220
+ toolCallbackName,
221
+ title,
222
+ search: toolCallbackName === 'sys_generator_results' ? true : false,
223
+ coding: toolCallbackName === 'coding' ? true : false,
224
+ codeRequestId,
225
+ toolCallbackId
226
+ });
227
+
226
228
  if (toolCallbackMessage) {
227
229
  if (args.skipCallbackMessage) {
228
- pathwayResolver.tool = JSON.stringify({ hideFromModel: false, search: false, title });
229
230
  return await callPathway('sys_entity_continue', { ...args, stream: false, model: styleModel, generatorPathway: toolCallbackName }, pathwayResolver);
230
231
  }
231
232
 
232
233
  if (args.stream) {
233
234
  if (!ackResponse) {
234
- await say(pathwayResolver.requestId, toolCallbackMessage || "One moment please.", 10);
235
+ await say(pathwayResolver.requestId, toolCallbackMessage || "One moment please.", 10, args.voiceResponse ? true : false);
235
236
  }
236
- pathwayResolver.tool = JSON.stringify({ hideFromModel: false, search: false, title });
237
- await callPathway('sys_entity_continue', { ...args, stream: true, generatorPathway: toolCallbackName }, pathwayResolver);
238
- return "";
237
+ await callPathway('sys_entity_continue', { ...args, stream: true, generatorPathway: toolCallbackName }, pathwayResolver);
238
+ return;
239
239
  }
240
240
 
241
- pathwayResolver.tool = JSON.stringify({
242
- hideFromModel: toolCallbackName ? true : false,
243
- toolCallbackName,
244
- title,
245
- search: toolCallbackName === 'sys_generator_results' ? true : false,
246
- coding: toolCallbackName === 'coding' ? true : false,
247
- codeRequestId,
248
- toolCallbackId
249
- });
250
241
  return toolCallbackMessage || "One moment please.";
251
242
  }
252
243
 
253
- const chatResponse = await (fetchChatResponsePromise || fetchChatResponse({ ...args, ackResponse }, pathwayResolver));
244
+ const chatResponse = await (fetchChatResponsePromise || callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver));
254
245
  pathwayResolver.tool = JSON.stringify({ search: false, title });
255
- return args.stream ? "" : chatResponse;
246
+ return args.stream ? null : chatResponse;
256
247
 
257
248
  } catch (e) {
258
249
  pathwayResolver.logError(e);
259
- const chatResponse = await (fetchChatResponsePromise || fetchChatResponse({ ...args, ackResponse }, pathwayResolver));
250
+ const chatResponse = await (fetchChatResponsePromise || callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver));
260
251
  pathwayResolver.tool = JSON.stringify({ search: false, title });
261
- return args.stream ? "" : chatResponse;
252
+ return args.stream ? null : chatResponse;
262
253
  }
263
254
  }
264
255
  };
@@ -341,7 +341,7 @@ Here are the information sources that were found:
341
341
  clearTimeout(timeoutId);
342
342
  }
343
343
 
344
- if (!args.stream) {
344
+ if (!args.voiceResponse) {
345
345
  const referencedSources = extractReferencedSources(result);
346
346
  searchResults = searchResults.length ? pruneSearchResults(searchResults, referencedSources) : [];
347
347
  }
@@ -5,283 +5,6 @@ import { Prompt } from "../server/prompt.js";
5
5
 
6
6
  const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide
7
7
 
8
- export function convertSrtToVtt(data) {
9
- if (!data || !data.trim()) {
10
- return "WEBVTT\n\n";
11
- }
12
-
13
- // If it's already VTT format and has header
14
- if (data.trim().startsWith("WEBVTT")) {
15
- const lines = data.split("\n");
16
- const result = ["WEBVTT", ""]; // Start with header and blank line
17
- let currentCue = [];
18
-
19
- for (let i = 0; i < lines.length; i++) {
20
- const line = lines[i].trim();
21
-
22
- // Skip empty lines and the WEBVTT header
23
- if (!line || line === "WEBVTT") {
24
- continue;
25
- }
26
-
27
- // If it's a number by itself, it's a cue identifier
28
- if (/^\d+$/.test(line)) {
29
- // If we have a previous cue, add it with proper spacing
30
- if (currentCue.length > 0) {
31
- result.push(currentCue.join("\n"));
32
- result.push(""); // Add blank line between cues
33
- currentCue = [];
34
- }
35
- currentCue.push(line);
36
- continue;
37
- }
38
-
39
- // Check for and convert timestamps
40
- const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
41
- const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
42
- const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
43
-
44
- const fullMatch = line.match(fullTimeRegex);
45
- const shortMatch = line.match(shortTimeRegex);
46
- const ultraShortMatch = line.match(ultraShortTimeRegex);
47
-
48
- if (fullMatch) {
49
- // Already in correct format, just convert comma to dot
50
- const convertedTime = line.replace(/,/g, '.');
51
- currentCue.push(convertedTime);
52
- } else if (shortMatch) {
53
- // Convert MM:SS to HH:MM:SS
54
- const convertedTime = `00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`;
55
- currentCue.push(convertedTime);
56
- } else if (ultraShortMatch) {
57
- // Convert SS to HH:MM:SS
58
- const convertedTime = `00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`;
59
- currentCue.push(convertedTime);
60
- } else if (!line.includes('-->')) {
61
- // Must be subtitle text
62
- currentCue.push(line);
63
- }
64
- }
65
-
66
- // Add the last cue if there is one
67
- if (currentCue.length > 0) {
68
- result.push(currentCue.join("\n"));
69
- result.push(""); // Add final blank line
70
- }
71
-
72
- // Join with newlines and ensure proper ending
73
- return result.join("\n") + "\n";
74
- }
75
-
76
- // remove dos newlines and trim
77
- var srt = data.replace(/\r+/g, "");
78
- srt = srt.replace(/^\s+|\s+$/g, "");
79
-
80
- // Split into cues and filter out empty ones
81
- var cuelist = srt.split("\n\n").filter(cue => cue.trim());
82
-
83
- // Always add WEBVTT header
84
- var result = "WEBVTT\n\n";
85
-
86
- // Convert each cue to VTT format
87
- for (const cue of cuelist) {
88
- const lines = cue.split("\n").map(line => line.trim()).filter(line => line);
89
- if (lines.length < 2) continue;
90
-
91
- let output = [];
92
-
93
- // Handle cue identifier
94
- if (/^\d+$/.test(lines[0])) {
95
- output.push(lines[0]);
96
- lines.shift();
97
- }
98
-
99
- // Handle timestamp line
100
- const timeLine = lines[0];
101
- const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
102
- const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
103
- const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
104
-
105
- const fullMatch = timeLine.match(fullTimeRegex);
106
- const shortMatch = timeLine.match(shortTimeRegex);
107
- const ultraShortMatch = timeLine.match(ultraShortTimeRegex);
108
-
109
- if (fullMatch) {
110
- output.push(timeLine.replace(/,/g, '.'));
111
- } else if (shortMatch) {
112
- output.push(`00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`);
113
- } else if (ultraShortMatch) {
114
- output.push(`00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`);
115
- } else {
116
- continue; // Invalid timestamp format
117
- }
118
-
119
- // Add remaining lines as subtitle text
120
- output.push(...lines.slice(1));
121
-
122
- // Add the cue to result
123
- result += output.join("\n") + "\n\n";
124
- }
125
-
126
- return result;
127
- }
128
-
129
- function convertSrtCue(caption) {
130
- if (!caption || !caption.trim()) {
131
- return "";
132
- }
133
-
134
- var cue = "";
135
- var s = caption.split(/\n/);
136
-
137
- // concatenate multi-line string separated in array into one
138
- while (s.length > 3) {
139
- for (var i = 3; i < s.length; i++) {
140
- s[2] += "\n" + s[i];
141
- }
142
- s.splice(3, s.length - 3);
143
- }
144
-
145
- var line = 0;
146
-
147
- // detect identifier
148
- if (
149
- s[0] &&
150
- s[1] &&
151
- !s[0].match(/\d+:\d+:\d+/) &&
152
- s[1].match(/\d+:\d+:\d+/)
153
- ) {
154
- const match = s[0].match(/^\d+$/); // Only match if the entire line is a number
155
- if (match) {
156
- cue += match[0] + "\n";
157
- line += 1;
158
- }
159
- }
160
-
161
- // get time strings
162
- if (s[line] && s[line].match(/\d+:\d+:\d+/)) {
163
- // convert time string
164
- var m = s[line].match(
165
- /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*--?>\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
166
- );
167
- if (m) {
168
- cue +=
169
- m[1] +
170
- ":" +
171
- m[2] +
172
- ":" +
173
- m[3] +
174
- "." +
175
- m[4] +
176
- " --> " +
177
- m[5] +
178
- ":" +
179
- m[6] +
180
- ":" +
181
- m[7] +
182
- "." +
183
- m[8] +
184
- "\n";
185
- line += 1;
186
- } else {
187
- // Try alternate timestamp format
188
- m = s[line].match(
189
- /(\d{2}):(\d{2})\.(\d{3})\s*--?>\s*(\d{2}):(\d{2})\.(\d{3})/,
190
- );
191
- if (m) {
192
- // Convert to full timestamp format
193
- cue +=
194
- "00:" +
195
- m[1] +
196
- ":" +
197
- m[2] +
198
- "." +
199
- m[3] +
200
- " --> " +
201
- "00:" +
202
- m[4] +
203
- ":" +
204
- m[5] +
205
- "." +
206
- m[6] +
207
- "\n";
208
- line += 1;
209
- } else {
210
- // Unrecognized timestring
211
- return "";
212
- }
213
- }
214
- } else {
215
- // file format error or comment lines
216
- return "";
217
- }
218
-
219
- // get cue text
220
- if (s[line]) {
221
- cue += s[line] + "\n\n";
222
- }
223
-
224
- return cue;
225
- }
226
-
227
- export function detectSubtitleFormat(text) {
228
- // Remove DOS newlines and trim whitespace
229
- const cleanText = text.replace(/\r+/g, "").trim();
230
- const lines = cleanText.split("\n");
231
-
232
- // Check if it's VTT format - be more lenient with the header
233
- if (lines[0]?.trim() === "WEBVTT") {
234
- return "vtt";
235
- }
236
-
237
- // Define regex patterns for timestamp formats
238
- const srtTimeRegex =
239
- /(\d{2}:\d{2}:\d{2})[,.]\d{3}\s*-->\s*(\d{2}:\d{2}:\d{2})[,.]\d{3}/;
240
- const vttTimeRegex =
241
- /(?:\d{2}:)?(\d{1,2})[.]\d{3}\s*-->\s*(?:\d{2}:)?(\d{1,2})[.]\d{3}/;
242
-
243
- let hasSrtTimestamps = false;
244
- let hasVttTimestamps = false;
245
- let hasSequentialNumbers = false;
246
- let lastNumber = 0;
247
-
248
- // Look through first few lines to detect patterns
249
- for (let i = 0; i < Math.min(lines.length, 12); i++) {
250
- const line = lines[i]?.trim();
251
- if (!line) continue;
252
-
253
- // Check for timestamps
254
- if (srtTimeRegex.test(line)) {
255
- hasSrtTimestamps = true;
256
- }
257
- if (vttTimeRegex.test(line)) {
258
- hasVttTimestamps = true;
259
- }
260
-
261
- // Check for sequential numbers
262
- const numberMatch = line.match(/^(\d+)$/);
263
- if (numberMatch) {
264
- const num = parseInt(numberMatch[1]);
265
- if (lastNumber === 0 || num === lastNumber + 1) {
266
- hasSequentialNumbers = true;
267
- lastNumber = num;
268
- }
269
- }
270
- }
271
-
272
- // If it has SRT-style timestamps (HH:MM:SS), it's SRT
273
- if (hasSrtTimestamps && hasSequentialNumbers) {
274
- return "srt";
275
- }
276
-
277
- // If it has VTT-style timestamps (MM:SS) or WEBVTT header, it's VTT
278
- if (hasVttTimestamps) {
279
- return "vtt";
280
- }
281
-
282
- return null;
283
- }
284
-
285
8
  export default {
286
9
  prompt:
287
10
  [
@@ -381,7 +104,7 @@ export default {
381
104
 
382
105
  function getMessages(file, format) {
383
106
 
384
- const responseFormat = format!== 'text' ? 'SRT' : 'text';
107
+ const responseFormat = format!== 'text' ? 'VTT' : 'text';
385
108
 
386
109
  const messages = [
387
110
  {"role": "system", "content": `Instructions:\nYou are an AI entity with expertise of transcription. Your response only contains the transcription, no comments or additonal stuff.
@@ -491,26 +214,8 @@ Even a single newline or space can cause the response to be rejected. You must f
491
214
  // }
492
215
 
493
216
  const result = await processChunksParallel(chunks, args);
494
-
495
- // publishRequestProgress({
496
- // requestId: this.rootRequestId || this.requestId,
497
- // progress: 1,
498
- // data: "a",
499
- // });
500
217
 
501
218
  if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
502
-
503
-
504
-
505
- // convert as gemini output is unstable
506
- for(let i = 0; i < result.length; i++) {
507
- try{
508
- result[i] = convertSrtToVtt(result[i]);
509
- }catch(error){
510
- logger.error(`Error converting to vtt: ${error}`);
511
- }
512
- }
513
-
514
219
  const offsets = chunks.map((chunk, index) => chunk?.offset || index * OFFSET_CHUNK);
515
220
  return alignSubtitles(result, responseFormat, offsets);
516
221
  }
@@ -1,95 +1,7 @@
1
+ import { parse, build } from "@aj-archipelago/subvibe";
1
2
  import logger from "../lib/logger.js";
2
3
  import { callPathway } from "../lib/pathwayTools.js";
3
4
 
4
- function preprocessStr(str, format) {
5
- try {
6
- if (!str) return "";
7
- let content = str
8
- // Normalize line endings
9
- .replace(/\r\n?/g, "\n")
10
- // Remove WEBVTT header for processing
11
- .replace(/^WEBVTT\n\n/, '');
12
-
13
- // For SRT, convert commas to dots in timestamps
14
- if (format === 'srt') {
15
- content = content.replace(/(\d{2}:\d{2}:\d{2}),(\d{3})/g, "$1.$2");
16
- }
17
-
18
- return content
19
- // Ensure each subtitle block is properly separated
20
- .split(/\n\s*\n/)
21
- .map(block => block.trim())
22
- .filter(block => {
23
- // Match both numeric indices (SRT) and optional caption identifiers (VTT)
24
- const firstLine = block.split('\n')[0];
25
- return block && (
26
- /^\d+$/.test(firstLine) || // SRT style
27
- /^\d{2}:\d{2}/.test(firstLine) || // VTT style without identifier
28
- /^[^\n]+\n\d{2}:\d{2}/.test(block) // VTT style with identifier
29
- );
30
- })
31
- .join("\n\n")
32
- + "\n\n";
33
- } catch (e) {
34
- logger.error(`An error occurred in content text preprocessing: ${e}`);
35
- return "";
36
- }
37
- }
38
-
39
- function timeToMs(timeStr) {
40
- const [time, ms] = timeStr.split(/[.,]/);
41
- const [hours, minutes, seconds] = time.split(':').map(Number);
42
- return (hours * 3600 + minutes * 60 + seconds) * 1000 + parseInt(ms);
43
- }
44
-
45
- function msToTimestamp(ms, format) {
46
- const date = new Date(ms);
47
- const timestamp = date.toISOString().slice(11, 23);
48
- return format === 'srt' ? timestamp.replace('.', ',') : timestamp;
49
- }
50
-
51
- function parseSubtitles(content, format) {
52
- const blocks = content.split(/\n\s*\n/).filter(block => block.trim());
53
- const captions = [];
54
-
55
- for (const block of blocks) {
56
- const lines = block.split('\n');
57
- if (lines.length < 2) continue;
58
-
59
- let index, timelineIndex;
60
- if (format === 'srt') {
61
- // SRT format: numeric index required
62
- if (!/^\d+$/.test(lines[0])) continue;
63
- index = parseInt(lines[0]);
64
- timelineIndex = 1;
65
- } else {
66
- // VTT format: optional identifier
67
- timelineIndex = /^\d{2}:\d{2}/.test(lines[0]) ? 0 : 1;
68
- index = timelineIndex === 0 ? captions.length + 1 : lines[0];
69
- }
70
-
71
- const timeMatch = lines[timelineIndex].match(/^(\d{2}:\d{2}:\d{2}[.,]\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}[.,]\d{3})/);
72
- if (!timeMatch) continue;
73
-
74
- const startTime = timeMatch[1].replace(',', '.');
75
- const endTime = timeMatch[2].replace(',', '.');
76
- const content = lines.slice(timelineIndex + 1).join('\n');
77
-
78
- captions.push({
79
- type: "caption",
80
- index: typeof index === 'number' ? index : captions.length + 1,
81
- identifier: typeof index === 'string' ? index : null,
82
- start: timeToMs(startTime),
83
- end: timeToMs(endTime),
84
- duration: timeToMs(endTime) - timeToMs(startTime),
85
- content: content,
86
- text: content
87
- });
88
- }
89
-
90
- return captions;
91
- }
92
-
93
5
  function splitIntoOverlappingChunks(captions, chunkSize = 20, overlap = 3) {
94
6
  const chunks = [];
95
7
  for (let i = 0; i < captions.length; i += (chunkSize - overlap)) {
@@ -124,88 +36,8 @@ function selectBestTranslation(translations, startIndex, endIndex) {
124
36
  });
125
37
  }
126
38
 
127
- function validateFinalOutput(result, originalText, format) {
128
- // Basic structure validation
129
- if (!result || !result.trim()) {
130
- logger.error("Empty or whitespace-only result");
131
- return false;
132
- }
133
-
134
- // Check for VTT header if needed
135
- if (format === 'vtt' && !result.startsWith('WEBVTT\n\n')) {
136
- logger.error("Missing WEBVTT header");
137
- return false;
138
- }
139
-
140
- // Check for timestamp format
141
- const timestampPattern = format === 'srt'
142
- ? /\d{2}:\d{2}:\d{2},\d{3}\s*-->\s*\d{2}:\d{2}:\d{2},\d{3}/
143
- : /\d{2}:\d{2}:\d{2}\.\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}\.\d{3}/;
144
-
145
- const hasTimestamps = timestampPattern.test(result);
146
- if (!hasTimestamps) {
147
- logger.error(`No valid ${format.toUpperCase()} timestamps found in result`);
148
- return false;
149
- }
150
-
151
- // Check overall length ratio
152
- if (result.length < originalText.length * 0.5) {
153
- logger.error(`Result length (${result.length}) is less than 50% of original length (${originalText.length})`);
154
- return false;
155
- }
156
-
157
- // Validate subtitle block structure
158
- const blocks = result.split(/\n\s*\n/).filter(block => block.trim());
159
-
160
- // Skip WEBVTT header for VTT format
161
- const startIndex = format === 'vtt' && blocks[0].trim() === 'WEBVTT' ? 1 : 0;
162
-
163
- for (let i = startIndex; i < blocks.length; i++) {
164
- const block = blocks[i];
165
- const lines = block.trim().split('\n');
166
-
167
- if (lines.length < 2) {
168
- logger.error(`Block ${i + 1} has insufficient lines (${lines.length}):\n${block}`);
169
- return false;
170
- }
171
-
172
- // Find the timestamp line
173
- let timestampLineIndex = -1;
174
- for (let j = 0; j < lines.length; j++) {
175
- if (timestampPattern.test(lines[j])) {
176
- timestampLineIndex = j;
177
- break;
178
- }
179
- }
180
-
181
- if (timestampLineIndex === -1) {
182
- logger.error(`Block ${i + 1} has no valid timestamp line:\n${block}`);
183
- return false;
184
- }
185
-
186
- // Check that we have content after the timestamp
187
- if (timestampLineIndex === lines.length - 1) {
188
- logger.error(`Block ${i + 1} has no content after timestamp:\n${block}`);
189
- return false;
190
- }
191
-
192
- // Log the content for inspection
193
- logger.debug(`Block ${i + 1} content:\n${lines.slice(timestampLineIndex + 1).join('\n')}`);
194
- }
195
-
196
- return true;
197
- }
198
-
199
39
  async function translateChunk(chunk, args, maxRetries = 3) {
200
- const format = args.format || 'srt';
201
- const chunkText = chunk.captions
202
- .map(c => {
203
- const startTime = msToTimestamp(c.start, format);
204
- const endTime = msToTimestamp(c.end, format);
205
- const index = format === 'srt' || !c.identifier ? c.index : c.identifier;
206
- return `${index}\n${startTime} --> ${endTime}\n${c.content}`;
207
- })
208
- .join('\n\n');
40
+ const chunkText = build(chunk.captions, { format: args.format, preserveIndexes: true });
209
41
 
210
42
  for (let attempt = 0; attempt < maxRetries; attempt++) {
211
43
  try {
@@ -223,61 +55,9 @@ async function translateChunk(chunk, args, maxRetries = 3) {
223
55
  }
224
56
 
225
57
  const content = match[1].trim();
226
- const blocks = content.split(/\n\s*\n/);
227
-
228
- // Check if any blocks are empty or invalid
229
- let hasEmptyBlocks = false;
230
- const processedBlocks = chunk.captions.map((caption, index) => {
231
- const block = blocks[index];
232
- if (!block) {
233
- logger.warn(`Attempt ${attempt + 1}: Empty block for caption ${caption.index}`);
234
- hasEmptyBlocks = true;
235
- return null;
236
- }
237
-
238
- const lines = block.split('\n');
239
- if (lines.length < 3) {
240
- logger.warn(`Attempt ${attempt + 1}: Invalid block structure for caption ${caption.index}`);
241
- hasEmptyBlocks = true;
242
- return null;
243
- }
244
-
245
- const content = lines.slice(2).join('\n').trim();
246
- if (!content) {
247
- logger.warn(`Attempt ${attempt + 1}: Empty content for caption ${caption.index}`);
248
- hasEmptyBlocks = true;
249
- return null;
250
- }
251
-
252
- return {
253
- ...caption,
254
- content: content,
255
- text: content,
256
- chunkStart: chunk.startIndex,
257
- chunkEnd: chunk.endIndex
258
- };
259
- });
260
-
261
- // If no empty blocks, return the processed blocks
262
- if (!hasEmptyBlocks) {
263
- return processedBlocks;
264
- }
265
-
266
- // If this was the last attempt and we still have empty blocks,
267
- // return what we have but keep original content for empty blocks
268
- if (attempt === maxRetries - 1) {
269
- logger.warn(`Failed to get valid translations for all blocks after ${maxRetries} attempts`);
270
- return chunk.captions.map((caption, index) => {
271
- return processedBlocks[index] || {
272
- ...caption,
273
- chunkStart: chunk.startIndex,
274
- chunkEnd: chunk.endIndex
275
- };
276
- });
277
- }
278
-
279
- // Otherwise, try again
280
- logger.info(`Retrying chunk due to empty blocks (attempt ${attempt + 1}/${maxRetries})`);
58
+
59
+ const parsed = parse(content, { preserveIndexes: true });
60
+ return parsed.cues;
281
61
 
282
62
  } catch (e) {
283
63
  logger.error(`Error translating chunk ${chunk.startIndex}-${chunk.endIndex} (attempt ${attempt + 1}): ${e}`);
@@ -303,8 +83,8 @@ export default {
303
83
  executePathway: async ({args}) => {
304
84
  try {
305
85
  const { text, format = 'srt' } = args;
306
- const preprocessedText = preprocessStr(text, format);
307
- const captions = parseSubtitles(preprocessedText, format);
86
+ const parsed = parse(text, { format, preserveIndexes: true });
87
+ const captions = parsed.cues;
308
88
 
309
89
  if (!captions || captions.length === 0) {
310
90
  throw new Error("No captions found in input");
@@ -330,40 +110,12 @@ export default {
330
110
  // Select best translation for each caption
331
111
  const finalCaptions = captions.map(caption => {
332
112
  const translations = translationMap.get(caption.index) || [caption];
333
- return selectBestTranslation(translations, caption.index, caption.index);
113
+ const bestTranslation = selectBestTranslation(translations, caption.index, caption.index);
114
+ const text = bestTranslation?.text || caption?.text;
115
+ return { ...caption, text };
334
116
  });
335
-
336
- // Format the output
337
- let result = finalCaptions
338
- .map(caption => {
339
- const startTime = msToTimestamp(caption.start, format);
340
- const endTime = msToTimestamp(caption.end, format);
341
- // Only include index/identifier if it was in the original
342
- const hasIdentifier = caption.identifier !== null || format === 'srt';
343
- const index = format === 'srt' || !caption.identifier ? caption.index : caption.identifier;
344
- return hasIdentifier ?
345
- `${index}\n${startTime} --> ${endTime}\n${caption.content}` :
346
- `${startTime} --> ${endTime}\n${caption.content}`;
347
- })
348
- .join('\n\n')
349
- .trim();
350
-
351
- // Add final newline only if input had one
352
- if (text.endsWith('\n')) {
353
- result += '\n';
354
- }
355
-
356
- // Add WEBVTT header for VTT format
357
- if (format === 'vtt') {
358
- result = 'WEBVTT\n\n' + result;
359
- }
360
-
361
- // Validate final output
362
- if (!validateFinalOutput(result, text, format)) {
363
- throw new Error("Final subtitle reconstruction failed validation");
364
- }
365
117
 
366
- return result;
118
+ return build(finalCaptions, { format, preserveIndexes: true });
367
119
  } catch (e) {
368
120
  logger.error(`Subtitle translation failed: ${e}`);
369
121
  throw e;
@@ -83,43 +83,23 @@ class PathwayResolver {
83
83
  if (requestProgress.progress === 1 && this.rootRequestId) {
84
84
  delete requestProgress.progress;
85
85
  }
86
- publishRequestProgress(requestProgress);
86
+ publishRequestProgress({...requestProgress, info: this.tool || ''});
87
87
  }
88
88
 
89
89
  try {
90
90
  responseData = await this.executePathway(args);
91
91
  }
92
92
  catch (error) {
93
- if (!args.async) {
94
- publishRequestProgress({
95
- requestId: this.rootRequestId || this.requestId,
96
- progress: 1,
97
- data: '[DONE]',
98
- });
99
- } else {
100
- publishRequestProgress({
101
- requestId: this.rootRequestId || this.requestId,
102
- progress: 1,
103
- data: error.message || error.toString(),
104
- });
105
- }
93
+ publishRequestProgress({
94
+ requestId: this.rootRequestId || this.requestId,
95
+ progress: 1,
96
+ data: '',
97
+ info: 'ERROR: ' + error.message || error.toString()
98
+ });
106
99
  }
107
100
 
108
- // If the response is a string, it's a regular long running response
109
- if (args.async || typeof responseData === 'string') {
110
- const { completedCount=1, totalCount=1 } = requestState[this.requestId];
111
- requestState[this.requestId].data = responseData;
112
-
113
- // some models don't support progress updates
114
- if (!modelTypesExcludedFromProgressUpdates.includes(this.model.type)) {
115
- await publishNestedRequestProgress({
116
- requestId: this.rootRequestId || this.requestId,
117
- progress: Math.min(completedCount,totalCount) / totalCount,
118
- data: JSON.stringify(responseData),
119
- });
120
- }
121
- // If the response is an object, it's a streaming response
122
- } else {
101
+ // If the response is a stream, handle it as streaming response
102
+ if (responseData && typeof responseData.on === 'function') {
123
103
  try {
124
104
  const incomingMessage = responseData;
125
105
  let streamEnded = false;
@@ -184,11 +164,25 @@ class PathwayResolver {
184
164
  publishRequestProgress({
185
165
  requestId: this.requestId,
186
166
  progress: 1,
187
- data: '[DONE]',
167
+ data: '',
168
+ info: 'ERROR: Stream read failed'
188
169
  });
189
170
  } else {
190
171
  return;
191
172
  }
173
+ } else {
174
+ const { completedCount = 1, totalCount = 1 } = requestState[this.requestId];
175
+ requestState[this.requestId].data = responseData;
176
+
177
+ // some models don't support progress updates
178
+ if (!modelTypesExcludedFromProgressUpdates.includes(this.model.type)) {
179
+ await publishNestedRequestProgress({
180
+ requestId: this.rootRequestId || this.requestId,
181
+ progress: Math.min(completedCount, totalCount) / totalCount,
182
+ data: responseData,
183
+ info: this.tool || ''
184
+ });
185
+ }
192
186
  }
193
187
  }
194
188
 
@@ -197,7 +191,13 @@ class PathwayResolver {
197
191
  this.previousResult = mergeData.previousResult ? mergeData.previousResult : this.previousResult;
198
192
  this.warnings = [...this.warnings, ...(mergeData.warnings || [])];
199
193
  this.errors = [...this.errors, ...(mergeData.errors || [])];
200
- this.tool = mergeData.tool || this.tool;
194
+ try {
195
+ const mergeDataTool = typeof mergeData.tool === 'string' ? JSON.parse(mergeData.tool) : mergeData.tool || {};
196
+ const thisTool = typeof this.tool === 'string' ? JSON.parse(this.tool) : this.tool || {};
197
+ this.tool = JSON.stringify({ ...thisTool, ...mergeDataTool });
198
+ } catch (error) {
199
+ logger.warn('Error merging pathway resolver tool objects: ' + error);
200
+ }
201
201
  }
202
202
  }
203
203
 
@@ -4,6 +4,7 @@ import { fileURLToPath } from 'url';
4
4
  import { dirname } from 'path';
5
5
  import fs from 'fs';
6
6
  import path from 'path';
7
+ import { SubtitleUtils } from '@aj-archipelago/subvibe';
7
8
 
8
9
  const __filename = fileURLToPath(import.meta.url);
9
10
  const __dirname = dirname(__filename);
@@ -45,12 +46,50 @@ async function testSubtitleTranslation(t, text, language = 'English', format = '
45
46
  // Check timestamps based on format
46
47
  const timestampPattern = format === 'srt'
47
48
  ? /\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/g
48
- : /\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}/g;
49
+ : /(?:\d{2}:)?\d{2}:\d{2}\.\d{3} --> (?:\d{2}:)?\d{2}:\d{2}\.\d{3}/g;
49
50
 
50
51
  const originalTimestamps = text.match(timestampPattern);
51
52
  const translatedTimestamps = result.match(timestampPattern);
53
+
54
+ // Compare timestamps using SubtitleUtils.parseLooseTime
55
+ const areTimestampsEquivalent = originalTimestamps?.every((timestamp, index) => {
56
+ const [origStart, origEnd] = timestamp.split(' --> ');
57
+ const [transStart, transEnd] = translatedTimestamps[index].split(' --> ');
58
+
59
+ const origStartTime = SubtitleUtils.parseLooseTime(origStart);
60
+ const origEndTime = SubtitleUtils.parseLooseTime(origEnd);
61
+ const transStartTime = SubtitleUtils.parseLooseTime(transStart);
62
+ const transEndTime = SubtitleUtils.parseLooseTime(transEnd);
63
+
64
+ return origStartTime === transStartTime && origEndTime === transEndTime;
65
+ });
66
+
67
+ if (!areTimestampsEquivalent) {
68
+ const differences = originalTimestamps?.map((timestamp, index) => {
69
+ const [origStart, origEnd] = timestamp.split(' --> ');
70
+ const [transStart, transEnd] = translatedTimestamps[index].split(' --> ');
71
+
72
+ const origStartTime = SubtitleUtils.parseLooseTime(origStart);
73
+ const origEndTime = SubtitleUtils.parseLooseTime(origEnd);
74
+ const transStartTime = SubtitleUtils.parseLooseTime(transStart);
75
+ const transEndTime = SubtitleUtils.parseLooseTime(transEnd);
76
+
77
+ if (origStartTime !== transStartTime || origEndTime !== transEndTime) {
78
+ return {
79
+ index,
80
+ original: timestamp,
81
+ translated: translatedTimestamps[index],
82
+ parsedOriginal: { start: origStartTime, end: origEndTime },
83
+ parsedTranslated: { start: transStartTime, end: transEndTime }
84
+ };
85
+ }
86
+ return null;
87
+ }).filter(Boolean);
88
+
89
+ console.log('Timestamp differences found:', differences);
90
+ }
52
91
 
53
- t.deepEqual(originalTimestamps, translatedTimestamps, 'All timestamps should be present and unchanged');
92
+ t.true(areTimestampsEquivalent, 'All timestamps should be equivalent when parsed');
54
93
 
55
94
  // Check line count (accounting for WEBVTT header in VTT)
56
95
  const originalLineCount = text.split('\n').length;
@@ -1,217 +0,0 @@
1
- import test from 'ava';
2
- import { convertSrtToVtt } from '../pathways/transcribe_gemini.js';
3
-
4
- test('should return empty WebVTT for null or empty input', t => {
5
- t.is(convertSrtToVtt(null), "WEBVTT\n\n");
6
- t.is(convertSrtToVtt(''), "WEBVTT\n\n");
7
- t.is(convertSrtToVtt(' '), "WEBVTT\n\n");
8
- });
9
-
10
- test('should convert basic SRT to WebVTT format', t => {
11
- const srtInput =
12
- `1
13
- 00:00:01,000 --> 00:00:04,000
14
- Hello world`;
15
-
16
- const expectedOutput =
17
- `WEBVTT
18
-
19
- 1
20
- 00:00:01.000 --> 00:00:04.000
21
- Hello world
22
-
23
- `;
24
- t.is(convertSrtToVtt(srtInput), expectedOutput);
25
- });
26
-
27
- test('should convert multiple subtitle entries', t => {
28
- const srtInput =
29
- `1
30
- 00:00:01,000 --> 00:00:04,000
31
- First subtitle
32
-
33
- 2
34
- 00:00:05,000 --> 00:00:08,000
35
- Second subtitle`;
36
-
37
- const expectedOutput =
38
- `WEBVTT
39
-
40
- 1
41
- 00:00:01.000 --> 00:00:04.000
42
- First subtitle
43
-
44
- 2
45
- 00:00:05.000 --> 00:00:08.000
46
- Second subtitle
47
-
48
- `;
49
- t.is(convertSrtToVtt(srtInput), expectedOutput);
50
- });
51
-
52
- test('should handle DOS line endings', t => {
53
- const srtInput = "1\r\n00:00:01,000 --> 00:00:04,000\r\nHello world\r\n";
54
- const expectedOutput = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:04.000\nHello world\n\n";
55
- t.is(convertSrtToVtt(srtInput), expectedOutput);
56
- });
57
-
58
- test('should handle multi-line subtitles', t => {
59
- const srtInput =
60
- `1
61
- 00:00:01,000 --> 00:00:04,000
62
- First line
63
- Second line
64
- Third line
65
-
66
- 2
67
- 00:00:05,000 --> 00:00:08,000
68
- Another subtitle`;
69
-
70
- const expectedOutput =
71
- `WEBVTT
72
-
73
- 1
74
- 00:00:01.000 --> 00:00:04.000
75
- First line
76
- Second line
77
- Third line
78
-
79
- 2
80
- 00:00:05.000 --> 00:00:08.000
81
- Another subtitle
82
-
83
- `;
84
- t.is(convertSrtToVtt(srtInput), expectedOutput);
85
- });
86
-
87
- test('should handle invalid timestamp formats', t => {
88
- const srtInput =
89
- `1
90
- invalid timestamp
91
- Hello world
92
-
93
- 2
94
- 00:00:05,000 --> 00:00:08,000
95
- Valid subtitle`;
96
-
97
- const expectedOutput =
98
- `WEBVTT
99
-
100
- 2
101
- 00:00:05.000 --> 00:00:08.000
102
- Valid subtitle
103
-
104
- `;
105
- t.is(convertSrtToVtt(srtInput), expectedOutput);
106
- });
107
-
108
- test('should convert comma to dot in timestamps', t => {
109
- const srtInput =
110
- `1
111
- 00:00:01,500 --> 00:00:04,750
112
- Test subtitle`;
113
-
114
- const expectedOutput =
115
- `WEBVTT
116
-
117
- 1
118
- 00:00:01.500 --> 00:00:04.750
119
- Test subtitle
120
-
121
- `;
122
- t.is(convertSrtToVtt(srtInput), expectedOutput);
123
- });
124
-
125
- test('should handle extra whitespace in input', t => {
126
- const srtInput = `
127
-
128
- 1
129
- 00:00:01,000 --> 00:00:04,000
130
- Hello world
131
-
132
- `;
133
- const expectedOutput =
134
- `WEBVTT
135
-
136
- 1
137
- 00:00:01.000 --> 00:00:04.000
138
- Hello world
139
-
140
- `;
141
- t.is(convertSrtToVtt(srtInput), expectedOutput);
142
- });
143
-
144
- test('should handle timestamps with only minutes and seconds', t => {
145
- const srtInput =
146
- `1
147
- 01:30,000 --> 02:45,500
148
- Short timestamp format`;
149
-
150
- const expectedOutput =
151
- `WEBVTT
152
-
153
- 1
154
- 00:01:30.000 --> 00:02:45.500
155
- Short timestamp format
156
-
157
- `;
158
- t.is(convertSrtToVtt(srtInput), expectedOutput);
159
- });
160
-
161
- test('should handle ultra-short timestamps (SS.mmm)', t => {
162
- const srtInput =
163
- `1
164
- 03.298 --> 04.578
165
- First line
166
-
167
- 2
168
- 04.578 --> 06.178
169
- Second line`;
170
-
171
- const expectedOutput =
172
- `WEBVTT
173
-
174
- 1
175
- 00:00:03.298 --> 00:00:04.578
176
- First line
177
-
178
- 2
179
- 00:00:04.578 --> 00:00:06.178
180
- Second line
181
-
182
- `;
183
- t.is(convertSrtToVtt(srtInput), expectedOutput);
184
- });
185
-
186
- test('should handle mixed timestamp formats', t => {
187
- const srtInput =
188
- `1
189
- 03.298 --> 04.578
190
- First line
191
-
192
- 2
193
- 00:04.578 --> 00:06.178
194
- Second line
195
-
196
- 3
197
- 00:00:06.178 --> 00:00:07.518
198
- Third line`;
199
-
200
- const expectedOutput =
201
- `WEBVTT
202
-
203
- 1
204
- 00:00:03.298 --> 00:00:04.578
205
- First line
206
-
207
- 2
208
- 00:00:04.578 --> 00:00:06.178
209
- Second line
210
-
211
- 3
212
- 00:00:06.178 --> 00:00:07.518
213
- Third line
214
-
215
- `;
216
- t.is(convertSrtToVtt(srtInput), expectedOutput);
217
- });