@aj-archipelago/cortex 1.3.24 → 1.3.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/pathwayTools.js +16 -12
- package/package.json +2 -2
- package/pathways/system/entity/sys_entity_continue.js +5 -5
- package/pathways/system/entity/sys_entity_start.js +22 -31
- package/pathways/system/entity/sys_generator_results.js +1 -1
- package/pathways/transcribe_gemini.js +1 -296
- package/pathways/translate_subtitle.js +11 -259
- package/server/pathwayResolver.js +31 -31
- package/tests/translate_srt.test.js +41 -2
- package/tests/transcribe_gemini.test.js +0 -217
package/lib/pathwayTools.js
CHANGED
|
@@ -21,23 +21,25 @@ const callPathway = async (pathwayName, inArgs, pathwayResolver) => {
|
|
|
21
21
|
let rootRequestId = pathwayResolver?.rootRequestId || pathwayResolver?.requestId;
|
|
22
22
|
|
|
23
23
|
let data = await pathway.rootResolver(parent, {...args, rootRequestId}, { config, pathway, requestState } );
|
|
24
|
+
pathwayResolver && pathwayResolver.mergeResults(data);
|
|
25
|
+
|
|
26
|
+
let returnValue = data?.result || null;
|
|
24
27
|
|
|
25
28
|
if (args.async || args.stream) {
|
|
26
29
|
const { result: requestId } = data;
|
|
27
30
|
|
|
28
31
|
// Fire the resolver for the async requestProgress
|
|
29
|
-
logger.info(`Callpathway starting async requestProgress, requestId: ${requestId}`);
|
|
32
|
+
logger.info(`Callpathway starting async requestProgress, pathway: ${pathwayName}, requestId: ${requestId}`);
|
|
30
33
|
const { resolver, args } = requestState[requestId];
|
|
31
34
|
requestState[requestId].useRedis = false;
|
|
32
35
|
requestState[requestId].started = true;
|
|
33
36
|
|
|
34
|
-
|
|
37
|
+
resolver && await resolver(args);
|
|
38
|
+
|
|
39
|
+
returnValue = null;
|
|
35
40
|
}
|
|
36
41
|
|
|
37
|
-
|
|
38
|
-
pathwayResolver?.mergeResults(data);
|
|
39
|
-
|
|
40
|
-
return data?.result;
|
|
42
|
+
return returnValue;
|
|
41
43
|
};
|
|
42
44
|
|
|
43
45
|
const gpt3Encode = (text) => {
|
|
@@ -48,7 +50,7 @@ const gpt3Decode = (text) => {
|
|
|
48
50
|
return decode(text);
|
|
49
51
|
}
|
|
50
52
|
|
|
51
|
-
const say = async (requestId, message, maxMessageLength = Infinity) => {
|
|
53
|
+
const say = async (requestId, message, maxMessageLength = Infinity, voiceResponse = true) => {
|
|
52
54
|
try {
|
|
53
55
|
const chunks = getSemanticChunks(message, maxMessageLength);
|
|
54
56
|
|
|
@@ -60,11 +62,13 @@ const say = async (requestId, message, maxMessageLength = Infinity) => {
|
|
|
60
62
|
});
|
|
61
63
|
}
|
|
62
64
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
65
|
+
if (voiceResponse) {
|
|
66
|
+
await publishRequestProgress({
|
|
67
|
+
requestId,
|
|
68
|
+
progress: 0.5,
|
|
69
|
+
data: " ... "
|
|
70
|
+
});
|
|
71
|
+
}
|
|
68
72
|
|
|
69
73
|
await publishRequestProgress({
|
|
70
74
|
requestId,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "1.3.24",
|
|
3
|
+
"version": "1.3.26",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"private": false,
|
|
6
6
|
"repository": {
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
"type": "module",
|
|
34
34
|
"homepage": "https://github.com/aj-archipelago/cortex#readme",
|
|
35
35
|
"dependencies": {
|
|
36
|
-
"@aj-archipelago/subvibe": "^1.0.
|
|
36
|
+
"@aj-archipelago/subvibe": "^1.0.8",
|
|
37
37
|
"@apollo/server": "^4.7.3",
|
|
38
38
|
"@apollo/server-plugin-response-cache": "^4.1.2",
|
|
39
39
|
"@apollo/utils.keyvadapter": "^3.0.0",
|
|
@@ -48,6 +48,10 @@ export default {
|
|
|
48
48
|
chatHistory: args.chatHistory.slice(-20)
|
|
49
49
|
};
|
|
50
50
|
|
|
51
|
+
if (generatorPathway === 'coding') {
|
|
52
|
+
return;
|
|
53
|
+
}
|
|
54
|
+
|
|
51
55
|
if (generatorPathway === 'sys_generator_document') {
|
|
52
56
|
generatorPathway = 'sys_generator_results';
|
|
53
57
|
newArgs.dataSources = ["mydata"];
|
|
@@ -57,11 +61,7 @@ export default {
|
|
|
57
61
|
|
|
58
62
|
const result = await callPathway(generatorPathway, newArgs, resolver);
|
|
59
63
|
|
|
60
|
-
if (args.stream) {
|
|
61
|
-
return "";
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
if (!result) {
|
|
64
|
+
if (!result && !args.stream) {
|
|
65
65
|
result = await callPathway('sys_generator_error', { ...args, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
|
|
66
66
|
}
|
|
67
67
|
|
|
@@ -105,22 +105,12 @@ export default {
|
|
|
105
105
|
}
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
-
|
|
109
|
-
const [chatResponse, chatTitleResponse] = await Promise.all([
|
|
110
|
-
callPathway('sys_generator_quick', {...args, model: styleModel}, pathwayResolver),
|
|
111
|
-
callPathway('chat_title', { ...args, chatHistory: chatHistoryBeforeMemory, stream: false}),
|
|
112
|
-
]);
|
|
113
|
-
|
|
114
|
-
title = chatTitleResponse;
|
|
115
|
-
|
|
116
|
-
return chatResponse;
|
|
117
|
-
};
|
|
118
|
-
|
|
119
|
-
// start fetching the default response - we may need it later
|
|
108
|
+
// start fetching responses in parallel if not streaming
|
|
120
109
|
let fetchChatResponsePromise;
|
|
121
110
|
if (!args.stream) {
|
|
122
|
-
fetchChatResponsePromise =
|
|
111
|
+
fetchChatResponsePromise = callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver);
|
|
123
112
|
}
|
|
113
|
+
const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: chatHistoryBeforeMemory, stream: false});
|
|
124
114
|
|
|
125
115
|
const visionContentPresent = chatArgsHasImageUrl(args);
|
|
126
116
|
|
|
@@ -223,42 +213,43 @@ export default {
|
|
|
223
213
|
}
|
|
224
214
|
}
|
|
225
215
|
|
|
216
|
+
title = await fetchTitleResponsePromise;
|
|
217
|
+
|
|
218
|
+
pathwayResolver.tool = JSON.stringify({
|
|
219
|
+
hideFromModel: toolCallbackName ? true : false,
|
|
220
|
+
toolCallbackName,
|
|
221
|
+
title,
|
|
222
|
+
search: toolCallbackName === 'sys_generator_results' ? true : false,
|
|
223
|
+
coding: toolCallbackName === 'coding' ? true : false,
|
|
224
|
+
codeRequestId,
|
|
225
|
+
toolCallbackId
|
|
226
|
+
});
|
|
227
|
+
|
|
226
228
|
if (toolCallbackMessage) {
|
|
227
229
|
if (args.skipCallbackMessage) {
|
|
228
|
-
pathwayResolver.tool = JSON.stringify({ hideFromModel: false, search: false, title });
|
|
229
230
|
return await callPathway('sys_entity_continue', { ...args, stream: false, model: styleModel, generatorPathway: toolCallbackName }, pathwayResolver);
|
|
230
231
|
}
|
|
231
232
|
|
|
232
233
|
if (args.stream) {
|
|
233
234
|
if (!ackResponse) {
|
|
234
|
-
await say(pathwayResolver.requestId, toolCallbackMessage || "One moment please.", 10);
|
|
235
|
+
await say(pathwayResolver.requestId, toolCallbackMessage || "One moment please.", 10, args.voiceResponse ? true : false);
|
|
235
236
|
}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
return "";
|
|
237
|
+
await callPathway('sys_entity_continue', { ...args, stream: true, generatorPathway: toolCallbackName }, pathwayResolver);
|
|
238
|
+
return;
|
|
239
239
|
}
|
|
240
240
|
|
|
241
|
-
pathwayResolver.tool = JSON.stringify({
|
|
242
|
-
hideFromModel: toolCallbackName ? true : false,
|
|
243
|
-
toolCallbackName,
|
|
244
|
-
title,
|
|
245
|
-
search: toolCallbackName === 'sys_generator_results' ? true : false,
|
|
246
|
-
coding: toolCallbackName === 'coding' ? true : false,
|
|
247
|
-
codeRequestId,
|
|
248
|
-
toolCallbackId
|
|
249
|
-
});
|
|
250
241
|
return toolCallbackMessage || "One moment please.";
|
|
251
242
|
}
|
|
252
243
|
|
|
253
|
-
const chatResponse = await (fetchChatResponsePromise ||
|
|
244
|
+
const chatResponse = await (fetchChatResponsePromise || callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver));
|
|
254
245
|
pathwayResolver.tool = JSON.stringify({ search: false, title });
|
|
255
|
-
return args.stream ?
|
|
246
|
+
return args.stream ? null : chatResponse;
|
|
256
247
|
|
|
257
248
|
} catch (e) {
|
|
258
249
|
pathwayResolver.logError(e);
|
|
259
|
-
const chatResponse = await (fetchChatResponsePromise ||
|
|
250
|
+
const chatResponse = await (fetchChatResponsePromise || callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver));
|
|
260
251
|
pathwayResolver.tool = JSON.stringify({ search: false, title });
|
|
261
|
-
return args.stream ?
|
|
252
|
+
return args.stream ? null : chatResponse;
|
|
262
253
|
}
|
|
263
254
|
}
|
|
264
255
|
};
|
|
@@ -341,7 +341,7 @@ Here are the information sources that were found:
|
|
|
341
341
|
clearTimeout(timeoutId);
|
|
342
342
|
}
|
|
343
343
|
|
|
344
|
-
if (!args.
|
|
344
|
+
if (!args.voiceResponse) {
|
|
345
345
|
const referencedSources = extractReferencedSources(result);
|
|
346
346
|
searchResults = searchResults.length ? pruneSearchResults(searchResults, referencedSources) : [];
|
|
347
347
|
}
|
|
@@ -5,283 +5,6 @@ import { Prompt } from "../server/prompt.js";
|
|
|
5
5
|
|
|
6
6
|
const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide
|
|
7
7
|
|
|
8
|
-
export function convertSrtToVtt(data) {
|
|
9
|
-
if (!data || !data.trim()) {
|
|
10
|
-
return "WEBVTT\n\n";
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
// If it's already VTT format and has header
|
|
14
|
-
if (data.trim().startsWith("WEBVTT")) {
|
|
15
|
-
const lines = data.split("\n");
|
|
16
|
-
const result = ["WEBVTT", ""]; // Start with header and blank line
|
|
17
|
-
let currentCue = [];
|
|
18
|
-
|
|
19
|
-
for (let i = 0; i < lines.length; i++) {
|
|
20
|
-
const line = lines[i].trim();
|
|
21
|
-
|
|
22
|
-
// Skip empty lines and the WEBVTT header
|
|
23
|
-
if (!line || line === "WEBVTT") {
|
|
24
|
-
continue;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
// If it's a number by itself, it's a cue identifier
|
|
28
|
-
if (/^\d+$/.test(line)) {
|
|
29
|
-
// If we have a previous cue, add it with proper spacing
|
|
30
|
-
if (currentCue.length > 0) {
|
|
31
|
-
result.push(currentCue.join("\n"));
|
|
32
|
-
result.push(""); // Add blank line between cues
|
|
33
|
-
currentCue = [];
|
|
34
|
-
}
|
|
35
|
-
currentCue.push(line);
|
|
36
|
-
continue;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
// Check for and convert timestamps
|
|
40
|
-
const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
|
|
41
|
-
const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
|
|
42
|
-
const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
|
|
43
|
-
|
|
44
|
-
const fullMatch = line.match(fullTimeRegex);
|
|
45
|
-
const shortMatch = line.match(shortTimeRegex);
|
|
46
|
-
const ultraShortMatch = line.match(ultraShortTimeRegex);
|
|
47
|
-
|
|
48
|
-
if (fullMatch) {
|
|
49
|
-
// Already in correct format, just convert comma to dot
|
|
50
|
-
const convertedTime = line.replace(/,/g, '.');
|
|
51
|
-
currentCue.push(convertedTime);
|
|
52
|
-
} else if (shortMatch) {
|
|
53
|
-
// Convert MM:SS to HH:MM:SS
|
|
54
|
-
const convertedTime = `00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`;
|
|
55
|
-
currentCue.push(convertedTime);
|
|
56
|
-
} else if (ultraShortMatch) {
|
|
57
|
-
// Convert SS to HH:MM:SS
|
|
58
|
-
const convertedTime = `00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`;
|
|
59
|
-
currentCue.push(convertedTime);
|
|
60
|
-
} else if (!line.includes('-->')) {
|
|
61
|
-
// Must be subtitle text
|
|
62
|
-
currentCue.push(line);
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
// Add the last cue if there is one
|
|
67
|
-
if (currentCue.length > 0) {
|
|
68
|
-
result.push(currentCue.join("\n"));
|
|
69
|
-
result.push(""); // Add final blank line
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
// Join with newlines and ensure proper ending
|
|
73
|
-
return result.join("\n") + "\n";
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// remove dos newlines and trim
|
|
77
|
-
var srt = data.replace(/\r+/g, "");
|
|
78
|
-
srt = srt.replace(/^\s+|\s+$/g, "");
|
|
79
|
-
|
|
80
|
-
// Split into cues and filter out empty ones
|
|
81
|
-
var cuelist = srt.split("\n\n").filter(cue => cue.trim());
|
|
82
|
-
|
|
83
|
-
// Always add WEBVTT header
|
|
84
|
-
var result = "WEBVTT\n\n";
|
|
85
|
-
|
|
86
|
-
// Convert each cue to VTT format
|
|
87
|
-
for (const cue of cuelist) {
|
|
88
|
-
const lines = cue.split("\n").map(line => line.trim()).filter(line => line);
|
|
89
|
-
if (lines.length < 2) continue;
|
|
90
|
-
|
|
91
|
-
let output = [];
|
|
92
|
-
|
|
93
|
-
// Handle cue identifier
|
|
94
|
-
if (/^\d+$/.test(lines[0])) {
|
|
95
|
-
output.push(lines[0]);
|
|
96
|
-
lines.shift();
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
// Handle timestamp line
|
|
100
|
-
const timeLine = lines[0];
|
|
101
|
-
const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
|
|
102
|
-
const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
|
|
103
|
-
const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
|
|
104
|
-
|
|
105
|
-
const fullMatch = timeLine.match(fullTimeRegex);
|
|
106
|
-
const shortMatch = timeLine.match(shortTimeRegex);
|
|
107
|
-
const ultraShortMatch = timeLine.match(ultraShortTimeRegex);
|
|
108
|
-
|
|
109
|
-
if (fullMatch) {
|
|
110
|
-
output.push(timeLine.replace(/,/g, '.'));
|
|
111
|
-
} else if (shortMatch) {
|
|
112
|
-
output.push(`00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`);
|
|
113
|
-
} else if (ultraShortMatch) {
|
|
114
|
-
output.push(`00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`);
|
|
115
|
-
} else {
|
|
116
|
-
continue; // Invalid timestamp format
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
// Add remaining lines as subtitle text
|
|
120
|
-
output.push(...lines.slice(1));
|
|
121
|
-
|
|
122
|
-
// Add the cue to result
|
|
123
|
-
result += output.join("\n") + "\n\n";
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
return result;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
function convertSrtCue(caption) {
|
|
130
|
-
if (!caption || !caption.trim()) {
|
|
131
|
-
return "";
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
var cue = "";
|
|
135
|
-
var s = caption.split(/\n/);
|
|
136
|
-
|
|
137
|
-
// concatenate multi-line string separated in array into one
|
|
138
|
-
while (s.length > 3) {
|
|
139
|
-
for (var i = 3; i < s.length; i++) {
|
|
140
|
-
s[2] += "\n" + s[i];
|
|
141
|
-
}
|
|
142
|
-
s.splice(3, s.length - 3);
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
var line = 0;
|
|
146
|
-
|
|
147
|
-
// detect identifier
|
|
148
|
-
if (
|
|
149
|
-
s[0] &&
|
|
150
|
-
s[1] &&
|
|
151
|
-
!s[0].match(/\d+:\d+:\d+/) &&
|
|
152
|
-
s[1].match(/\d+:\d+:\d+/)
|
|
153
|
-
) {
|
|
154
|
-
const match = s[0].match(/^\d+$/); // Only match if the entire line is a number
|
|
155
|
-
if (match) {
|
|
156
|
-
cue += match[0] + "\n";
|
|
157
|
-
line += 1;
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
// get time strings
|
|
162
|
-
if (s[line] && s[line].match(/\d+:\d+:\d+/)) {
|
|
163
|
-
// convert time string
|
|
164
|
-
var m = s[line].match(
|
|
165
|
-
/(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*--?>\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
|
|
166
|
-
);
|
|
167
|
-
if (m) {
|
|
168
|
-
cue +=
|
|
169
|
-
m[1] +
|
|
170
|
-
":" +
|
|
171
|
-
m[2] +
|
|
172
|
-
":" +
|
|
173
|
-
m[3] +
|
|
174
|
-
"." +
|
|
175
|
-
m[4] +
|
|
176
|
-
" --> " +
|
|
177
|
-
m[5] +
|
|
178
|
-
":" +
|
|
179
|
-
m[6] +
|
|
180
|
-
":" +
|
|
181
|
-
m[7] +
|
|
182
|
-
"." +
|
|
183
|
-
m[8] +
|
|
184
|
-
"\n";
|
|
185
|
-
line += 1;
|
|
186
|
-
} else {
|
|
187
|
-
// Try alternate timestamp format
|
|
188
|
-
m = s[line].match(
|
|
189
|
-
/(\d{2}):(\d{2})\.(\d{3})\s*--?>\s*(\d{2}):(\d{2})\.(\d{3})/,
|
|
190
|
-
);
|
|
191
|
-
if (m) {
|
|
192
|
-
// Convert to full timestamp format
|
|
193
|
-
cue +=
|
|
194
|
-
"00:" +
|
|
195
|
-
m[1] +
|
|
196
|
-
":" +
|
|
197
|
-
m[2] +
|
|
198
|
-
"." +
|
|
199
|
-
m[3] +
|
|
200
|
-
" --> " +
|
|
201
|
-
"00:" +
|
|
202
|
-
m[4] +
|
|
203
|
-
":" +
|
|
204
|
-
m[5] +
|
|
205
|
-
"." +
|
|
206
|
-
m[6] +
|
|
207
|
-
"\n";
|
|
208
|
-
line += 1;
|
|
209
|
-
} else {
|
|
210
|
-
// Unrecognized timestring
|
|
211
|
-
return "";
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
} else {
|
|
215
|
-
// file format error or comment lines
|
|
216
|
-
return "";
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
// get cue text
|
|
220
|
-
if (s[line]) {
|
|
221
|
-
cue += s[line] + "\n\n";
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
return cue;
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
export function detectSubtitleFormat(text) {
|
|
228
|
-
// Remove DOS newlines and trim whitespace
|
|
229
|
-
const cleanText = text.replace(/\r+/g, "").trim();
|
|
230
|
-
const lines = cleanText.split("\n");
|
|
231
|
-
|
|
232
|
-
// Check if it's VTT format - be more lenient with the header
|
|
233
|
-
if (lines[0]?.trim() === "WEBVTT") {
|
|
234
|
-
return "vtt";
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
// Define regex patterns for timestamp formats
|
|
238
|
-
const srtTimeRegex =
|
|
239
|
-
/(\d{2}:\d{2}:\d{2})[,.]\d{3}\s*-->\s*(\d{2}:\d{2}:\d{2})[,.]\d{3}/;
|
|
240
|
-
const vttTimeRegex =
|
|
241
|
-
/(?:\d{2}:)?(\d{1,2})[.]\d{3}\s*-->\s*(?:\d{2}:)?(\d{1,2})[.]\d{3}/;
|
|
242
|
-
|
|
243
|
-
let hasSrtTimestamps = false;
|
|
244
|
-
let hasVttTimestamps = false;
|
|
245
|
-
let hasSequentialNumbers = false;
|
|
246
|
-
let lastNumber = 0;
|
|
247
|
-
|
|
248
|
-
// Look through first few lines to detect patterns
|
|
249
|
-
for (let i = 0; i < Math.min(lines.length, 12); i++) {
|
|
250
|
-
const line = lines[i]?.trim();
|
|
251
|
-
if (!line) continue;
|
|
252
|
-
|
|
253
|
-
// Check for timestamps
|
|
254
|
-
if (srtTimeRegex.test(line)) {
|
|
255
|
-
hasSrtTimestamps = true;
|
|
256
|
-
}
|
|
257
|
-
if (vttTimeRegex.test(line)) {
|
|
258
|
-
hasVttTimestamps = true;
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
// Check for sequential numbers
|
|
262
|
-
const numberMatch = line.match(/^(\d+)$/);
|
|
263
|
-
if (numberMatch) {
|
|
264
|
-
const num = parseInt(numberMatch[1]);
|
|
265
|
-
if (lastNumber === 0 || num === lastNumber + 1) {
|
|
266
|
-
hasSequentialNumbers = true;
|
|
267
|
-
lastNumber = num;
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
// If it has SRT-style timestamps (HH:MM:SS), it's SRT
|
|
273
|
-
if (hasSrtTimestamps && hasSequentialNumbers) {
|
|
274
|
-
return "srt";
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
// If it has VTT-style timestamps (MM:SS) or WEBVTT header, it's VTT
|
|
278
|
-
if (hasVttTimestamps) {
|
|
279
|
-
return "vtt";
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
return null;
|
|
283
|
-
}
|
|
284
|
-
|
|
285
8
|
export default {
|
|
286
9
|
prompt:
|
|
287
10
|
[
|
|
@@ -381,7 +104,7 @@ export default {
|
|
|
381
104
|
|
|
382
105
|
function getMessages(file, format) {
|
|
383
106
|
|
|
384
|
-
const responseFormat = format!== 'text' ? '
|
|
107
|
+
const responseFormat = format!== 'text' ? 'VTT' : 'text';
|
|
385
108
|
|
|
386
109
|
const messages = [
|
|
387
110
|
{"role": "system", "content": `Instructions:\nYou are an AI entity with expertise of transcription. Your response only contains the transcription, no comments or additonal stuff.
|
|
@@ -491,26 +214,8 @@ Even a single newline or space can cause the response to be rejected. You must f
|
|
|
491
214
|
// }
|
|
492
215
|
|
|
493
216
|
const result = await processChunksParallel(chunks, args);
|
|
494
|
-
|
|
495
|
-
// publishRequestProgress({
|
|
496
|
-
// requestId: this.rootRequestId || this.requestId,
|
|
497
|
-
// progress: 1,
|
|
498
|
-
// data: "a",
|
|
499
|
-
// });
|
|
500
217
|
|
|
501
218
|
if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
// convert as gemini output is unstable
|
|
506
|
-
for(let i = 0; i < result.length; i++) {
|
|
507
|
-
try{
|
|
508
|
-
result[i] = convertSrtToVtt(result[i]);
|
|
509
|
-
}catch(error){
|
|
510
|
-
logger.error(`Error converting to vtt: ${error}`);
|
|
511
|
-
}
|
|
512
|
-
}
|
|
513
|
-
|
|
514
219
|
const offsets = chunks.map((chunk, index) => chunk?.offset || index * OFFSET_CHUNK);
|
|
515
220
|
return alignSubtitles(result, responseFormat, offsets);
|
|
516
221
|
}
|
|
@@ -1,95 +1,7 @@
|
|
|
1
|
+
import { parse, build } from "@aj-archipelago/subvibe";
|
|
1
2
|
import logger from "../lib/logger.js";
|
|
2
3
|
import { callPathway } from "../lib/pathwayTools.js";
|
|
3
4
|
|
|
4
|
-
function preprocessStr(str, format) {
|
|
5
|
-
try {
|
|
6
|
-
if (!str) return "";
|
|
7
|
-
let content = str
|
|
8
|
-
// Normalize line endings
|
|
9
|
-
.replace(/\r\n?/g, "\n")
|
|
10
|
-
// Remove WEBVTT header for processing
|
|
11
|
-
.replace(/^WEBVTT\n\n/, '');
|
|
12
|
-
|
|
13
|
-
// For SRT, convert commas to dots in timestamps
|
|
14
|
-
if (format === 'srt') {
|
|
15
|
-
content = content.replace(/(\d{2}:\d{2}:\d{2}),(\d{3})/g, "$1.$2");
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
return content
|
|
19
|
-
// Ensure each subtitle block is properly separated
|
|
20
|
-
.split(/\n\s*\n/)
|
|
21
|
-
.map(block => block.trim())
|
|
22
|
-
.filter(block => {
|
|
23
|
-
// Match both numeric indices (SRT) and optional caption identifiers (VTT)
|
|
24
|
-
const firstLine = block.split('\n')[0];
|
|
25
|
-
return block && (
|
|
26
|
-
/^\d+$/.test(firstLine) || // SRT style
|
|
27
|
-
/^\d{2}:\d{2}/.test(firstLine) || // VTT style without identifier
|
|
28
|
-
/^[^\n]+\n\d{2}:\d{2}/.test(block) // VTT style with identifier
|
|
29
|
-
);
|
|
30
|
-
})
|
|
31
|
-
.join("\n\n")
|
|
32
|
-
+ "\n\n";
|
|
33
|
-
} catch (e) {
|
|
34
|
-
logger.error(`An error occurred in content text preprocessing: ${e}`);
|
|
35
|
-
return "";
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
function timeToMs(timeStr) {
|
|
40
|
-
const [time, ms] = timeStr.split(/[.,]/);
|
|
41
|
-
const [hours, minutes, seconds] = time.split(':').map(Number);
|
|
42
|
-
return (hours * 3600 + minutes * 60 + seconds) * 1000 + parseInt(ms);
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
function msToTimestamp(ms, format) {
|
|
46
|
-
const date = new Date(ms);
|
|
47
|
-
const timestamp = date.toISOString().slice(11, 23);
|
|
48
|
-
return format === 'srt' ? timestamp.replace('.', ',') : timestamp;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
function parseSubtitles(content, format) {
|
|
52
|
-
const blocks = content.split(/\n\s*\n/).filter(block => block.trim());
|
|
53
|
-
const captions = [];
|
|
54
|
-
|
|
55
|
-
for (const block of blocks) {
|
|
56
|
-
const lines = block.split('\n');
|
|
57
|
-
if (lines.length < 2) continue;
|
|
58
|
-
|
|
59
|
-
let index, timelineIndex;
|
|
60
|
-
if (format === 'srt') {
|
|
61
|
-
// SRT format: numeric index required
|
|
62
|
-
if (!/^\d+$/.test(lines[0])) continue;
|
|
63
|
-
index = parseInt(lines[0]);
|
|
64
|
-
timelineIndex = 1;
|
|
65
|
-
} else {
|
|
66
|
-
// VTT format: optional identifier
|
|
67
|
-
timelineIndex = /^\d{2}:\d{2}/.test(lines[0]) ? 0 : 1;
|
|
68
|
-
index = timelineIndex === 0 ? captions.length + 1 : lines[0];
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
const timeMatch = lines[timelineIndex].match(/^(\d{2}:\d{2}:\d{2}[.,]\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}[.,]\d{3})/);
|
|
72
|
-
if (!timeMatch) continue;
|
|
73
|
-
|
|
74
|
-
const startTime = timeMatch[1].replace(',', '.');
|
|
75
|
-
const endTime = timeMatch[2].replace(',', '.');
|
|
76
|
-
const content = lines.slice(timelineIndex + 1).join('\n');
|
|
77
|
-
|
|
78
|
-
captions.push({
|
|
79
|
-
type: "caption",
|
|
80
|
-
index: typeof index === 'number' ? index : captions.length + 1,
|
|
81
|
-
identifier: typeof index === 'string' ? index : null,
|
|
82
|
-
start: timeToMs(startTime),
|
|
83
|
-
end: timeToMs(endTime),
|
|
84
|
-
duration: timeToMs(endTime) - timeToMs(startTime),
|
|
85
|
-
content: content,
|
|
86
|
-
text: content
|
|
87
|
-
});
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
return captions;
|
|
91
|
-
}
|
|
92
|
-
|
|
93
5
|
function splitIntoOverlappingChunks(captions, chunkSize = 20, overlap = 3) {
|
|
94
6
|
const chunks = [];
|
|
95
7
|
for (let i = 0; i < captions.length; i += (chunkSize - overlap)) {
|
|
@@ -124,88 +36,8 @@ function selectBestTranslation(translations, startIndex, endIndex) {
|
|
|
124
36
|
});
|
|
125
37
|
}
|
|
126
38
|
|
|
127
|
-
function validateFinalOutput(result, originalText, format) {
|
|
128
|
-
// Basic structure validation
|
|
129
|
-
if (!result || !result.trim()) {
|
|
130
|
-
logger.error("Empty or whitespace-only result");
|
|
131
|
-
return false;
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
// Check for VTT header if needed
|
|
135
|
-
if (format === 'vtt' && !result.startsWith('WEBVTT\n\n')) {
|
|
136
|
-
logger.error("Missing WEBVTT header");
|
|
137
|
-
return false;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
// Check for timestamp format
|
|
141
|
-
const timestampPattern = format === 'srt'
|
|
142
|
-
? /\d{2}:\d{2}:\d{2},\d{3}\s*-->\s*\d{2}:\d{2}:\d{2},\d{3}/
|
|
143
|
-
: /\d{2}:\d{2}:\d{2}\.\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}\.\d{3}/;
|
|
144
|
-
|
|
145
|
-
const hasTimestamps = timestampPattern.test(result);
|
|
146
|
-
if (!hasTimestamps) {
|
|
147
|
-
logger.error(`No valid ${format.toUpperCase()} timestamps found in result`);
|
|
148
|
-
return false;
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
// Check overall length ratio
|
|
152
|
-
if (result.length < originalText.length * 0.5) {
|
|
153
|
-
logger.error(`Result length (${result.length}) is less than 50% of original length (${originalText.length})`);
|
|
154
|
-
return false;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
// Validate subtitle block structure
|
|
158
|
-
const blocks = result.split(/\n\s*\n/).filter(block => block.trim());
|
|
159
|
-
|
|
160
|
-
// Skip WEBVTT header for VTT format
|
|
161
|
-
const startIndex = format === 'vtt' && blocks[0].trim() === 'WEBVTT' ? 1 : 0;
|
|
162
|
-
|
|
163
|
-
for (let i = startIndex; i < blocks.length; i++) {
|
|
164
|
-
const block = blocks[i];
|
|
165
|
-
const lines = block.trim().split('\n');
|
|
166
|
-
|
|
167
|
-
if (lines.length < 2) {
|
|
168
|
-
logger.error(`Block ${i + 1} has insufficient lines (${lines.length}):\n${block}`);
|
|
169
|
-
return false;
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
// Find the timestamp line
|
|
173
|
-
let timestampLineIndex = -1;
|
|
174
|
-
for (let j = 0; j < lines.length; j++) {
|
|
175
|
-
if (timestampPattern.test(lines[j])) {
|
|
176
|
-
timestampLineIndex = j;
|
|
177
|
-
break;
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
if (timestampLineIndex === -1) {
|
|
182
|
-
logger.error(`Block ${i + 1} has no valid timestamp line:\n${block}`);
|
|
183
|
-
return false;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
// Check that we have content after the timestamp
|
|
187
|
-
if (timestampLineIndex === lines.length - 1) {
|
|
188
|
-
logger.error(`Block ${i + 1} has no content after timestamp:\n${block}`);
|
|
189
|
-
return false;
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
// Log the content for inspection
|
|
193
|
-
logger.debug(`Block ${i + 1} content:\n${lines.slice(timestampLineIndex + 1).join('\n')}`);
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
return true;
|
|
197
|
-
}
|
|
198
|
-
|
|
199
39
|
async function translateChunk(chunk, args, maxRetries = 3) {
|
|
200
|
-
const
|
|
201
|
-
const chunkText = chunk.captions
|
|
202
|
-
.map(c => {
|
|
203
|
-
const startTime = msToTimestamp(c.start, format);
|
|
204
|
-
const endTime = msToTimestamp(c.end, format);
|
|
205
|
-
const index = format === 'srt' || !c.identifier ? c.index : c.identifier;
|
|
206
|
-
return `${index}\n${startTime} --> ${endTime}\n${c.content}`;
|
|
207
|
-
})
|
|
208
|
-
.join('\n\n');
|
|
40
|
+
const chunkText = build(chunk.captions, { format: args.format, preserveIndexes: true });
|
|
209
41
|
|
|
210
42
|
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
211
43
|
try {
|
|
@@ -223,61 +55,9 @@ async function translateChunk(chunk, args, maxRetries = 3) {
|
|
|
223
55
|
}
|
|
224
56
|
|
|
225
57
|
const content = match[1].trim();
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
let hasEmptyBlocks = false;
|
|
230
|
-
const processedBlocks = chunk.captions.map((caption, index) => {
|
|
231
|
-
const block = blocks[index];
|
|
232
|
-
if (!block) {
|
|
233
|
-
logger.warn(`Attempt ${attempt + 1}: Empty block for caption ${caption.index}`);
|
|
234
|
-
hasEmptyBlocks = true;
|
|
235
|
-
return null;
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
const lines = block.split('\n');
|
|
239
|
-
if (lines.length < 3) {
|
|
240
|
-
logger.warn(`Attempt ${attempt + 1}: Invalid block structure for caption ${caption.index}`);
|
|
241
|
-
hasEmptyBlocks = true;
|
|
242
|
-
return null;
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
const content = lines.slice(2).join('\n').trim();
|
|
246
|
-
if (!content) {
|
|
247
|
-
logger.warn(`Attempt ${attempt + 1}: Empty content for caption ${caption.index}`);
|
|
248
|
-
hasEmptyBlocks = true;
|
|
249
|
-
return null;
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
return {
|
|
253
|
-
...caption,
|
|
254
|
-
content: content,
|
|
255
|
-
text: content,
|
|
256
|
-
chunkStart: chunk.startIndex,
|
|
257
|
-
chunkEnd: chunk.endIndex
|
|
258
|
-
};
|
|
259
|
-
});
|
|
260
|
-
|
|
261
|
-
// If no empty blocks, return the processed blocks
|
|
262
|
-
if (!hasEmptyBlocks) {
|
|
263
|
-
return processedBlocks;
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
// If this was the last attempt and we still have empty blocks,
|
|
267
|
-
// return what we have but keep original content for empty blocks
|
|
268
|
-
if (attempt === maxRetries - 1) {
|
|
269
|
-
logger.warn(`Failed to get valid translations for all blocks after ${maxRetries} attempts`);
|
|
270
|
-
return chunk.captions.map((caption, index) => {
|
|
271
|
-
return processedBlocks[index] || {
|
|
272
|
-
...caption,
|
|
273
|
-
chunkStart: chunk.startIndex,
|
|
274
|
-
chunkEnd: chunk.endIndex
|
|
275
|
-
};
|
|
276
|
-
});
|
|
277
|
-
}
|
|
278
|
-
|
|
279
|
-
// Otherwise, try again
|
|
280
|
-
logger.info(`Retrying chunk due to empty blocks (attempt ${attempt + 1}/${maxRetries})`);
|
|
58
|
+
|
|
59
|
+
const parsed = parse(content, { preserveIndexes: true });
|
|
60
|
+
return parsed.cues;
|
|
281
61
|
|
|
282
62
|
} catch (e) {
|
|
283
63
|
logger.error(`Error translating chunk ${chunk.startIndex}-${chunk.endIndex} (attempt ${attempt + 1}): ${e}`);
|
|
@@ -303,8 +83,8 @@ export default {
|
|
|
303
83
|
executePathway: async ({args}) => {
|
|
304
84
|
try {
|
|
305
85
|
const { text, format = 'srt' } = args;
|
|
306
|
-
const
|
|
307
|
-
const captions =
|
|
86
|
+
const parsed = parse(text, { format, preserveIndexes: true });
|
|
87
|
+
const captions = parsed.cues;
|
|
308
88
|
|
|
309
89
|
if (!captions || captions.length === 0) {
|
|
310
90
|
throw new Error("No captions found in input");
|
|
@@ -330,40 +110,12 @@ export default {
|
|
|
330
110
|
// Select best translation for each caption
|
|
331
111
|
const finalCaptions = captions.map(caption => {
|
|
332
112
|
const translations = translationMap.get(caption.index) || [caption];
|
|
333
|
-
|
|
113
|
+
const bestTranslation = selectBestTranslation(translations, caption.index, caption.index);
|
|
114
|
+
const text = bestTranslation?.text || caption?.text;
|
|
115
|
+
return { ...caption, text };
|
|
334
116
|
});
|
|
335
|
-
|
|
336
|
-
// Format the output
|
|
337
|
-
let result = finalCaptions
|
|
338
|
-
.map(caption => {
|
|
339
|
-
const startTime = msToTimestamp(caption.start, format);
|
|
340
|
-
const endTime = msToTimestamp(caption.end, format);
|
|
341
|
-
// Only include index/identifier if it was in the original
|
|
342
|
-
const hasIdentifier = caption.identifier !== null || format === 'srt';
|
|
343
|
-
const index = format === 'srt' || !caption.identifier ? caption.index : caption.identifier;
|
|
344
|
-
return hasIdentifier ?
|
|
345
|
-
`${index}\n${startTime} --> ${endTime}\n${caption.content}` :
|
|
346
|
-
`${startTime} --> ${endTime}\n${caption.content}`;
|
|
347
|
-
})
|
|
348
|
-
.join('\n\n')
|
|
349
|
-
.trim();
|
|
350
|
-
|
|
351
|
-
// Add final newline only if input had one
|
|
352
|
-
if (text.endsWith('\n')) {
|
|
353
|
-
result += '\n';
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
// Add WEBVTT header for VTT format
|
|
357
|
-
if (format === 'vtt') {
|
|
358
|
-
result = 'WEBVTT\n\n' + result;
|
|
359
|
-
}
|
|
360
|
-
|
|
361
|
-
// Validate final output
|
|
362
|
-
if (!validateFinalOutput(result, text, format)) {
|
|
363
|
-
throw new Error("Final subtitle reconstruction failed validation");
|
|
364
|
-
}
|
|
365
117
|
|
|
366
|
-
return
|
|
118
|
+
return build(finalCaptions, { format, preserveIndexes: true });
|
|
367
119
|
} catch (e) {
|
|
368
120
|
logger.error(`Subtitle translation failed: ${e}`);
|
|
369
121
|
throw e;
|
|
@@ -83,43 +83,23 @@ class PathwayResolver {
|
|
|
83
83
|
if (requestProgress.progress === 1 && this.rootRequestId) {
|
|
84
84
|
delete requestProgress.progress;
|
|
85
85
|
}
|
|
86
|
-
publishRequestProgress(requestProgress);
|
|
86
|
+
publishRequestProgress({...requestProgress, info: this.tool || ''});
|
|
87
87
|
}
|
|
88
88
|
|
|
89
89
|
try {
|
|
90
90
|
responseData = await this.executePathway(args);
|
|
91
91
|
}
|
|
92
92
|
catch (error) {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
} else {
|
|
100
|
-
publishRequestProgress({
|
|
101
|
-
requestId: this.rootRequestId || this.requestId,
|
|
102
|
-
progress: 1,
|
|
103
|
-
data: error.message || error.toString(),
|
|
104
|
-
});
|
|
105
|
-
}
|
|
93
|
+
publishRequestProgress({
|
|
94
|
+
requestId: this.rootRequestId || this.requestId,
|
|
95
|
+
progress: 1,
|
|
96
|
+
data: '',
|
|
97
|
+
info: 'ERROR: ' + error.message || error.toString()
|
|
98
|
+
});
|
|
106
99
|
}
|
|
107
100
|
|
|
108
|
-
// If the response is a
|
|
109
|
-
if (
|
|
110
|
-
const { completedCount=1, totalCount=1 } = requestState[this.requestId];
|
|
111
|
-
requestState[this.requestId].data = responseData;
|
|
112
|
-
|
|
113
|
-
// some models don't support progress updates
|
|
114
|
-
if (!modelTypesExcludedFromProgressUpdates.includes(this.model.type)) {
|
|
115
|
-
await publishNestedRequestProgress({
|
|
116
|
-
requestId: this.rootRequestId || this.requestId,
|
|
117
|
-
progress: Math.min(completedCount,totalCount) / totalCount,
|
|
118
|
-
data: JSON.stringify(responseData),
|
|
119
|
-
});
|
|
120
|
-
}
|
|
121
|
-
// If the response is an object, it's a streaming response
|
|
122
|
-
} else {
|
|
101
|
+
// If the response is a stream, handle it as streaming response
|
|
102
|
+
if (responseData && typeof responseData.on === 'function') {
|
|
123
103
|
try {
|
|
124
104
|
const incomingMessage = responseData;
|
|
125
105
|
let streamEnded = false;
|
|
@@ -184,11 +164,25 @@ class PathwayResolver {
|
|
|
184
164
|
publishRequestProgress({
|
|
185
165
|
requestId: this.requestId,
|
|
186
166
|
progress: 1,
|
|
187
|
-
data: '
|
|
167
|
+
data: '',
|
|
168
|
+
info: 'ERROR: Stream read failed'
|
|
188
169
|
});
|
|
189
170
|
} else {
|
|
190
171
|
return;
|
|
191
172
|
}
|
|
173
|
+
} else {
|
|
174
|
+
const { completedCount = 1, totalCount = 1 } = requestState[this.requestId];
|
|
175
|
+
requestState[this.requestId].data = responseData;
|
|
176
|
+
|
|
177
|
+
// some models don't support progress updates
|
|
178
|
+
if (!modelTypesExcludedFromProgressUpdates.includes(this.model.type)) {
|
|
179
|
+
await publishNestedRequestProgress({
|
|
180
|
+
requestId: this.rootRequestId || this.requestId,
|
|
181
|
+
progress: Math.min(completedCount, totalCount) / totalCount,
|
|
182
|
+
data: responseData,
|
|
183
|
+
info: this.tool || ''
|
|
184
|
+
});
|
|
185
|
+
}
|
|
192
186
|
}
|
|
193
187
|
}
|
|
194
188
|
|
|
@@ -197,7 +191,13 @@ class PathwayResolver {
|
|
|
197
191
|
this.previousResult = mergeData.previousResult ? mergeData.previousResult : this.previousResult;
|
|
198
192
|
this.warnings = [...this.warnings, ...(mergeData.warnings || [])];
|
|
199
193
|
this.errors = [...this.errors, ...(mergeData.errors || [])];
|
|
200
|
-
|
|
194
|
+
try {
|
|
195
|
+
const mergeDataTool = typeof mergeData.tool === 'string' ? JSON.parse(mergeData.tool) : mergeData.tool || {};
|
|
196
|
+
const thisTool = typeof this.tool === 'string' ? JSON.parse(this.tool) : this.tool || {};
|
|
197
|
+
this.tool = JSON.stringify({ ...thisTool, ...mergeDataTool });
|
|
198
|
+
} catch (error) {
|
|
199
|
+
logger.warn('Error merging pathway resolver tool objects: ' + error);
|
|
200
|
+
}
|
|
201
201
|
}
|
|
202
202
|
}
|
|
203
203
|
|
|
@@ -4,6 +4,7 @@ import { fileURLToPath } from 'url';
|
|
|
4
4
|
import { dirname } from 'path';
|
|
5
5
|
import fs from 'fs';
|
|
6
6
|
import path from 'path';
|
|
7
|
+
import { SubtitleUtils } from '@aj-archipelago/subvibe';
|
|
7
8
|
|
|
8
9
|
const __filename = fileURLToPath(import.meta.url);
|
|
9
10
|
const __dirname = dirname(__filename);
|
|
@@ -45,12 +46,50 @@ async function testSubtitleTranslation(t, text, language = 'English', format = '
|
|
|
45
46
|
// Check timestamps based on format
|
|
46
47
|
const timestampPattern = format === 'srt'
|
|
47
48
|
? /\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/g
|
|
48
|
-
:
|
|
49
|
+
: /(?:\d{2}:)?\d{2}:\d{2}\.\d{3} --> (?:\d{2}:)?\d{2}:\d{2}\.\d{3}/g;
|
|
49
50
|
|
|
50
51
|
const originalTimestamps = text.match(timestampPattern);
|
|
51
52
|
const translatedTimestamps = result.match(timestampPattern);
|
|
53
|
+
|
|
54
|
+
// Compare timestamps using SubtitleUtils.parseLooseTime
|
|
55
|
+
const areTimestampsEquivalent = originalTimestamps?.every((timestamp, index) => {
|
|
56
|
+
const [origStart, origEnd] = timestamp.split(' --> ');
|
|
57
|
+
const [transStart, transEnd] = translatedTimestamps[index].split(' --> ');
|
|
58
|
+
|
|
59
|
+
const origStartTime = SubtitleUtils.parseLooseTime(origStart);
|
|
60
|
+
const origEndTime = SubtitleUtils.parseLooseTime(origEnd);
|
|
61
|
+
const transStartTime = SubtitleUtils.parseLooseTime(transStart);
|
|
62
|
+
const transEndTime = SubtitleUtils.parseLooseTime(transEnd);
|
|
63
|
+
|
|
64
|
+
return origStartTime === transStartTime && origEndTime === transEndTime;
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
if (!areTimestampsEquivalent) {
|
|
68
|
+
const differences = originalTimestamps?.map((timestamp, index) => {
|
|
69
|
+
const [origStart, origEnd] = timestamp.split(' --> ');
|
|
70
|
+
const [transStart, transEnd] = translatedTimestamps[index].split(' --> ');
|
|
71
|
+
|
|
72
|
+
const origStartTime = SubtitleUtils.parseLooseTime(origStart);
|
|
73
|
+
const origEndTime = SubtitleUtils.parseLooseTime(origEnd);
|
|
74
|
+
const transStartTime = SubtitleUtils.parseLooseTime(transStart);
|
|
75
|
+
const transEndTime = SubtitleUtils.parseLooseTime(transEnd);
|
|
76
|
+
|
|
77
|
+
if (origStartTime !== transStartTime || origEndTime !== transEndTime) {
|
|
78
|
+
return {
|
|
79
|
+
index,
|
|
80
|
+
original: timestamp,
|
|
81
|
+
translated: translatedTimestamps[index],
|
|
82
|
+
parsedOriginal: { start: origStartTime, end: origEndTime },
|
|
83
|
+
parsedTranslated: { start: transStartTime, end: transEndTime }
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
return null;
|
|
87
|
+
}).filter(Boolean);
|
|
88
|
+
|
|
89
|
+
console.log('Timestamp differences found:', differences);
|
|
90
|
+
}
|
|
52
91
|
|
|
53
|
-
t.
|
|
92
|
+
t.true(areTimestampsEquivalent, 'All timestamps should be equivalent when parsed');
|
|
54
93
|
|
|
55
94
|
// Check line count (accounting for WEBVTT header in VTT)
|
|
56
95
|
const originalLineCount = text.split('\n').length;
|
|
@@ -1,217 +0,0 @@
|
|
|
1
|
-
import test from 'ava';
|
|
2
|
-
import { convertSrtToVtt } from '../pathways/transcribe_gemini.js';
|
|
3
|
-
|
|
4
|
-
test('should return empty WebVTT for null or empty input', t => {
|
|
5
|
-
t.is(convertSrtToVtt(null), "WEBVTT\n\n");
|
|
6
|
-
t.is(convertSrtToVtt(''), "WEBVTT\n\n");
|
|
7
|
-
t.is(convertSrtToVtt(' '), "WEBVTT\n\n");
|
|
8
|
-
});
|
|
9
|
-
|
|
10
|
-
test('should convert basic SRT to WebVTT format', t => {
|
|
11
|
-
const srtInput =
|
|
12
|
-
`1
|
|
13
|
-
00:00:01,000 --> 00:00:04,000
|
|
14
|
-
Hello world`;
|
|
15
|
-
|
|
16
|
-
const expectedOutput =
|
|
17
|
-
`WEBVTT
|
|
18
|
-
|
|
19
|
-
1
|
|
20
|
-
00:00:01.000 --> 00:00:04.000
|
|
21
|
-
Hello world
|
|
22
|
-
|
|
23
|
-
`;
|
|
24
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
25
|
-
});
|
|
26
|
-
|
|
27
|
-
test('should convert multiple subtitle entries', t => {
|
|
28
|
-
const srtInput =
|
|
29
|
-
`1
|
|
30
|
-
00:00:01,000 --> 00:00:04,000
|
|
31
|
-
First subtitle
|
|
32
|
-
|
|
33
|
-
2
|
|
34
|
-
00:00:05,000 --> 00:00:08,000
|
|
35
|
-
Second subtitle`;
|
|
36
|
-
|
|
37
|
-
const expectedOutput =
|
|
38
|
-
`WEBVTT
|
|
39
|
-
|
|
40
|
-
1
|
|
41
|
-
00:00:01.000 --> 00:00:04.000
|
|
42
|
-
First subtitle
|
|
43
|
-
|
|
44
|
-
2
|
|
45
|
-
00:00:05.000 --> 00:00:08.000
|
|
46
|
-
Second subtitle
|
|
47
|
-
|
|
48
|
-
`;
|
|
49
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
test('should handle DOS line endings', t => {
|
|
53
|
-
const srtInput = "1\r\n00:00:01,000 --> 00:00:04,000\r\nHello world\r\n";
|
|
54
|
-
const expectedOutput = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:04.000\nHello world\n\n";
|
|
55
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
test('should handle multi-line subtitles', t => {
|
|
59
|
-
const srtInput =
|
|
60
|
-
`1
|
|
61
|
-
00:00:01,000 --> 00:00:04,000
|
|
62
|
-
First line
|
|
63
|
-
Second line
|
|
64
|
-
Third line
|
|
65
|
-
|
|
66
|
-
2
|
|
67
|
-
00:00:05,000 --> 00:00:08,000
|
|
68
|
-
Another subtitle`;
|
|
69
|
-
|
|
70
|
-
const expectedOutput =
|
|
71
|
-
`WEBVTT
|
|
72
|
-
|
|
73
|
-
1
|
|
74
|
-
00:00:01.000 --> 00:00:04.000
|
|
75
|
-
First line
|
|
76
|
-
Second line
|
|
77
|
-
Third line
|
|
78
|
-
|
|
79
|
-
2
|
|
80
|
-
00:00:05.000 --> 00:00:08.000
|
|
81
|
-
Another subtitle
|
|
82
|
-
|
|
83
|
-
`;
|
|
84
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
85
|
-
});
|
|
86
|
-
|
|
87
|
-
test('should handle invalid timestamp formats', t => {
|
|
88
|
-
const srtInput =
|
|
89
|
-
`1
|
|
90
|
-
invalid timestamp
|
|
91
|
-
Hello world
|
|
92
|
-
|
|
93
|
-
2
|
|
94
|
-
00:00:05,000 --> 00:00:08,000
|
|
95
|
-
Valid subtitle`;
|
|
96
|
-
|
|
97
|
-
const expectedOutput =
|
|
98
|
-
`WEBVTT
|
|
99
|
-
|
|
100
|
-
2
|
|
101
|
-
00:00:05.000 --> 00:00:08.000
|
|
102
|
-
Valid subtitle
|
|
103
|
-
|
|
104
|
-
`;
|
|
105
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
106
|
-
});
|
|
107
|
-
|
|
108
|
-
test('should convert comma to dot in timestamps', t => {
|
|
109
|
-
const srtInput =
|
|
110
|
-
`1
|
|
111
|
-
00:00:01,500 --> 00:00:04,750
|
|
112
|
-
Test subtitle`;
|
|
113
|
-
|
|
114
|
-
const expectedOutput =
|
|
115
|
-
`WEBVTT
|
|
116
|
-
|
|
117
|
-
1
|
|
118
|
-
00:00:01.500 --> 00:00:04.750
|
|
119
|
-
Test subtitle
|
|
120
|
-
|
|
121
|
-
`;
|
|
122
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
123
|
-
});
|
|
124
|
-
|
|
125
|
-
test('should handle extra whitespace in input', t => {
|
|
126
|
-
const srtInput = `
|
|
127
|
-
|
|
128
|
-
1
|
|
129
|
-
00:00:01,000 --> 00:00:04,000
|
|
130
|
-
Hello world
|
|
131
|
-
|
|
132
|
-
`;
|
|
133
|
-
const expectedOutput =
|
|
134
|
-
`WEBVTT
|
|
135
|
-
|
|
136
|
-
1
|
|
137
|
-
00:00:01.000 --> 00:00:04.000
|
|
138
|
-
Hello world
|
|
139
|
-
|
|
140
|
-
`;
|
|
141
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
142
|
-
});
|
|
143
|
-
|
|
144
|
-
test('should handle timestamps with only minutes and seconds', t => {
|
|
145
|
-
const srtInput =
|
|
146
|
-
`1
|
|
147
|
-
01:30,000 --> 02:45,500
|
|
148
|
-
Short timestamp format`;
|
|
149
|
-
|
|
150
|
-
const expectedOutput =
|
|
151
|
-
`WEBVTT
|
|
152
|
-
|
|
153
|
-
1
|
|
154
|
-
00:01:30.000 --> 00:02:45.500
|
|
155
|
-
Short timestamp format
|
|
156
|
-
|
|
157
|
-
`;
|
|
158
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
159
|
-
});
|
|
160
|
-
|
|
161
|
-
test('should handle ultra-short timestamps (SS.mmm)', t => {
|
|
162
|
-
const srtInput =
|
|
163
|
-
`1
|
|
164
|
-
03.298 --> 04.578
|
|
165
|
-
First line
|
|
166
|
-
|
|
167
|
-
2
|
|
168
|
-
04.578 --> 06.178
|
|
169
|
-
Second line`;
|
|
170
|
-
|
|
171
|
-
const expectedOutput =
|
|
172
|
-
`WEBVTT
|
|
173
|
-
|
|
174
|
-
1
|
|
175
|
-
00:00:03.298 --> 00:00:04.578
|
|
176
|
-
First line
|
|
177
|
-
|
|
178
|
-
2
|
|
179
|
-
00:00:04.578 --> 00:00:06.178
|
|
180
|
-
Second line
|
|
181
|
-
|
|
182
|
-
`;
|
|
183
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
184
|
-
});
|
|
185
|
-
|
|
186
|
-
test('should handle mixed timestamp formats', t => {
|
|
187
|
-
const srtInput =
|
|
188
|
-
`1
|
|
189
|
-
03.298 --> 04.578
|
|
190
|
-
First line
|
|
191
|
-
|
|
192
|
-
2
|
|
193
|
-
00:04.578 --> 00:06.178
|
|
194
|
-
Second line
|
|
195
|
-
|
|
196
|
-
3
|
|
197
|
-
00:00:06.178 --> 00:00:07.518
|
|
198
|
-
Third line`;
|
|
199
|
-
|
|
200
|
-
const expectedOutput =
|
|
201
|
-
`WEBVTT
|
|
202
|
-
|
|
203
|
-
1
|
|
204
|
-
00:00:03.298 --> 00:00:04.578
|
|
205
|
-
First line
|
|
206
|
-
|
|
207
|
-
2
|
|
208
|
-
00:00:04.578 --> 00:00:06.178
|
|
209
|
-
Second line
|
|
210
|
-
|
|
211
|
-
3
|
|
212
|
-
00:00:06.178 --> 00:00:07.518
|
|
213
|
-
Third line
|
|
214
|
-
|
|
215
|
-
`;
|
|
216
|
-
t.is(convertSrtToVtt(srtInput), expectedOutput);
|
|
217
|
-
});
|