@aj-archipelago/cortex 1.3.23 → 1.3.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/pathwayTools.js +16 -12
- package/package.json +2 -2
- package/pathways/system/entity/sys_entity_continue.js +1 -5
- package/pathways/system/entity/sys_entity_start.js +12 -20
- package/pathways/system/entity/sys_generator_memory.js +3 -3
- package/pathways/system/entity/sys_generator_results.js +1 -1
- package/pathways/system/rest_streaming/sys_openai_chat.js +2 -2
- package/pathways/transcribe_gemini.js +1 -296
- package/pathways/translate_subtitle.js +11 -259
- package/server/pathwayResolver.js +38 -34
- package/server/plugins/claude3VertexPlugin.js +10 -1
- package/server/plugins/gemini15ChatPlugin.js +4 -0
- package/tests/openai_api.test.js +43 -23
- package/tests/streaming.test.js +197 -0
- package/tests/translate_srt.test.js +41 -2
- package/tests/transcribe_gemini.test.js +0 -217
package/lib/pathwayTools.js
CHANGED
|
@@ -21,23 +21,25 @@ const callPathway = async (pathwayName, inArgs, pathwayResolver) => {
|
|
|
21
21
|
let rootRequestId = pathwayResolver?.rootRequestId || pathwayResolver?.requestId;
|
|
22
22
|
|
|
23
23
|
let data = await pathway.rootResolver(parent, {...args, rootRequestId}, { config, pathway, requestState } );
|
|
24
|
+
pathwayResolver && pathwayResolver.mergeResults(data);
|
|
25
|
+
|
|
26
|
+
let returnValue = data?.result || null;
|
|
24
27
|
|
|
25
28
|
if (args.async || args.stream) {
|
|
26
29
|
const { result: requestId } = data;
|
|
27
30
|
|
|
28
31
|
// Fire the resolver for the async requestProgress
|
|
29
|
-
logger.info(`Callpathway starting async requestProgress, requestId: ${requestId}`);
|
|
32
|
+
logger.info(`Callpathway starting async requestProgress, pathway: ${pathwayName}, requestId: ${requestId}`);
|
|
30
33
|
const { resolver, args } = requestState[requestId];
|
|
31
34
|
requestState[requestId].useRedis = false;
|
|
32
35
|
requestState[requestId].started = true;
|
|
33
36
|
|
|
34
|
-
|
|
37
|
+
resolver && await resolver(args);
|
|
38
|
+
|
|
39
|
+
returnValue = null;
|
|
35
40
|
}
|
|
36
41
|
|
|
37
|
-
|
|
38
|
-
pathwayResolver?.mergeResults(data);
|
|
39
|
-
|
|
40
|
-
return data?.result;
|
|
42
|
+
return returnValue;
|
|
41
43
|
};
|
|
42
44
|
|
|
43
45
|
const gpt3Encode = (text) => {
|
|
@@ -48,7 +50,7 @@ const gpt3Decode = (text) => {
|
|
|
48
50
|
return decode(text);
|
|
49
51
|
}
|
|
50
52
|
|
|
51
|
-
const say = async (requestId, message, maxMessageLength = Infinity) => {
|
|
53
|
+
const say = async (requestId, message, maxMessageLength = Infinity, voiceResponse = true) => {
|
|
52
54
|
try {
|
|
53
55
|
const chunks = getSemanticChunks(message, maxMessageLength);
|
|
54
56
|
|
|
@@ -60,11 +62,13 @@ const say = async (requestId, message, maxMessageLength = Infinity) => {
|
|
|
60
62
|
});
|
|
61
63
|
}
|
|
62
64
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
65
|
+
if (voiceResponse) {
|
|
66
|
+
await publishRequestProgress({
|
|
67
|
+
requestId,
|
|
68
|
+
progress: 0.5,
|
|
69
|
+
data: " ... "
|
|
70
|
+
});
|
|
71
|
+
}
|
|
68
72
|
|
|
69
73
|
await publishRequestProgress({
|
|
70
74
|
requestId,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "1.3.23",
|
|
3
|
+
"version": "1.3.25",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"private": false,
|
|
6
6
|
"repository": {
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
"type": "module",
|
|
34
34
|
"homepage": "https://github.com/aj-archipelago/cortex#readme",
|
|
35
35
|
"dependencies": {
|
|
36
|
-
"@aj-archipelago/subvibe": "^1.0.
|
|
36
|
+
"@aj-archipelago/subvibe": "^1.0.8",
|
|
37
37
|
"@apollo/server": "^4.7.3",
|
|
38
38
|
"@apollo/server-plugin-response-cache": "^4.1.2",
|
|
39
39
|
"@apollo/utils.keyvadapter": "^3.0.0",
|
package/pathways/system/entity/sys_entity_continue.js
CHANGED
|
@@ -57,11 +57,7 @@ export default {
|
|
|
57
57
|
|
|
58
58
|
const result = await callPathway(generatorPathway, newArgs, resolver);
|
|
59
59
|
|
|
60
|
-
if (args.stream) {
|
|
61
|
-
return "";
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
if (!result) {
|
|
60
|
+
if (!result && !args.stream) {
|
|
65
61
|
result = await callPathway('sys_generator_error', { ...args, text: `Tried to use a tool (${generatorPathway}), but no result was returned`, stream: false }, resolver);
|
|
66
62
|
}
|
|
67
63
|
|
package/pathways/system/entity/sys_entity_start.js
CHANGED
|
@@ -105,22 +105,12 @@ export default {
|
|
|
105
105
|
}
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
-
|
|
109
|
-
const [chatResponse, chatTitleResponse] = await Promise.all([
|
|
110
|
-
callPathway('sys_generator_quick', {...args, model: styleModel}, pathwayResolver),
|
|
111
|
-
callPathway('chat_title', { ...args, chatHistory: chatHistoryBeforeMemory, stream: false}),
|
|
112
|
-
]);
|
|
113
|
-
|
|
114
|
-
title = chatTitleResponse;
|
|
115
|
-
|
|
116
|
-
return chatResponse;
|
|
117
|
-
};
|
|
118
|
-
|
|
119
|
-
// start fetching the default response - we may need it later
|
|
108
|
+
// start fetching responses in parallel if not streaming
|
|
120
109
|
let fetchChatResponsePromise;
|
|
121
110
|
if (!args.stream) {
|
|
122
|
-
fetchChatResponsePromise =
|
|
111
|
+
fetchChatResponsePromise = callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver);
|
|
123
112
|
}
|
|
113
|
+
const fetchTitleResponsePromise = callPathway('chat_title', {...args, chatHistory: chatHistoryBeforeMemory, stream: false});
|
|
124
114
|
|
|
125
115
|
const visionContentPresent = chatArgsHasImageUrl(args);
|
|
126
116
|
|
|
@@ -223,6 +213,8 @@ export default {
|
|
|
223
213
|
}
|
|
224
214
|
}
|
|
225
215
|
|
|
216
|
+
title = await fetchTitleResponsePromise;
|
|
217
|
+
|
|
226
218
|
if (toolCallbackMessage) {
|
|
227
219
|
if (args.skipCallbackMessage) {
|
|
228
220
|
pathwayResolver.tool = JSON.stringify({ hideFromModel: false, search: false, title });
|
|
@@ -231,11 +223,11 @@ export default {
|
|
|
231
223
|
|
|
232
224
|
if (args.stream) {
|
|
233
225
|
if (!ackResponse) {
|
|
234
|
-
await say(pathwayResolver.requestId, toolCallbackMessage || "One moment please.", 10);
|
|
226
|
+
await say(pathwayResolver.requestId, toolCallbackMessage || "One moment please.", 10, args.voiceResponse ? true : false);
|
|
235
227
|
}
|
|
236
|
-
pathwayResolver.tool = JSON.stringify({ hideFromModel: false, search: false, title });
|
|
237
228
|
await callPathway('sys_entity_continue', { ...args, stream: true, generatorPathway: toolCallbackName }, pathwayResolver);
|
|
238
|
-
|
|
229
|
+
pathwayResolver.tool = JSON.stringify({ hideFromModel: false, search: false, title });
|
|
230
|
+
return;
|
|
239
231
|
}
|
|
240
232
|
|
|
241
233
|
pathwayResolver.tool = JSON.stringify({
|
|
@@ -250,15 +242,15 @@ export default {
|
|
|
250
242
|
return toolCallbackMessage || "One moment please.";
|
|
251
243
|
}
|
|
252
244
|
|
|
253
|
-
const chatResponse = await (fetchChatResponsePromise ||
|
|
245
|
+
const chatResponse = await (fetchChatResponsePromise || callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver));
|
|
254
246
|
pathwayResolver.tool = JSON.stringify({ search: false, title });
|
|
255
|
-
return args.stream ?
|
|
247
|
+
return args.stream ? null : chatResponse;
|
|
256
248
|
|
|
257
249
|
} catch (e) {
|
|
258
250
|
pathwayResolver.logError(e);
|
|
259
|
-
const chatResponse = await (fetchChatResponsePromise ||
|
|
251
|
+
const chatResponse = await (fetchChatResponsePromise || callPathway('sys_generator_quick', {...args, model: styleModel, ackResponse}, pathwayResolver));
|
|
260
252
|
pathwayResolver.tool = JSON.stringify({ search: false, title });
|
|
261
|
-
return args.stream ?
|
|
253
|
+
return args.stream ? null : chatResponse;
|
|
262
254
|
}
|
|
263
255
|
}
|
|
264
256
|
};
|
package/pathways/system/entity/sys_generator_memory.js
CHANGED
|
@@ -18,7 +18,7 @@ export default {
|
|
|
18
18
|
const { aiStyle, AI_STYLE_ANTHROPIC, AI_STYLE_OPENAI } = args;
|
|
19
19
|
const styleModel = aiStyle === "Anthropic" ? AI_STYLE_ANTHROPIC : AI_STYLE_OPENAI;
|
|
20
20
|
|
|
21
|
-
const memoryContext = await callPathway('sys_search_memory', { ...args, section: 'memoryAll', updateContext: true });
|
|
21
|
+
const memoryContext = await callPathway('sys_search_memory', { ...args, stream: false, section: 'memoryAll', updateContext: true });
|
|
22
22
|
if (memoryContext) {
|
|
23
23
|
const {toolCallId} = addToolCalls(args.chatHistory, "search memory for relevant information", "memory_lookup");
|
|
24
24
|
addToolResults(args.chatHistory, memoryContext, toolCallId);
|
|
@@ -26,9 +26,9 @@ export default {
|
|
|
26
26
|
|
|
27
27
|
let result;
|
|
28
28
|
if (args.voiceResponse) {
|
|
29
|
-
result = await callPathway('sys_generator_quick', { ...args, model: styleModel, stream: false });
|
|
29
|
+
result = await callPathway('sys_generator_quick', { ...args, model: styleModel, stream: false }, resolver);
|
|
30
30
|
} else {
|
|
31
|
-
result = await callPathway('sys_generator_quick', { ...args, model: styleModel });
|
|
31
|
+
result = await callPathway('sys_generator_quick', { ...args, model: styleModel }, resolver);
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
resolver.tool = JSON.stringify({ toolUsed: "memory" });
|
package/pathways/system/entity/sys_generator_results.js
CHANGED
|
@@ -341,7 +341,7 @@ Here are the information sources that were found:
|
|
|
341
341
|
clearTimeout(timeoutId);
|
|
342
342
|
}
|
|
343
343
|
|
|
344
|
-
if (!args.
|
|
344
|
+
if (!args.voiceResponse) {
|
|
345
345
|
const referencedSources = extractReferencedSources(result);
|
|
346
346
|
searchResults = searchResults.length ? pruneSearchResults(searchResults, referencedSources) : [];
|
|
347
347
|
}
|
package/pathways/transcribe_gemini.js
CHANGED
|
@@ -5,283 +5,6 @@ import { Prompt } from "../server/prompt.js";
|
|
|
5
5
|
|
|
6
6
|
const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide
|
|
7
7
|
|
|
8
|
-
export function convertSrtToVtt(data) {
|
|
9
|
-
if (!data || !data.trim()) {
|
|
10
|
-
return "WEBVTT\n\n";
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
// If it's already VTT format and has header
|
|
14
|
-
if (data.trim().startsWith("WEBVTT")) {
|
|
15
|
-
const lines = data.split("\n");
|
|
16
|
-
const result = ["WEBVTT", ""]; // Start with header and blank line
|
|
17
|
-
let currentCue = [];
|
|
18
|
-
|
|
19
|
-
for (let i = 0; i < lines.length; i++) {
|
|
20
|
-
const line = lines[i].trim();
|
|
21
|
-
|
|
22
|
-
// Skip empty lines and the WEBVTT header
|
|
23
|
-
if (!line || line === "WEBVTT") {
|
|
24
|
-
continue;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
// If it's a number by itself, it's a cue identifier
|
|
28
|
-
if (/^\d+$/.test(line)) {
|
|
29
|
-
// If we have a previous cue, add it with proper spacing
|
|
30
|
-
if (currentCue.length > 0) {
|
|
31
|
-
result.push(currentCue.join("\n"));
|
|
32
|
-
result.push(""); // Add blank line between cues
|
|
33
|
-
currentCue = [];
|
|
34
|
-
}
|
|
35
|
-
currentCue.push(line);
|
|
36
|
-
continue;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
// Check for and convert timestamps
|
|
40
|
-
const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
|
|
41
|
-
const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
|
|
42
|
-
const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
|
|
43
|
-
|
|
44
|
-
const fullMatch = line.match(fullTimeRegex);
|
|
45
|
-
const shortMatch = line.match(shortTimeRegex);
|
|
46
|
-
const ultraShortMatch = line.match(ultraShortTimeRegex);
|
|
47
|
-
|
|
48
|
-
if (fullMatch) {
|
|
49
|
-
// Already in correct format, just convert comma to dot
|
|
50
|
-
const convertedTime = line.replace(/,/g, '.');
|
|
51
|
-
currentCue.push(convertedTime);
|
|
52
|
-
} else if (shortMatch) {
|
|
53
|
-
// Convert MM:SS to HH:MM:SS
|
|
54
|
-
const convertedTime = `00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`;
|
|
55
|
-
currentCue.push(convertedTime);
|
|
56
|
-
} else if (ultraShortMatch) {
|
|
57
|
-
// Convert SS to HH:MM:SS
|
|
58
|
-
const convertedTime = `00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`;
|
|
59
|
-
currentCue.push(convertedTime);
|
|
60
|
-
} else if (!line.includes('-->')) {
|
|
61
|
-
// Must be subtitle text
|
|
62
|
-
currentCue.push(line);
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
// Add the last cue if there is one
|
|
67
|
-
if (currentCue.length > 0) {
|
|
68
|
-
result.push(currentCue.join("\n"));
|
|
69
|
-
result.push(""); // Add final blank line
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
// Join with newlines and ensure proper ending
|
|
73
|
-
return result.join("\n") + "\n";
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// remove dos newlines and trim
|
|
77
|
-
var srt = data.replace(/\r+/g, "");
|
|
78
|
-
srt = srt.replace(/^\s+|\s+$/g, "");
|
|
79
|
-
|
|
80
|
-
// Split into cues and filter out empty ones
|
|
81
|
-
var cuelist = srt.split("\n\n").filter(cue => cue.trim());
|
|
82
|
-
|
|
83
|
-
// Always add WEBVTT header
|
|
84
|
-
var result = "WEBVTT\n\n";
|
|
85
|
-
|
|
86
|
-
// Convert each cue to VTT format
|
|
87
|
-
for (const cue of cuelist) {
|
|
88
|
-
const lines = cue.split("\n").map(line => line.trim()).filter(line => line);
|
|
89
|
-
if (lines.length < 2) continue;
|
|
90
|
-
|
|
91
|
-
let output = [];
|
|
92
|
-
|
|
93
|
-
// Handle cue identifier
|
|
94
|
-
if (/^\d+$/.test(lines[0])) {
|
|
95
|
-
output.push(lines[0]);
|
|
96
|
-
lines.shift();
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
// Handle timestamp line
|
|
100
|
-
const timeLine = lines[0];
|
|
101
|
-
const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
|
|
102
|
-
const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
|
|
103
|
-
const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
|
|
104
|
-
|
|
105
|
-
const fullMatch = timeLine.match(fullTimeRegex);
|
|
106
|
-
const shortMatch = timeLine.match(shortTimeRegex);
|
|
107
|
-
const ultraShortMatch = timeLine.match(ultraShortTimeRegex);
|
|
108
|
-
|
|
109
|
-
if (fullMatch) {
|
|
110
|
-
output.push(timeLine.replace(/,/g, '.'));
|
|
111
|
-
} else if (shortMatch) {
|
|
112
|
-
output.push(`00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`);
|
|
113
|
-
} else if (ultraShortMatch) {
|
|
114
|
-
output.push(`00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`);
|
|
115
|
-
} else {
|
|
116
|
-
continue; // Invalid timestamp format
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
// Add remaining lines as subtitle text
|
|
120
|
-
output.push(...lines.slice(1));
|
|
121
|
-
|
|
122
|
-
// Add the cue to result
|
|
123
|
-
result += output.join("\n") + "\n\n";
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
return result;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
function convertSrtCue(caption) {
|
|
130
|
-
if (!caption || !caption.trim()) {
|
|
131
|
-
return "";
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
var cue = "";
|
|
135
|
-
var s = caption.split(/\n/);
|
|
136
|
-
|
|
137
|
-
// concatenate multi-line string separated in array into one
|
|
138
|
-
while (s.length > 3) {
|
|
139
|
-
for (var i = 3; i < s.length; i++) {
|
|
140
|
-
s[2] += "\n" + s[i];
|
|
141
|
-
}
|
|
142
|
-
s.splice(3, s.length - 3);
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
var line = 0;
|
|
146
|
-
|
|
147
|
-
// detect identifier
|
|
148
|
-
if (
|
|
149
|
-
s[0] &&
|
|
150
|
-
s[1] &&
|
|
151
|
-
!s[0].match(/\d+:\d+:\d+/) &&
|
|
152
|
-
s[1].match(/\d+:\d+:\d+/)
|
|
153
|
-
) {
|
|
154
|
-
const match = s[0].match(/^\d+$/); // Only match if the entire line is a number
|
|
155
|
-
if (match) {
|
|
156
|
-
cue += match[0] + "\n";
|
|
157
|
-
line += 1;
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
// get time strings
|
|
162
|
-
if (s[line] && s[line].match(/\d+:\d+:\d+/)) {
|
|
163
|
-
// convert time string
|
|
164
|
-
var m = s[line].match(
|
|
165
|
-
/(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*--?>\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
|
|
166
|
-
);
|
|
167
|
-
if (m) {
|
|
168
|
-
cue +=
|
|
169
|
-
m[1] +
|
|
170
|
-
":" +
|
|
171
|
-
m[2] +
|
|
172
|
-
":" +
|
|
173
|
-
m[3] +
|
|
174
|
-
"." +
|
|
175
|
-
m[4] +
|
|
176
|
-
" --> " +
|
|
177
|
-
m[5] +
|
|
178
|
-
":" +
|
|
179
|
-
m[6] +
|
|
180
|
-
":" +
|
|
181
|
-
m[7] +
|
|
182
|
-
"." +
|
|
183
|
-
m[8] +
|
|
184
|
-
"\n";
|
|
185
|
-
line += 1;
|
|
186
|
-
} else {
|
|
187
|
-
// Try alternate timestamp format
|
|
188
|
-
m = s[line].match(
|
|
189
|
-
/(\d{2}):(\d{2})\.(\d{3})\s*--?>\s*(\d{2}):(\d{2})\.(\d{3})/,
|
|
190
|
-
);
|
|
191
|
-
if (m) {
|
|
192
|
-
// Convert to full timestamp format
|
|
193
|
-
cue +=
|
|
194
|
-
"00:" +
|
|
195
|
-
m[1] +
|
|
196
|
-
":" +
|
|
197
|
-
m[2] +
|
|
198
|
-
"." +
|
|
199
|
-
m[3] +
|
|
200
|
-
" --> " +
|
|
201
|
-
"00:" +
|
|
202
|
-
m[4] +
|
|
203
|
-
":" +
|
|
204
|
-
m[5] +
|
|
205
|
-
"." +
|
|
206
|
-
m[6] +
|
|
207
|
-
"\n";
|
|
208
|
-
line += 1;
|
|
209
|
-
} else {
|
|
210
|
-
// Unrecognized timestring
|
|
211
|
-
return "";
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
} else {
|
|
215
|
-
// file format error or comment lines
|
|
216
|
-
return "";
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
// get cue text
|
|
220
|
-
if (s[line]) {
|
|
221
|
-
cue += s[line] + "\n\n";
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
return cue;
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
export function detectSubtitleFormat(text) {
|
|
228
|
-
// Remove DOS newlines and trim whitespace
|
|
229
|
-
const cleanText = text.replace(/\r+/g, "").trim();
|
|
230
|
-
const lines = cleanText.split("\n");
|
|
231
|
-
|
|
232
|
-
// Check if it's VTT format - be more lenient with the header
|
|
233
|
-
if (lines[0]?.trim() === "WEBVTT") {
|
|
234
|
-
return "vtt";
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
// Define regex patterns for timestamp formats
|
|
238
|
-
const srtTimeRegex =
|
|
239
|
-
/(\d{2}:\d{2}:\d{2})[,.]\d{3}\s*-->\s*(\d{2}:\d{2}:\d{2})[,.]\d{3}/;
|
|
240
|
-
const vttTimeRegex =
|
|
241
|
-
/(?:\d{2}:)?(\d{1,2})[.]\d{3}\s*-->\s*(?:\d{2}:)?(\d{1,2})[.]\d{3}/;
|
|
242
|
-
|
|
243
|
-
let hasSrtTimestamps = false;
|
|
244
|
-
let hasVttTimestamps = false;
|
|
245
|
-
let hasSequentialNumbers = false;
|
|
246
|
-
let lastNumber = 0;
|
|
247
|
-
|
|
248
|
-
// Look through first few lines to detect patterns
|
|
249
|
-
for (let i = 0; i < Math.min(lines.length, 12); i++) {
|
|
250
|
-
const line = lines[i]?.trim();
|
|
251
|
-
if (!line) continue;
|
|
252
|
-
|
|
253
|
-
// Check for timestamps
|
|
254
|
-
if (srtTimeRegex.test(line)) {
|
|
255
|
-
hasSrtTimestamps = true;
|
|
256
|
-
}
|
|
257
|
-
if (vttTimeRegex.test(line)) {
|
|
258
|
-
hasVttTimestamps = true;
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
// Check for sequential numbers
|
|
262
|
-
const numberMatch = line.match(/^(\d+)$/);
|
|
263
|
-
if (numberMatch) {
|
|
264
|
-
const num = parseInt(numberMatch[1]);
|
|
265
|
-
if (lastNumber === 0 || num === lastNumber + 1) {
|
|
266
|
-
hasSequentialNumbers = true;
|
|
267
|
-
lastNumber = num;
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
// If it has SRT-style timestamps (HH:MM:SS), it's SRT
|
|
273
|
-
if (hasSrtTimestamps && hasSequentialNumbers) {
|
|
274
|
-
return "srt";
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
// If it has VTT-style timestamps (MM:SS) or WEBVTT header, it's VTT
|
|
278
|
-
if (hasVttTimestamps) {
|
|
279
|
-
return "vtt";
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
return null;
|
|
283
|
-
}
|
|
284
|
-
|
|
285
8
|
export default {
|
|
286
9
|
prompt:
|
|
287
10
|
[
|
|
@@ -381,7 +104,7 @@ export default {
|
|
|
381
104
|
|
|
382
105
|
function getMessages(file, format) {
|
|
383
106
|
|
|
384
|
-
const responseFormat = format!== 'text' ? '
|
|
107
|
+
const responseFormat = format!== 'text' ? 'VTT' : 'text';
|
|
385
108
|
|
|
386
109
|
const messages = [
|
|
387
110
|
{"role": "system", "content": `Instructions:\nYou are an AI entity with expertise of transcription. Your response only contains the transcription, no comments or additonal stuff.
|
|
@@ -491,26 +214,8 @@ Even a single newline or space can cause the response to be rejected. You must f
|
|
|
491
214
|
// }
|
|
492
215
|
|
|
493
216
|
const result = await processChunksParallel(chunks, args);
|
|
494
|
-
|
|
495
|
-
// publishRequestProgress({
|
|
496
|
-
// requestId: this.rootRequestId || this.requestId,
|
|
497
|
-
// progress: 1,
|
|
498
|
-
// data: "a",
|
|
499
|
-
// });
|
|
500
217
|
|
|
501
218
|
if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
// convert as gemini output is unstable
|
|
506
|
-
for(let i = 0; i < result.length; i++) {
|
|
507
|
-
try{
|
|
508
|
-
result[i] = convertSrtToVtt(result[i]);
|
|
509
|
-
}catch(error){
|
|
510
|
-
logger.error(`Error converting to vtt: ${error}`);
|
|
511
|
-
}
|
|
512
|
-
}
|
|
513
|
-
|
|
514
219
|
const offsets = chunks.map((chunk, index) => chunk?.offset || index * OFFSET_CHUNK);
|
|
515
220
|
return alignSubtitles(result, responseFormat, offsets);
|
|
516
221
|
}
|