@aj-archipelago/cortex 1.3.6 → 1.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +578 -80
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/Chat.tsx +51 -11
- package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts +224 -219
- package/helper-apps/cortex-realtime-voice-server/src/Tools.ts +29 -71
- package/helper-apps/cortex-realtime-voice-server/src/cortex/memory.ts +8 -6
- package/helper-apps/cortex-realtime-voice-server/src/cortex/utils.ts +30 -15
- package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +62 -1
- package/helper-apps/cortex-realtime-voice-server/src/utils/prompt.ts +2 -11
- package/package.json +1 -1
- package/pathways/system/entity/memory/sys_memory_update.js +5 -4
- package/pathways/system/entity/memory/sys_search_memory.js +2 -1
- package/pathways/system/entity/shared/sys_entity_constants.js +1 -1
- package/pathways/system/entity/sys_entity_start.js +6 -7
- package/pathways/system/entity/sys_generator_voice_sample.js +2 -2
- package/pathways/translate_gpt4_omni.js +20 -0
- package/pathways/translate_subtitle.js +326 -135
- package/pathways/translate_subtitle_helper.js +4 -16
- package/server/pathwayResolver.js +1 -1
- package/server/plugins/claude3VertexPlugin.js +10 -17
- package/server/plugins/gemini15VisionPlugin.js +16 -3
- package/server/plugins/modelPlugin.js +27 -0
- package/server/plugins/openAiVisionPlugin.js +26 -8
- package/tests/multimodal_conversion.test.js +88 -12
- package/tests/translate_srt.test.js +66 -14
|
@@ -79,24 +79,23 @@ export default {
|
|
|
79
79
|
args.chatHistory = args.chatHistory.slice(-20);
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
-
const memoryContext = await callPathway('sys_read_memory', { ...args, section: 'memoryContext', priority: 0, recentHours: 0 });
|
|
83
|
-
if (memoryContext) {
|
|
84
|
-
args.chatHistory.splice(-1, 0, { role: 'assistant', content: memoryContext });
|
|
85
|
-
}
|
|
86
|
-
|
|
87
82
|
const pathwayResolver = resolver;
|
|
88
83
|
const { anthropicModel, openAIModel } = pathwayResolver.pathway;
|
|
89
|
-
|
|
90
84
|
const styleModel = args.aiStyle === "Anthropic" ? anthropicModel : openAIModel;
|
|
91
85
|
|
|
92
86
|
// if the model has been overridden, make sure to use it
|
|
93
87
|
if (pathwayResolver.modelName) {
|
|
94
88
|
args.model = pathwayResolver.modelName;
|
|
95
89
|
}
|
|
90
|
+
|
|
91
|
+
const memoryContext = await callPathway('sys_read_memory', { ...args, section: 'memoryContext', priority: 0, recentHours: 0, stream: false }, pathwayResolver);
|
|
92
|
+
if (memoryContext) {
|
|
93
|
+
args.chatHistory.splice(-1, 0, { role: 'assistant', content: memoryContext });
|
|
94
|
+
}
|
|
96
95
|
|
|
97
96
|
let ackResponse = null;
|
|
98
97
|
if (args.voiceResponse) {
|
|
99
|
-
ackResponse = await callPathway('sys_generator_ack', { ...args, stream: false }
|
|
98
|
+
ackResponse = await callPathway('sys_generator_ack', { ...args, stream: false });
|
|
100
99
|
if (ackResponse && ackResponse !== "none") {
|
|
101
100
|
await say(pathwayResolver.requestId, ackResponse, 100);
|
|
102
101
|
args.chatHistory.push({ role: 'assistant', content: ackResponse });
|
|
@@ -4,8 +4,8 @@ export default {
|
|
|
4
4
|
prompt:
|
|
5
5
|
[
|
|
6
6
|
new Prompt({ messages: [
|
|
7
|
-
{"role": "system", "content": `{{renderTemplate AI_MEMORY}}\n\n{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n{{renderTemplate AI_EXPERTISE}}\n{{renderTemplate AI_MEMORY_INSTRUCTIONS}}\n{{renderTemplate AI_DATETIME}}\nYour voice communication system needs some examples to train it to sound like you. Based on your
|
|
8
|
-
{"role": "user", "content": `Generate a sample dialogue for your voice communication system to use as a reference for
|
|
7
|
+
{"role": "system", "content": `{{renderTemplate AI_MEMORY}}\n\n{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n{{renderTemplate AI_EXPERTISE}}\n{{renderTemplate AI_MEMORY_INSTRUCTIONS}}\n{{renderTemplate AI_DATETIME}}\nYour voice communication system needs some examples to train it to sound like you. Based on your unique voice and style, generate some sample dialogue for your voice communication system to use as a reference for your style and tone. It can be anything, but make sure to overindex on your personality for good training examples. Make sure to reference a greeting and a closing statement. Put it between <EXAMPLE_DIALOGUE> tags and don't generate any other commentary outside of the tags.`},
|
|
8
|
+
{"role": "user", "content": `Generate a sample dialogue for your voice communication system to use as a reference for representingyour style and tone.`},
|
|
9
9
|
]}),
|
|
10
10
|
],
|
|
11
11
|
inputParameters: {
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { Prompt } from '../server/prompt.js';
|
|
2
|
+
|
|
3
|
+
// Pathway configuration: translates user-supplied text into the language
// named by the `to` input parameter using a single system+user prompt.
export default {
    prompt: [
        new Prompt({
            messages: [
                // System message: instructs the model to emit only the translation.
                {
                    role: "system",
                    content: "Assistant is a highly skilled multilingual translator for a prestigious news agency. When the user posts any text to translate in any language, assistant will create a translation of that text in {{to}}. All text that the user posts is to be translated - assistant must not respond to the user in any way and should produce only the translation with no additional notes or commentary.",
                },
                // User message: the raw text to translate (triple braces = unescaped).
                {
                    role: "user",
                    content: "{{{text}}}",
                },
            ],
        }),
    ],

    // Caller-overridable inputs and their defaults.
    inputParameters: {
        to: "Arabic",
        tokenRatio: 0.2,
    },

    inputChunkSize: 1000,
    model: "oai-gpt4o",
    enableDuplicateRequests: false,
    useParallelChunkProcessing: true,
}
|
|
@@ -1,164 +1,291 @@
|
|
|
1
|
-
import subsrt from "subsrt";
|
|
2
1
|
import logger from "../lib/logger.js";
|
|
3
2
|
import { callPathway } from "../lib/pathwayTools.js";
|
|
4
|
-
import { publishRequestProgress } from "../lib/redisSubscription.js";
|
|
5
3
|
|
|
6
|
-
function preprocessStr(str) {
|
|
4
|
+
/**
 * Normalizes raw subtitle text before parsing.
 *
 * Steps, in order: strip a leading byte-order mark, normalize line endings to
 * "\n", drop a leading "WEBVTT\n\n" header, convert SRT comma millisecond
 * separators to dots (SRT only), then keep only blocks that look like
 * captions (numeric index line, timestamp line, or identifier followed by a
 * timestamp line).
 *
 * @param {string} str - Raw subtitle file content.
 * @param {string} format - 'srt' enables comma-to-dot timestamp conversion.
 * @returns {string} Retained blocks separated by blank lines and terminated
 *   with "\n\n"; "" when `str` is falsy or an error occurs (the error is logged).
 */
function preprocessStr(str, format) {
    try {
        if (!str) return "";

        let content = str
            // Strip a leading BOM: otherwise the first index line ("\uFEFF1")
            // fails the numeric test below and the first caption is dropped.
            .replace(/^\uFEFF/, "")
            // Normalize line endings
            .replace(/\r\n?/g, "\n")
            // Remove WEBVTT header for processing
            .replace(/^WEBVTT\n\n/, '');

        // For SRT, convert commas to dots in timestamps
        if (format === 'srt') {
            content = content.replace(/(\d{2}:\d{2}:\d{2}),(\d{3})/g, "$1.$2");
        }

        const captionBlocks = content
            // Ensure each subtitle block is properly separated
            .split(/\n\s*\n/)
            .map((block) => block.trim())
            .filter((block) => {
                if (!block) return false;
                // Match both numeric indices (SRT) and optional caption identifiers (VTT)
                const firstLine = block.split('\n')[0];
                return (
                    /^\d+$/.test(firstLine) ||           // SRT style
                    /^\d{2}:\d{2}/.test(firstLine) ||    // VTT style without identifier
                    /^[^\n]+\n\d{2}:\d{2}/.test(block)   // VTT style with identifier
                );
            });

        return captionBlocks.join("\n\n") + "\n\n";
    } catch (e) {
        logger.error(`An error occurred in content text preprocessing: ${e}`);
        return "";
    }
}
|
|
21
38
|
|
|
22
|
-
|
|
23
|
-
const
|
|
24
|
-
|
|
25
|
-
|
|
39
|
+
/**
 * Converts a subtitle timestamp to milliseconds.
 *
 * Accepts "hh:mm:ss" or "mm:ss" with an optional fractional part introduced
 * by "." or ",". The fraction is read as a decimal fraction of a second
 * (".5" is 500 ms) and truncated to millisecond precision; a missing
 * fraction counts as 0 ms.
 *
 * @param {string} timeStr - Timestamp such as "01:02:03,004" or "00:01:02.500".
 * @returns {number} Milliseconds since 00:00:00.000.
 */
function timeToMs(timeStr) {
    const [time, fraction = ''] = timeStr.split(/[.,]/);

    const parts = time.split(':').map(Number);
    // Left-pad missing leading fields so "mm:ss" is treated as "00:mm:ss".
    while (parts.length < 3) {
        parts.unshift(0);
    }
    const [hours, minutes, seconds] = parts;

    // Normalize the fraction to exactly three digits before parsing so that
    // "5" -> 500 and "" -> 0 instead of 5 / NaN.
    const millis = Number.parseInt(fraction.padEnd(3, '0').slice(0, 3), 10);

    return (hours * 3600 + minutes * 60 + seconds) * 1000 + millis;
}
|
|
26
44
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
45
|
+
/**
 * Formats a millisecond offset as a subtitle timestamp "hh:mm:ss<sep>mmm".
 *
 * The separator is "," for SRT and "." otherwise. The fields are computed
 * arithmetically rather than by slicing an ISO date string, so offsets of
 * 24 hours or more keep counting hours instead of wrapping back to 00.
 * Fractional milliseconds are truncated and negative offsets are clamped to 0.
 *
 * @param {number} ms - Offset in milliseconds.
 * @param {string} format - 'srt' selects the comma separator.
 * @returns {string} Formatted timestamp.
 * @throws {RangeError} If `ms` is not a finite number.
 */
function msToTimestamp(ms, format) {
    if (!Number.isFinite(ms)) {
        throw new RangeError(`Invalid time value: ${ms}`);
    }

    const total = Math.max(0, Math.trunc(ms));
    const pad = (value, width) => String(value).padStart(width, '0');

    const hours = Math.floor(total / 3600000);
    const minutes = Math.floor((total % 3600000) / 60000);
    const seconds = Math.floor((total % 60000) / 1000);
    const millis = total % 1000;

    const separator = format === 'srt' ? ',' : '.';
    return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}${separator}${pad(millis, 3)}`;
}
|
|
32
50
|
|
|
33
|
-
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
translatedLines.splice(batch.length - 1, translatedLines.length - batch.length + 1, mergedLines.join(" "));
|
|
52
|
-
}else {
|
|
53
|
-
const emptyLines = Array(batch.length - translatedLines.length).fill("-");
|
|
54
|
-
translatedLines.push(...emptyLines);
|
|
51
|
+
/**
 * Parses preprocessed subtitle text into caption objects.
 *
 * Blocks are separated by blank lines. For SRT the first line must be a
 * numeric index and the second the timing line. For other formats the first
 * line is either the timing line or an identifier followed by the timing
 * line. Blocks that do not fit are skipped.
 *
 * Timing lines may omit the hours field ("mm:ss.ttt"), which WebVTT permits;
 * such timestamps are expanded to "00:mm:ss.ttt" before conversion.
 *
 * @param {string} content - Preprocessed subtitle text.
 * @param {string} format - 'srt' or another value (treated as VTT).
 * @returns {Array<object>} Captions with `type`, `index`, `identifier`,
 *   `start`, `end`, `duration` (all times in ms), `content` and `text`.
 */
function parseSubtitles(content, format) {
    // Hours are optional so that short-form WebVTT cue timings are accepted.
    const timelinePattern = /^((?:\d{2,}:)?\d{2}:\d{2}[.,]\d{3})\s*-->\s*((?:\d{2,}:)?\d{2}:\d{2}[.,]\d{3})/;

    // Convert to the "hh:mm:ss.ttt" shape that timeToMs is called with.
    const normalizeTimestamp = (stamp) => {
        const dotted = stamp.replace(',', '.');
        return dotted.split(':').length === 2 ? `00:${dotted}` : dotted;
    };

    const captions = [];

    for (const block of content.split(/\n\s*\n/)) {
        if (!block.trim()) continue;

        const lines = block.split('\n');
        if (lines.length < 2) continue;

        let numericIndex = null;
        let identifier = null;
        let timelineIndex;

        if (format === 'srt') {
            // SRT format: numeric index required
            if (!/^\d+$/.test(lines[0])) continue;
            numericIndex = Number.parseInt(lines[0], 10);
            timelineIndex = 1;
        } else if (timelinePattern.test(lines[0])) {
            // VTT cue without identifier
            timelineIndex = 0;
        } else {
            // VTT cue with identifier on the first line
            identifier = lines[0];
            timelineIndex = 1;
        }

        const timeMatch = lines[timelineIndex].match(timelinePattern);
        if (!timeMatch) continue;

        const start = timeToMs(normalizeTimestamp(timeMatch[1]));
        const end = timeToMs(normalizeTimestamp(timeMatch[2]));
        const body = lines.slice(timelineIndex + 1).join('\n');

        captions.push({
            type: "caption",
            // Captions without a numeric index are numbered by position.
            index: numericIndex !== null ? numericIndex : captions.length + 1,
            identifier,
            start,
            end,
            duration: end - start,
            content: body,
            text: body,
        });
    }

    return captions;
}
|
|
57
92
|
|
|
93
|
+
/**
 * Splits captions into consecutive windows that share `overlap` captions
 * with their neighbour.
 *
 * Iteration stops as soon as a window reaches the last caption, so no
 * trailing window is produced that lies entirely inside the previous one.
 *
 * @param {Array} captions - Captions to split.
 * @param {number} [chunkSize=20] - Maximum captions per chunk (integer >= 1).
 * @param {number} [overlap=3] - Captions shared between neighbouring chunks
 *   (integer, 0 <= overlap < chunkSize).
 * @returns {Array<{captions: Array, startIndex: number, endIndex: number, isOverlap: boolean}>}
 *   Chunks with inclusive start/end positions into `captions`.
 * @throws {RangeError} If `chunkSize` or `overlap` is out of range; such
 *   values would otherwise make the window never advance or skip captions.
 */
function splitIntoOverlappingChunks(captions, chunkSize = 20, overlap = 3) {
    if (!Number.isInteger(chunkSize) || chunkSize < 1) {
        throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
    }
    if (!Number.isInteger(overlap) || overlap < 0 || overlap >= chunkSize) {
        throw new RangeError(`overlap must be an integer in [0, chunkSize), got ${overlap}`);
    }

    const step = chunkSize - overlap;
    const chunks = [];

    for (let i = 0; i < captions.length; i += step) {
        const end = Math.min(i + chunkSize, captions.length);
        chunks.push({
            captions: captions.slice(i, end),
            startIndex: i,
            endIndex: end - 1,
            isOverlap: i > 0 || end < captions.length,
        });
        // The window already covers the tail; a further window would only
        // repeat captions that are fully contained in this one.
        if (end === captions.length) break;
    }

    return chunks;
}
|
|
58
107
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
while (lastNonEmptyIndex >= 0 && translatedLines[lastNonEmptyIndex].trim() === "") {
|
|
63
|
-
lastNonEmptyIndex--;
|
|
64
|
-
}
|
|
65
|
-
if (lastNonEmptyIndex >= 0) {
|
|
66
|
-
translatedLines[translatedLines.length - 1] = translatedLines[lastNonEmptyIndex];
|
|
67
|
-
translatedLines[lastNonEmptyIndex] = "";
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
|
|
108
|
+
/**
 * Chooses one translation of a caption from the candidates produced by
 * overlapping chunks.
 *
 * Each candidate carries `chunkStart`/`chunkEnd`, the bounds of the chunk it
 * was translated in. The candidate whose chunk edges are farthest from the
 * caption is preferred, i.e. the one translated nearest the middle of its
 * chunk, to avoid edge effects. On a tie the earlier candidate wins.
 *
 * @param {Array<object>} translations - Candidate translations.
 * @param {number} startIndex - Caption position compared against `chunkStart`.
 * @param {number} endIndex - Caption position compared against `chunkEnd`.
 * @returns {object|undefined} The chosen candidate; `undefined` if the list is empty.
 */
function selectBestTranslation(translations, startIndex, endIndex) {
    // If we only have one translation for this caption, use it
    if (translations.length === 1) return translations[0];

    // Distance from the caption to the nearest edge of the candidate's chunk.
    const distanceToChunkEdge = (candidate) => Math.min(
        Math.abs(candidate.chunkStart - startIndex),
        Math.abs(candidate.chunkEnd - endIndex)
    );

    let best = translations[0];
    for (const candidate of translations.slice(1)) {
        // Larger distance = deeper inside the chunk = more surrounding context.
        if (distanceToChunkEdge(candidate) > distanceToChunkEdge(best)) {
            best = candidate;
        }
    }
    return best;
}
|
|
78
126
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
127
|
+
/**
 * Sanity-checks a reconstructed subtitle document.
 *
 * Checks, in order: non-empty result; "WEBVTT\n\n" prefix when format is
 * 'vtt'; at least one timing line in the format's separator style; result
 * length at least half the original's; and, for every block, at least two
 * lines, a timing line, and content after the timing line. The first failed
 * check is logged and ends validation.
 *
 * @param {string} result - Reconstructed subtitle text.
 * @param {string} originalText - Input text, used for the length ratio check.
 * @param {string} format - 'srt' or 'vtt'.
 * @returns {boolean} true when every check passes, otherwise false.
 */
function validateFinalOutput(result, originalText, format) {
    // Log the reason and report failure in one step.
    const fail = (message) => {
        logger.error(message);
        return false;
    };

    // Basic structure validation
    if (!result || !result.trim()) {
        return fail("Empty or whitespace-only result");
    }

    // Check for VTT header if needed
    if (format === 'vtt' && !result.startsWith('WEBVTT\n\n')) {
        return fail("Missing WEBVTT header");
    }

    // Check for timestamp format
    let timestampPattern;
    if (format === 'srt') {
        timestampPattern = /\d{2}:\d{2}:\d{2},\d{3}\s*-->\s*\d{2}:\d{2}:\d{2},\d{3}/;
    } else {
        timestampPattern = /\d{2}:\d{2}:\d{2}\.\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}\.\d{3}/;
    }

    if (!timestampPattern.test(result)) {
        return fail(`No valid ${format.toUpperCase()} timestamps found in result`);
    }

    // Check overall length ratio
    if (result.length < originalText.length * 0.5) {
        return fail(`Result length (${result.length}) is less than 50% of original length (${originalText.length})`);
    }

    // Validate subtitle block structure
    const blocks = result.split(/\n\s*\n/).filter((candidate) => candidate.trim());

    // Skip WEBVTT header for VTT format
    const firstCueBlock = format === 'vtt' && blocks[0].trim() === 'WEBVTT' ? 1 : 0;

    for (let i = firstCueBlock; i < blocks.length; i++) {
        const block = blocks[i];
        const lines = block.trim().split('\n');

        if (lines.length < 2) {
            return fail(`Block ${i + 1} has insufficient lines (${lines.length}):\n${block}`);
        }

        // Find the timestamp line
        const timestampLineIndex = lines.findIndex((line) => timestampPattern.test(line));
        if (timestampLineIndex === -1) {
            return fail(`Block ${i + 1} has no valid timestamp line:\n${block}`);
        }

        // Check that we have content after the timestamp
        if (timestampLineIndex === lines.length - 1) {
            return fail(`Block ${i + 1} has no content after timestamp:\n${block}`);
        }

        // Log the content for inspection
        logger.debug(`Block ${i + 1} content:\n${lines.slice(timestampLineIndex + 1).join('\n')}`);
    }

    return true;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Translates one chunk of captions through the "translate_subtitle_helper"
 * pathway, retrying when the response is unusable.
 *
 * The chunk is serialized as index/timing/content blocks and sent as `text`.
 * The response must contain a <SUBTITLES>…</SUBTITLES> section whose blocks
 * are matched to the chunk's captions by position; each block needs at least
 * three lines, and everything from the third line on becomes the caption's
 * translated content.
 *
 * @param {object} chunk - `{ captions, startIndex, endIndex }`.
 * @param {object} args - Pathway arguments; `args.format` defaults to 'srt'.
 * @param {number} [maxRetries=3] - Maximum number of attempts.
 * @returns {Promise<Array<object>>} One entry per caption, tagged with
 *   `chunkStart`/`chunkEnd`. After the final attempt, captions that still
 *   lack a usable translation keep their original content.
 * @throws {Error} Rethrows the pathway error from the final attempt, or a
 *   generic failure if no attempt produced a usable response.
 */
async function translateChunk(chunk, args, maxRetries = 3) {
    const format = args.format || 'srt';

    const serializedCaptions = [];
    for (const c of chunk.captions) {
        const startTime = msToTimestamp(c.start, format);
        const endTime = msToTimestamp(c.end, format);
        const index = format === 'srt' || !c.identifier ? c.index : c.identifier;
        serializedCaptions.push(`${index}\n${startTime} --> ${endTime}\n${c.content}`);
    }
    const chunkText = serializedCaptions.join('\n\n');

    for (let attempt = 0; attempt < maxRetries; attempt++) {
        const isFinalAttempt = attempt === maxRetries - 1;

        try {
            const translated = await callPathway("translate_subtitle_helper", {
                ...args,
                text: chunkText,
                async: false,
            });

            // Basic validation - just check for SUBTITLES tags and some content
            const match = translated.match(/<SUBTITLES>([\s\S]*)<\/SUBTITLES>/);
            const tagContent = match ? match[1].trim() : "";
            if (!tagContent) {
                logger.warn(`Attempt ${attempt + 1}: Invalid translation format`);
                continue;
            }

            const blocks = tagContent.split(/\n\s*\n/);

            // Build the translated caption at `position`, or null (with a
            // warning) when the matching response block is unusable.
            const toTranslatedCaption = (caption, position) => {
                const block = blocks[position];
                if (!block) {
                    logger.warn(`Attempt ${attempt + 1}: Empty block for caption ${caption.index}`);
                    return null;
                }

                const lines = block.split('\n');
                if (lines.length < 3) {
                    logger.warn(`Attempt ${attempt + 1}: Invalid block structure for caption ${caption.index}`);
                    return null;
                }

                // Lines 0 and 1 are taken to be the index and timing lines.
                const translatedText = lines.slice(2).join('\n').trim();
                if (!translatedText) {
                    logger.warn(`Attempt ${attempt + 1}: Empty content for caption ${caption.index}`);
                    return null;
                }

                return {
                    ...caption,
                    content: translatedText,
                    text: translatedText,
                    chunkStart: chunk.startIndex,
                    chunkEnd: chunk.endIndex,
                };
            };

            const processedBlocks = chunk.captions.map(toTranslatedCaption);
            const hasEmptyBlocks = processedBlocks.some((entry) => entry === null);

            // If no empty blocks, return the processed blocks
            if (!hasEmptyBlocks) {
                return processedBlocks;
            }

            if (!isFinalAttempt) {
                // Otherwise, try again
                logger.info(`Retrying chunk due to empty blocks (attempt ${attempt + 1}/${maxRetries})`);
                continue;
            }

            // Last attempt and we still have empty blocks: return what we
            // have but keep original content for the empty ones.
            logger.warn(`Failed to get valid translations for all blocks after ${maxRetries} attempts`);
            return chunk.captions.map((caption, position) => {
                const translatedCaption = processedBlocks[position];
                if (translatedCaption) {
                    return translatedCaption;
                }
                return {
                    ...caption,
                    chunkStart: chunk.startIndex,
                    chunkEnd: chunk.endIndex,
                };
            });
        } catch (e) {
            logger.error(`Error translating chunk ${chunk.startIndex}-${chunk.endIndex} (attempt ${attempt + 1}): ${e}`);
            if (isFinalAttempt) throw e;
        }
    }

    throw new Error(`Failed to translate chunk ${chunk.startIndex}-${chunk.endIndex} after ${maxRetries} attempts`);
}
|
|
163
290
|
|
|
164
291
|
export default {
|
|
@@ -173,9 +300,73 @@ export default {
|
|
|
173
300
|
model: "oai-gpt4o",
|
|
174
301
|
enableDuplicateRequests: false,
|
|
175
302
|
timeout: 3600,
|
|
176
|
-
executePathway: async (
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
303
|
+
executePathway: async ({args}) => {
|
|
304
|
+
try {
|
|
305
|
+
const { text, format = 'srt' } = args;
|
|
306
|
+
const preprocessedText = preprocessStr(text, format);
|
|
307
|
+
const captions = parseSubtitles(preprocessedText, format);
|
|
308
|
+
|
|
309
|
+
if (!captions || captions.length === 0) {
|
|
310
|
+
throw new Error("No captions found in input");
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
// Split into overlapping chunks
|
|
314
|
+
const chunks = splitIntoOverlappingChunks(captions);
|
|
315
|
+
logger.info(`Split subtitles into ${chunks.length} overlapping chunks`);
|
|
316
|
+
|
|
317
|
+
// Translate all chunks in parallel
|
|
318
|
+
const chunkPromises = chunks.map(chunk => translateChunk(chunk, args));
|
|
319
|
+
const translatedChunks = await Promise.all(chunkPromises);
|
|
320
|
+
|
|
321
|
+
// Create a map of caption index to all its translations
|
|
322
|
+
const translationMap = new Map();
|
|
323
|
+
translatedChunks.flat().forEach(caption => {
|
|
324
|
+
if (!translationMap.has(caption.index)) {
|
|
325
|
+
translationMap.set(caption.index, []);
|
|
326
|
+
}
|
|
327
|
+
translationMap.get(caption.index).push(caption);
|
|
328
|
+
});
|
|
329
|
+
|
|
330
|
+
// Select best translation for each caption
|
|
331
|
+
const finalCaptions = captions.map(caption => {
|
|
332
|
+
const translations = translationMap.get(caption.index) || [caption];
|
|
333
|
+
return selectBestTranslation(translations, caption.index, caption.index);
|
|
334
|
+
});
|
|
335
|
+
|
|
336
|
+
// Format the output
|
|
337
|
+
let result = finalCaptions
|
|
338
|
+
.map(caption => {
|
|
339
|
+
const startTime = msToTimestamp(caption.start, format);
|
|
340
|
+
const endTime = msToTimestamp(caption.end, format);
|
|
341
|
+
// Only include index/identifier if it was in the original
|
|
342
|
+
const hasIdentifier = caption.identifier !== null || format === 'srt';
|
|
343
|
+
const index = format === 'srt' || !caption.identifier ? caption.index : caption.identifier;
|
|
344
|
+
return hasIdentifier ?
|
|
345
|
+
`${index}\n${startTime} --> ${endTime}\n${caption.content}` :
|
|
346
|
+
`${startTime} --> ${endTime}\n${caption.content}`;
|
|
347
|
+
})
|
|
348
|
+
.join('\n\n')
|
|
349
|
+
.trim();
|
|
350
|
+
|
|
351
|
+
// Add final newline only if input had one
|
|
352
|
+
if (text.endsWith('\n')) {
|
|
353
|
+
result += '\n';
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// Add WEBVTT header for VTT format
|
|
357
|
+
if (format === 'vtt') {
|
|
358
|
+
result = 'WEBVTT\n\n' + result;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
// Validate final output
|
|
362
|
+
if (!validateFinalOutput(result, text, format)) {
|
|
363
|
+
throw new Error("Final subtitle reconstruction failed validation");
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
return result;
|
|
367
|
+
} catch (e) {
|
|
368
|
+
logger.error(`Subtitle translation failed: ${e}`);
|
|
369
|
+
throw e;
|
|
370
|
+
}
|
|
180
371
|
},
|
|
181
372
|
};
|
|
@@ -7,25 +7,13 @@ export default {
|
|
|
7
7
|
{
|
|
8
8
|
role: "system",
|
|
9
9
|
content:
|
|
10
|
-
`
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
2. Output MUST have EXACTLY the same line count as input.
|
|
14
|
-
3. One input line = One output line. Always.
|
|
15
|
-
4. Only translations. Nothing extra.
|
|
16
|
-
5. Non-translatable stays unchanged.
|
|
17
|
-
6. Keep all formatting and characters.
|
|
18
|
-
7. Prefix: "LINE#lineNumber:".
|
|
19
|
-
8. Untranslatable: Copy as-is with prefix.
|
|
20
|
-
9. Internal checks: Verify line count and content after each line.
|
|
21
|
-
10. Final verification: Recount, check numbering, confirm content, cross-check with input.
|
|
22
|
-
|
|
23
|
-
Translate ALL lines. Constant vigilance. Exhaustive final cross-check.`
|
|
10
|
+
`You are an expert subtitle translator. You will be given a block of subtitles and asked to translate them into {{to}}.
|
|
11
|
+
You must maintain the original format (caption numbers and timestamps) exactly and make the content fit as naturally as possible.
|
|
12
|
+
Output only the translated subtitles in a <SUBTITLES> tag with no other text or commentary.`
|
|
24
13
|
},
|
|
25
14
|
{
|
|
26
15
|
role: "user",
|
|
27
|
-
|
|
28
|
-
content: `{{{text}}}`,
|
|
16
|
+
content: `<SUBTITLES>\n{{{text}}}\n</SUBTITLES>`,
|
|
29
17
|
},
|
|
30
18
|
],
|
|
31
19
|
}),
|
|
@@ -226,7 +226,7 @@ class PathwayResolver {
|
|
|
226
226
|
try {
|
|
227
227
|
// Load saved context and core memory if it exists
|
|
228
228
|
const [savedContext, memorySelf, memoryDirectives, memoryTopics, memoryUser, memoryContext] = await Promise.all([
|
|
229
|
-
(getv && getv(
|
|
229
|
+
(getv && await getv(this.savedContextId)) || {},
|
|
230
230
|
callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memorySelf', priority: 1}),
|
|
231
231
|
callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryDirectives', priority: 1 }),
|
|
232
232
|
callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryTopics', priority: 0, numResults: 10 }),
|