@aj-archipelago/cortex 1.1.23 → 1.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "1.1.23",
|
|
3
|
+
"version": "1.1.24",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"private": false,
|
|
6
6
|
"repository": {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import subsrt from "subsrt";
|
|
2
2
|
import logger from "../lib/logger.js";
|
|
3
3
|
import { callPathway } from "../lib/pathwayTools.js";
|
|
4
|
+
import { publishRequestProgress } from "../lib/redisSubscription.js";
|
|
4
5
|
|
|
5
6
|
function preprocessStr(str) {
|
|
6
7
|
try {
|
|
@@ -18,64 +19,25 @@ function preprocessStr(str) {
|
|
|
18
19
|
}
|
|
19
20
|
}
|
|
20
21
|
|
|
21
|
-
function
|
|
22
|
-
let context = "";
|
|
23
|
-
let wordCount = 0;
|
|
24
|
-
let i = startIndex;
|
|
25
|
-
|
|
26
|
-
while (i >= 0 && i < captions.length && wordCount < wordLimit) {
|
|
27
|
-
const words = captions[i].content.split(/\s+/);
|
|
28
|
-
if (wordCount + words.length <= wordLimit) {
|
|
29
|
-
context =
|
|
30
|
-
direction === "prev"
|
|
31
|
-
? captions[i].content + " " + context
|
|
32
|
-
: context + " " + captions[i].content;
|
|
33
|
-
wordCount += words.length;
|
|
34
|
-
} else {
|
|
35
|
-
const remainingWords = wordLimit - wordCount;
|
|
36
|
-
const partialContent =
|
|
37
|
-
direction === "prev"
|
|
38
|
-
? words.slice(-remainingWords).join(" ")
|
|
39
|
-
: words.slice(0, remainingWords).join(" ");
|
|
40
|
-
context =
|
|
41
|
-
direction === "prev"
|
|
42
|
-
? partialContent + " " + context
|
|
43
|
-
: context + " " + partialContent;
|
|
44
|
-
break;
|
|
45
|
-
}
|
|
46
|
-
i += direction === "prev" ? -1 : 1;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
return context.trim();
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
async function processBatch(batch, args, captions, batchStartIndex) {
|
|
22
|
+
async function processBatch(batch, args) {
|
|
53
23
|
const batchText = batch
|
|
54
24
|
.map((caption, index) => `LINE#${index + 1}: ${caption.content}`)
|
|
55
25
|
.join("\n");
|
|
56
|
-
const prevLines = getContextLines(captions, batchStartIndex - 1, "prev");
|
|
57
|
-
const nextLines = getContextLines(
|
|
58
|
-
captions,
|
|
59
|
-
batchStartIndex + batch.length,
|
|
60
|
-
"next"
|
|
61
|
-
);
|
|
62
26
|
|
|
63
27
|
const translatedText = await callPathway("translate_subtitle_helper", {
|
|
64
28
|
...args,
|
|
65
29
|
text: batchText,
|
|
66
|
-
prevLines,
|
|
67
|
-
nextLines,
|
|
68
30
|
async: false,
|
|
69
31
|
});
|
|
70
32
|
|
|
71
33
|
// Remove LINE# and LINE() labels
|
|
72
|
-
|
|
34
|
+
const translatedLines = translatedText.split("\n");
|
|
73
35
|
translatedLines.forEach((line, i) => {
|
|
74
36
|
translatedLines[i] = line.replace(/^LINE#\d+:\s*/, "").trim();
|
|
75
|
-
|
|
37
|
+
});
|
|
76
38
|
//make sure translatedLines.length===batch.length
|
|
77
39
|
if (translatedLines.length < batch.length) {
|
|
78
|
-
const emptyLines = Array(batch.length - translatedLines.length).fill("");
|
|
40
|
+
const emptyLines = Array(batch.length - translatedLines.length).fill("-");
|
|
79
41
|
translatedLines.push(...emptyLines);
|
|
80
42
|
} else if (translatedLines.length > batch.length) {
|
|
81
43
|
//first remove the empty lines
|
|
@@ -88,7 +50,7 @@ async function processBatch(batch, args, captions, batchStartIndex) {
|
|
|
88
50
|
mergedLines.unshift(lastLine);
|
|
89
51
|
translatedLines.splice(batch.length - 1, translatedLines.length - batch.length + 1, mergedLines.join(" "));
|
|
90
52
|
}else {
|
|
91
|
-
const emptyLines = Array(batch.length - translatedLines.length).fill("");
|
|
53
|
+
const emptyLines = Array(batch.length - translatedLines.length).fill("-");
|
|
92
54
|
translatedLines.push(...emptyLines);
|
|
93
55
|
}
|
|
94
56
|
}
|
|
@@ -114,7 +76,7 @@ async function processBatch(batch, args, captions, batchStartIndex) {
|
|
|
114
76
|
}));
|
|
115
77
|
}
|
|
116
78
|
|
|
117
|
-
async function myResolver(args) {
|
|
79
|
+
async function myResolver(args, requestId) {
|
|
118
80
|
try {
|
|
119
81
|
const { text, format } = args;
|
|
120
82
|
const captions = subsrt.parse(preprocessStr(text), {
|
|
@@ -123,11 +85,30 @@ async function myResolver(args) {
|
|
|
123
85
|
eol: "\n",
|
|
124
86
|
});
|
|
125
87
|
const maxLineCount = 100;
|
|
126
|
-
const maxWordCount =
|
|
88
|
+
const maxWordCount = 300;
|
|
127
89
|
let translatedCaptions = [];
|
|
128
90
|
let currentBatch = [];
|
|
129
91
|
let currentWordCount = 0;
|
|
130
|
-
|
|
92
|
+
|
|
93
|
+
const totalCount = captions.length;
|
|
94
|
+
let completedCount = 0;
|
|
95
|
+
|
|
96
|
+
const sendProgress = () => {
|
|
97
|
+
if (completedCount >= totalCount) return;
|
|
98
|
+
if(!requestId) {
|
|
99
|
+
logger.warn(`No requestId found for progress update`);
|
|
100
|
+
return;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
const progress = completedCount / totalCount;
|
|
104
|
+
logger.info(`Progress for ${requestId}: ${progress}`);
|
|
105
|
+
|
|
106
|
+
publishRequestProgress({
|
|
107
|
+
requestId,
|
|
108
|
+
progress,
|
|
109
|
+
data: null,
|
|
110
|
+
});
|
|
111
|
+
};
|
|
131
112
|
|
|
132
113
|
for (let i = 0; i < captions.length; i++) {
|
|
133
114
|
const caption = captions[i];
|
|
@@ -137,16 +118,15 @@ async function myResolver(args) {
|
|
|
137
118
|
currentBatch.length >= maxLineCount) &&
|
|
138
119
|
currentBatch.length > 0
|
|
139
120
|
) {
|
|
121
|
+
completedCount=i;
|
|
122
|
+
sendProgress();
|
|
140
123
|
const translatedBatch = await processBatch(
|
|
141
124
|
currentBatch,
|
|
142
125
|
args,
|
|
143
|
-
captions,
|
|
144
|
-
batchStartIndex
|
|
145
126
|
);
|
|
146
127
|
translatedCaptions = translatedCaptions.concat(translatedBatch);
|
|
147
128
|
currentBatch = [];
|
|
148
129
|
currentWordCount = 0;
|
|
149
|
-
batchStartIndex = i;
|
|
150
130
|
}
|
|
151
131
|
currentBatch.push(caption);
|
|
152
132
|
currentWordCount += captionWordCount;
|
|
@@ -156,8 +136,6 @@ async function myResolver(args) {
|
|
|
156
136
|
const translatedBatch = await processBatch(
|
|
157
137
|
currentBatch,
|
|
158
138
|
args,
|
|
159
|
-
captions,
|
|
160
|
-
batchStartIndex
|
|
161
139
|
);
|
|
162
140
|
translatedCaptions = translatedCaptions.concat(translatedBatch);
|
|
163
141
|
}
|
|
@@ -171,11 +149,11 @@ async function myResolver(args) {
|
|
|
171
149
|
.trim() + "\n"
|
|
172
150
|
);
|
|
173
151
|
} catch (e) {
|
|
174
|
-
logger.
|
|
175
|
-
|
|
152
|
+
logger.warn(
|
|
153
|
+
`${e} - could be that there are no subtitles, so attempting block translation.`
|
|
176
154
|
);
|
|
177
155
|
try {
|
|
178
|
-
return await callPathway("
|
|
156
|
+
return await callPathway("translate_gpt4_omni", {...args, async: false});
|
|
179
157
|
} catch (e) {
|
|
180
158
|
logger.error(`An error occurred in subtitle translation: ${e}`);
|
|
181
159
|
return "";
|
|
@@ -191,11 +169,13 @@ export default {
|
|
|
191
169
|
prevLines: ``,
|
|
192
170
|
nextLines: ``,
|
|
193
171
|
},
|
|
194
|
-
|
|
172
|
+
useInputChunking: false,
|
|
195
173
|
model: "oai-gpt4o",
|
|
196
174
|
enableDuplicateRequests: false,
|
|
197
175
|
timeout: 3600,
|
|
198
|
-
executePathway: async (
|
|
199
|
-
|
|
176
|
+
executePathway: async (executePathwayArgs) => {
|
|
177
|
+
const { args } = executePathwayArgs;
|
|
178
|
+
const requestId = executePathwayArgs?.resolver?.requestId;
|
|
179
|
+
return await myResolver(args, requestId);
|
|
200
180
|
},
|
|
201
181
|
};
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { Prompt } from '../server/prompt.js';
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
export default {
|
|
5
4
|
prompt: [
|
|
6
5
|
new Prompt({
|
|
@@ -8,11 +7,25 @@ export default {
|
|
|
8
7
|
{
|
|
9
8
|
role: "system",
|
|
10
9
|
content:
|
|
11
|
-
|
|
10
|
+
`Expert translator: Convert ALL text to {{to}}. Unbreakable rules:
|
|
11
|
+
|
|
12
|
+
1. Translate EVERY SINGLE LINE. Zero exceptions.
|
|
13
|
+
2. Output MUST have EXACTLY the same line count as input.
|
|
14
|
+
3. One input line = One output line. Always.
|
|
15
|
+
4. Only translations. Nothing extra.
|
|
16
|
+
5. Non-translatable stays unchanged.
|
|
17
|
+
6. Keep all formatting and characters.
|
|
18
|
+
7. Prefix: "LINE#lineNumber:".
|
|
19
|
+
8. Untranslatable: Copy as-is with prefix.
|
|
20
|
+
9. Internal checks: Verify line count and content after each line.
|
|
21
|
+
10. Final verification: Recount, check numbering, confirm content, cross-check with input.
|
|
22
|
+
|
|
23
|
+
Translate ALL lines. Constant vigilance. Exhaustive final cross-check.`
|
|
12
24
|
},
|
|
13
25
|
{
|
|
14
26
|
role: "user",
|
|
15
|
-
content: `"PreviousLines":\n{{{prevLine}}}\n\n"CurrentLines":\n{{{text}}}\n"NextLines":\n{{{nextLine}}}\n\n`,
|
|
27
|
+
// content: `"PreviousLines":\n{{{prevLine}}}\n\n"CurrentLines":\n{{{text}}}\n"NextLines":\n{{{nextLine}}}\n\n`,
|
|
28
|
+
content: `{{{text}}}`,
|
|
16
29
|
},
|
|
17
30
|
],
|
|
18
31
|
}),
|
|
@@ -24,8 +37,8 @@ export default {
|
|
|
24
37
|
prevLine: ``,
|
|
25
38
|
nextLine: ``,
|
|
26
39
|
},
|
|
27
|
-
|
|
40
|
+
useInputChunking: false,
|
|
28
41
|
model: 'oai-gpt4o',
|
|
29
42
|
enableDuplicateRequests: false,
|
|
30
|
-
|
|
43
|
+
timeout: 3600,
|
|
31
44
|
}
|
|
@@ -206,7 +206,7 @@ class PathwayResolver {
|
|
|
206
206
|
|
|
207
207
|
async executePathway(args) {
|
|
208
208
|
if (this.pathway.executePathway && typeof this.pathway.executePathway === 'function') {
|
|
209
|
-
return await this.pathway.executePathway({ args, runAllPrompts: this.promptAndParse.bind(this) });
|
|
209
|
+
return await this.pathway.executePathway({ args, runAllPrompts: this.promptAndParse.bind(this), resolver: this });
|
|
210
210
|
}
|
|
211
211
|
else {
|
|
212
212
|
return await this.promptAndParse(args);
|
|
@@ -37,14 +37,13 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
37
37
|
chunks.push(chunk);
|
|
38
38
|
|
|
39
39
|
const { language, responseFormat } = parameters;
|
|
40
|
-
cortexRequest.url = this.requestUrl(text);
|
|
41
40
|
const params = {};
|
|
42
41
|
const { modelPromptText } = this.getCompiledPrompt(text, parameters, prompt);
|
|
43
42
|
const response_format = responseFormat || 'text';
|
|
44
43
|
|
|
45
44
|
const formData = new FormData();
|
|
46
45
|
formData.append('file', fs.createReadStream(chunk));
|
|
47
|
-
formData.append('model',
|
|
46
|
+
formData.append('model', cortexRequest.params.model);
|
|
48
47
|
formData.append('response_format', response_format);
|
|
49
48
|
language && formData.append('language', language);
|
|
50
49
|
modelPromptText && formData.append('prompt', modelPromptText);
|
package/tests/main.test.js
CHANGED
|
@@ -405,6 +405,13 @@ Aseel is mommy
|
|
|
405
405
|
});
|
|
406
406
|
|
|
407
407
|
test('test translate_srt endpoint with long srt file', async t => {
|
|
408
|
+
t.timeout(400000);
|
|
408
409
|
const text = fs.readFileSync(path.join(__dirname, 'sublong.srt'), 'utf8');
|
|
409
410
|
await testTranslateSrt(t, text, 'English');
|
|
411
|
+
});
|
|
412
|
+
|
|
413
|
+
test('test translate_srt endpoint with horizontal srt file', async t => {
|
|
414
|
+
t.timeout(400000);
|
|
415
|
+
const text = fs.readFileSync(path.join(__dirname, 'subhorizontal.srt'), 'utf8');
|
|
416
|
+
await testTranslateSrt(t, text, 'Turkish');
|
|
410
417
|
});
|