@aj-archipelago/cortex 1.1.23 → 1.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cortexRequest.js +15 -2
- package/lib/requestExecutor.js +3 -3
- package/package.json +1 -1
- package/pathways/translate_subtitle.js +38 -58
- package/pathways/translate_subtitle_helper.js +18 -5
- package/server/pathwayResolver.js +1 -1
- package/server/plugins/claude3VertexPlugin.js +1 -1
- package/server/plugins/gemini15ChatPlugin.js +1 -1
- package/server/plugins/geminiChatPlugin.js +1 -1
- package/server/plugins/openAiWhisperPlugin.js +1 -2
- package/server/plugins/palmChatPlugin.js +1 -1
- package/server/plugins/palmCompletionPlugin.js +1 -1
- package/tests/main.test.js +7 -0
- package/tests/subhorizontal.srt +1735 -0
package/lib/cortexRequest.js
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import { selectEndpoint } from './requestExecutor.js';
|
|
2
2
|
|
|
3
3
|
class CortexRequest {
|
|
4
|
-
constructor( { url, urlSuffix, data, params, headers, cache, model, pathwayResolver, selectedEndpoint, stream } = {}) {
|
|
4
|
+
constructor( { url, urlSuffix, data, params, headers, auth, cache, model, pathwayResolver, selectedEndpoint, stream } = {}) {
|
|
5
5
|
this._url = url || '';
|
|
6
6
|
this._urlSuffix = urlSuffix || '';
|
|
7
7
|
this._data = data || {};
|
|
8
8
|
this._params = params || {};
|
|
9
9
|
this._headers = headers || {};
|
|
10
|
+
this._auth = auth || {};
|
|
10
11
|
this._cache = cache || {};
|
|
11
12
|
this._model = model || '';
|
|
12
13
|
this._pathwayResolver = pathwayResolver || {};
|
|
@@ -30,6 +31,9 @@ class CortexRequest {
|
|
|
30
31
|
this._url = sep.url;
|
|
31
32
|
this._data = { ...this._data, ...sep.params };
|
|
32
33
|
this._headers = { ...this._headers, ...sep.headers };
|
|
34
|
+
if (sep.auth) {
|
|
35
|
+
this._auth = { ...sep.auth };
|
|
36
|
+
}
|
|
33
37
|
this._params = { ...this._params, ...sep.params };
|
|
34
38
|
}
|
|
35
39
|
}
|
|
@@ -81,13 +85,22 @@ class CortexRequest {
|
|
|
81
85
|
|
|
82
86
|
// headers getter and setter
|
|
83
87
|
get headers() {
|
|
84
|
-
return this._headers;
|
|
88
|
+
return { ...this._headers, ...this._auth };
|
|
85
89
|
}
|
|
86
90
|
|
|
87
91
|
set headers(value) {
|
|
88
92
|
this._headers = value;
|
|
89
93
|
}
|
|
90
94
|
|
|
95
|
+
// auth getter and setter
|
|
96
|
+
get auth() {
|
|
97
|
+
return this._auth;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
set auth(value) {
|
|
101
|
+
this._auth = value;
|
|
102
|
+
}
|
|
103
|
+
|
|
91
104
|
// cache getter and setter
|
|
92
105
|
get cache() {
|
|
93
106
|
return this._cache;
|
package/lib/requestExecutor.js
CHANGED
|
@@ -311,9 +311,9 @@ const makeRequest = async (cortexRequest) => {
|
|
|
311
311
|
throw new Error(`Received error response: ${response.status}`);
|
|
312
312
|
}
|
|
313
313
|
} catch (error) {
|
|
314
|
-
const { response, duration } = error;
|
|
315
|
-
if (response) {
|
|
316
|
-
const status = response
|
|
314
|
+
const { response, duration, code } = error;
|
|
315
|
+
if (response || code === 'ECONNRESET') {
|
|
316
|
+
const status = response?.status || 502; // default to 502 if ECONNRESET
|
|
317
317
|
// if there is only one endpoint, only retry select error codes
|
|
318
318
|
if (cortexRequest.model.endpoints.length === 1) {
|
|
319
319
|
if (status !== 429 &&
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "1.1.23",
|
|
3
|
+
"version": "1.1.25",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"private": false,
|
|
6
6
|
"repository": {
|
package/pathways/translate_subtitle.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import subsrt from "subsrt";
|
|
2
2
|
import logger from "../lib/logger.js";
|
|
3
3
|
import { callPathway } from "../lib/pathwayTools.js";
|
|
4
|
+
import { publishRequestProgress } from "../lib/redisSubscription.js";
|
|
4
5
|
|
|
5
6
|
function preprocessStr(str) {
|
|
6
7
|
try {
|
|
@@ -18,64 +19,25 @@ function preprocessStr(str) {
|
|
|
18
19
|
}
|
|
19
20
|
}
|
|
20
21
|
|
|
21
|
-
function
|
|
22
|
-
let context = "";
|
|
23
|
-
let wordCount = 0;
|
|
24
|
-
let i = startIndex;
|
|
25
|
-
|
|
26
|
-
while (i >= 0 && i < captions.length && wordCount < wordLimit) {
|
|
27
|
-
const words = captions[i].content.split(/\s+/);
|
|
28
|
-
if (wordCount + words.length <= wordLimit) {
|
|
29
|
-
context =
|
|
30
|
-
direction === "prev"
|
|
31
|
-
? captions[i].content + " " + context
|
|
32
|
-
: context + " " + captions[i].content;
|
|
33
|
-
wordCount += words.length;
|
|
34
|
-
} else {
|
|
35
|
-
const remainingWords = wordLimit - wordCount;
|
|
36
|
-
const partialContent =
|
|
37
|
-
direction === "prev"
|
|
38
|
-
? words.slice(-remainingWords).join(" ")
|
|
39
|
-
: words.slice(0, remainingWords).join(" ");
|
|
40
|
-
context =
|
|
41
|
-
direction === "prev"
|
|
42
|
-
? partialContent + " " + context
|
|
43
|
-
: context + " " + partialContent;
|
|
44
|
-
break;
|
|
45
|
-
}
|
|
46
|
-
i += direction === "prev" ? -1 : 1;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
return context.trim();
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
async function processBatch(batch, args, captions, batchStartIndex) {
|
|
22
|
+
async function processBatch(batch, args) {
|
|
53
23
|
const batchText = batch
|
|
54
24
|
.map((caption, index) => `LINE#${index + 1}: ${caption.content}`)
|
|
55
25
|
.join("\n");
|
|
56
|
-
const prevLines = getContextLines(captions, batchStartIndex - 1, "prev");
|
|
57
|
-
const nextLines = getContextLines(
|
|
58
|
-
captions,
|
|
59
|
-
batchStartIndex + batch.length,
|
|
60
|
-
"next"
|
|
61
|
-
);
|
|
62
26
|
|
|
63
27
|
const translatedText = await callPathway("translate_subtitle_helper", {
|
|
64
28
|
...args,
|
|
65
29
|
text: batchText,
|
|
66
|
-
prevLines,
|
|
67
|
-
nextLines,
|
|
68
30
|
async: false,
|
|
69
31
|
});
|
|
70
32
|
|
|
71
33
|
// Remove LINE# and LINE() labels
|
|
72
|
-
|
|
34
|
+
const translatedLines = translatedText.split("\n");
|
|
73
35
|
translatedLines.forEach((line, i) => {
|
|
74
36
|
translatedLines[i] = line.replace(/^LINE#\d+:\s*/, "").trim();
|
|
75
|
-
|
|
37
|
+
});
|
|
76
38
|
//make sure translatedLines.length===batch.length
|
|
77
39
|
if (translatedLines.length < batch.length) {
|
|
78
|
-
const emptyLines = Array(batch.length - translatedLines.length).fill("");
|
|
40
|
+
const emptyLines = Array(batch.length - translatedLines.length).fill("-");
|
|
79
41
|
translatedLines.push(...emptyLines);
|
|
80
42
|
} else if (translatedLines.length > batch.length) {
|
|
81
43
|
//first remove the empty lines
|
|
@@ -88,7 +50,7 @@ async function processBatch(batch, args, captions, batchStartIndex) {
|
|
|
88
50
|
mergedLines.unshift(lastLine);
|
|
89
51
|
translatedLines.splice(batch.length - 1, translatedLines.length - batch.length + 1, mergedLines.join(" "));
|
|
90
52
|
}else {
|
|
91
|
-
const emptyLines = Array(batch.length - translatedLines.length).fill("");
|
|
53
|
+
const emptyLines = Array(batch.length - translatedLines.length).fill("-");
|
|
92
54
|
translatedLines.push(...emptyLines);
|
|
93
55
|
}
|
|
94
56
|
}
|
|
@@ -114,7 +76,7 @@ async function processBatch(batch, args, captions, batchStartIndex) {
|
|
|
114
76
|
}));
|
|
115
77
|
}
|
|
116
78
|
|
|
117
|
-
async function myResolver(args) {
|
|
79
|
+
async function myResolver(args, requestId) {
|
|
118
80
|
try {
|
|
119
81
|
const { text, format } = args;
|
|
120
82
|
const captions = subsrt.parse(preprocessStr(text), {
|
|
@@ -123,11 +85,30 @@ async function myResolver(args) {
|
|
|
123
85
|
eol: "\n",
|
|
124
86
|
});
|
|
125
87
|
const maxLineCount = 100;
|
|
126
|
-
const maxWordCount =
|
|
88
|
+
const maxWordCount = 300;
|
|
127
89
|
let translatedCaptions = [];
|
|
128
90
|
let currentBatch = [];
|
|
129
91
|
let currentWordCount = 0;
|
|
130
|
-
|
|
92
|
+
|
|
93
|
+
const totalCount = captions.length;
|
|
94
|
+
let completedCount = 0;
|
|
95
|
+
|
|
96
|
+
const sendProgress = () => {
|
|
97
|
+
if (completedCount >= totalCount) return;
|
|
98
|
+
if(!requestId) {
|
|
99
|
+
logger.warn(`No requestId found for progress update`);
|
|
100
|
+
return;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
const progress = completedCount / totalCount;
|
|
104
|
+
logger.info(`Progress for ${requestId}: ${progress}`);
|
|
105
|
+
|
|
106
|
+
publishRequestProgress({
|
|
107
|
+
requestId,
|
|
108
|
+
progress,
|
|
109
|
+
data: null,
|
|
110
|
+
});
|
|
111
|
+
};
|
|
131
112
|
|
|
132
113
|
for (let i = 0; i < captions.length; i++) {
|
|
133
114
|
const caption = captions[i];
|
|
@@ -137,16 +118,15 @@ async function myResolver(args) {
|
|
|
137
118
|
currentBatch.length >= maxLineCount) &&
|
|
138
119
|
currentBatch.length > 0
|
|
139
120
|
) {
|
|
121
|
+
completedCount=i;
|
|
122
|
+
sendProgress();
|
|
140
123
|
const translatedBatch = await processBatch(
|
|
141
124
|
currentBatch,
|
|
142
125
|
args,
|
|
143
|
-
captions,
|
|
144
|
-
batchStartIndex
|
|
145
126
|
);
|
|
146
127
|
translatedCaptions = translatedCaptions.concat(translatedBatch);
|
|
147
128
|
currentBatch = [];
|
|
148
129
|
currentWordCount = 0;
|
|
149
|
-
batchStartIndex = i;
|
|
150
130
|
}
|
|
151
131
|
currentBatch.push(caption);
|
|
152
132
|
currentWordCount += captionWordCount;
|
|
@@ -156,8 +136,6 @@ async function myResolver(args) {
|
|
|
156
136
|
const translatedBatch = await processBatch(
|
|
157
137
|
currentBatch,
|
|
158
138
|
args,
|
|
159
|
-
captions,
|
|
160
|
-
batchStartIndex
|
|
161
139
|
);
|
|
162
140
|
translatedCaptions = translatedCaptions.concat(translatedBatch);
|
|
163
141
|
}
|
|
@@ -171,11 +149,11 @@ async function myResolver(args) {
|
|
|
171
149
|
.trim() + "\n"
|
|
172
150
|
);
|
|
173
151
|
} catch (e) {
|
|
174
|
-
logger.
|
|
175
|
-
|
|
152
|
+
logger.warn(
|
|
153
|
+
`${e} - could be that there are no subtitles, so attempting block translation.`
|
|
176
154
|
);
|
|
177
155
|
try {
|
|
178
|
-
return await callPathway("
|
|
156
|
+
return await callPathway("translate_gpt4_omni", {...args, async: false});
|
|
179
157
|
} catch (e) {
|
|
180
158
|
logger.error(`An error occurred in subtitle translation: ${e}`);
|
|
181
159
|
return "";
|
|
@@ -191,11 +169,13 @@ export default {
|
|
|
191
169
|
prevLines: ``,
|
|
192
170
|
nextLines: ``,
|
|
193
171
|
},
|
|
194
|
-
|
|
172
|
+
useInputChunking: false,
|
|
195
173
|
model: "oai-gpt4o",
|
|
196
174
|
enableDuplicateRequests: false,
|
|
197
175
|
timeout: 3600,
|
|
198
|
-
executePathway: async (
|
|
199
|
-
|
|
176
|
+
executePathway: async (executePathwayArgs) => {
|
|
177
|
+
const { args } = executePathwayArgs;
|
|
178
|
+
const requestId = executePathwayArgs?.resolver?.requestId;
|
|
179
|
+
return await myResolver(args, requestId);
|
|
200
180
|
},
|
|
201
181
|
};
|
package/pathways/translate_subtitle_helper.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { Prompt } from '../server/prompt.js';
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
export default {
|
|
5
4
|
prompt: [
|
|
6
5
|
new Prompt({
|
|
@@ -8,11 +7,25 @@ export default {
|
|
|
8
7
|
{
|
|
9
8
|
role: "system",
|
|
10
9
|
content:
|
|
11
|
-
|
|
10
|
+
`Expert translator: Convert ALL text to {{to}}. Unbreakable rules:
|
|
11
|
+
|
|
12
|
+
1. Translate EVERY SINGLE LINE. Zero exceptions.
|
|
13
|
+
2. Output MUST have EXACTLY the same line count as input.
|
|
14
|
+
3. One input line = One output line. Always.
|
|
15
|
+
4. Only translations. Nothing extra.
|
|
16
|
+
5. Non-translatable stays unchanged.
|
|
17
|
+
6. Keep all formatting and characters.
|
|
18
|
+
7. Prefix: "LINE#lineNumber:".
|
|
19
|
+
8. Untranslatable: Copy as-is with prefix.
|
|
20
|
+
9. Internal checks: Verify line count and content after each line.
|
|
21
|
+
10. Final verification: Recount, check numbering, confirm content, cross-check with input.
|
|
22
|
+
|
|
23
|
+
Translate ALL lines. Constant vigilance. Exhaustive final cross-check.`
|
|
12
24
|
},
|
|
13
25
|
{
|
|
14
26
|
role: "user",
|
|
15
|
-
content: `"PreviousLines":\n{{{prevLine}}}\n\n"CurrentLines":\n{{{text}}}\n"NextLines":\n{{{nextLine}}}\n\n`,
|
|
27
|
+
// content: `"PreviousLines":\n{{{prevLine}}}\n\n"CurrentLines":\n{{{text}}}\n"NextLines":\n{{{nextLine}}}\n\n`,
|
|
28
|
+
content: `{{{text}}}`,
|
|
16
29
|
},
|
|
17
30
|
],
|
|
18
31
|
}),
|
|
@@ -24,8 +37,8 @@ export default {
|
|
|
24
37
|
prevLine: ``,
|
|
25
38
|
nextLine: ``,
|
|
26
39
|
},
|
|
27
|
-
|
|
40
|
+
useInputChunking: false,
|
|
28
41
|
model: 'oai-gpt4o',
|
|
29
42
|
enableDuplicateRequests: false,
|
|
30
|
-
|
|
43
|
+
timeout: 3600,
|
|
31
44
|
}
|
package/server/pathwayResolver.js
CHANGED
|
@@ -206,7 +206,7 @@ class PathwayResolver {
|
|
|
206
206
|
|
|
207
207
|
async executePathway(args) {
|
|
208
208
|
if (this.pathway.executePathway && typeof this.pathway.executePathway === 'function') {
|
|
209
|
-
return await this.pathway.executePathway({ args, runAllPrompts: this.promptAndParse.bind(this) });
|
|
209
|
+
return await this.pathway.executePathway({ args, runAllPrompts: this.promptAndParse.bind(this), resolver: this });
|
|
210
210
|
}
|
|
211
211
|
else {
|
|
212
212
|
return await this.promptAndParse(args);
|
package/server/plugins/claude3VertexPlugin.js
CHANGED
|
@@ -263,7 +263,7 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
|
|
|
263
263
|
|
|
264
264
|
const gcpAuthTokenHelper = this.config.get("gcpAuthTokenHelper");
|
|
265
265
|
const authToken = await gcpAuthTokenHelper.getAccessToken();
|
|
266
|
-
cortexRequest.
|
|
266
|
+
cortexRequest.auth.Authorization = `Bearer ${authToken}`;
|
|
267
267
|
|
|
268
268
|
return this.executeRequest(cortexRequest);
|
|
269
269
|
}
|
package/server/plugins/gemini15ChatPlugin.js
CHANGED
|
@@ -164,7 +164,7 @@ class Gemini15ChatPlugin extends ModelPlugin {
|
|
|
164
164
|
|
|
165
165
|
const gcpAuthTokenHelper = this.config.get('gcpAuthTokenHelper');
|
|
166
166
|
const authToken = await gcpAuthTokenHelper.getAccessToken();
|
|
167
|
-
cortexRequest.
|
|
167
|
+
cortexRequest.auth.Authorization = `Bearer ${authToken}`;
|
|
168
168
|
|
|
169
169
|
return this.executeRequest(cortexRequest);
|
|
170
170
|
}
|
package/server/plugins/geminiChatPlugin.js
CHANGED
|
@@ -159,7 +159,7 @@ class GeminiChatPlugin extends ModelPlugin {
|
|
|
159
159
|
|
|
160
160
|
const gcpAuthTokenHelper = this.config.get('gcpAuthTokenHelper');
|
|
161
161
|
const authToken = await gcpAuthTokenHelper.getAccessToken();
|
|
162
|
-
cortexRequest.
|
|
162
|
+
cortexRequest.auth.Authorization = `Bearer ${authToken}`;
|
|
163
163
|
|
|
164
164
|
return this.executeRequest(cortexRequest);
|
|
165
165
|
}
|
package/server/plugins/openAiWhisperPlugin.js
CHANGED
|
@@ -37,14 +37,13 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
37
37
|
chunks.push(chunk);
|
|
38
38
|
|
|
39
39
|
const { language, responseFormat } = parameters;
|
|
40
|
-
cortexRequest.url = this.requestUrl(text);
|
|
41
40
|
const params = {};
|
|
42
41
|
const { modelPromptText } = this.getCompiledPrompt(text, parameters, prompt);
|
|
43
42
|
const response_format = responseFormat || 'text';
|
|
44
43
|
|
|
45
44
|
const formData = new FormData();
|
|
46
45
|
formData.append('file', fs.createReadStream(chunk));
|
|
47
|
-
formData.append('model',
|
|
46
|
+
formData.append('model', cortexRequest.params.model);
|
|
48
47
|
formData.append('response_format', response_format);
|
|
49
48
|
language && formData.append('language', language);
|
|
50
49
|
modelPromptText && formData.append('prompt', modelPromptText);
|
package/server/plugins/palmChatPlugin.js
CHANGED
|
@@ -147,7 +147,7 @@ class PalmChatPlugin extends ModelPlugin {
|
|
|
147
147
|
|
|
148
148
|
const gcpAuthTokenHelper = this.config.get('gcpAuthTokenHelper');
|
|
149
149
|
const authToken = await gcpAuthTokenHelper.getAccessToken();
|
|
150
|
-
cortexRequest.
|
|
150
|
+
cortexRequest.auth.Authorization = `Bearer ${authToken}`;
|
|
151
151
|
|
|
152
152
|
return this.executeRequest(cortexRequest);
|
|
153
153
|
}
|
package/server/plugins/palmCompletionPlugin.js
CHANGED
|
@@ -61,7 +61,7 @@ class PalmCompletionPlugin extends ModelPlugin {
|
|
|
61
61
|
|
|
62
62
|
const gcpAuthTokenHelper = this.config.get('gcpAuthTokenHelper');
|
|
63
63
|
const authToken = await gcpAuthTokenHelper.getAccessToken();
|
|
64
|
-
cortexRequest.
|
|
64
|
+
cortexRequest.auth.Authorization = `Bearer ${authToken}`;
|
|
65
65
|
|
|
66
66
|
return this.executeRequest(cortexRequest);
|
|
67
67
|
}
|
package/tests/main.test.js
CHANGED
|
@@ -405,6 +405,13 @@ Aseel is mommy
|
|
|
405
405
|
});
|
|
406
406
|
|
|
407
407
|
test('test translate_srt endpoint with long srt file', async t => {
|
|
408
|
+
t.timeout(400000);
|
|
408
409
|
const text = fs.readFileSync(path.join(__dirname, 'sublong.srt'), 'utf8');
|
|
409
410
|
await testTranslateSrt(t, text, 'English');
|
|
411
|
+
});
|
|
412
|
+
|
|
413
|
+
test('test translate_srt endpoint with horizontal srt file', async t => {
|
|
414
|
+
t.timeout(400000);
|
|
415
|
+
const text = fs.readFileSync(path.join(__dirname, 'subhorizontal.srt'), 'utf8');
|
|
416
|
+
await testTranslateSrt(t, text, 'Turkish');
|
|
410
417
|
});
|