@aj-archipelago/cortex 1.1.23 → 1.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@aj-archipelago/cortex",
- "version": "1.1.23",
+ "version": "1.1.24",
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
  "private": false,
  "repository": {
@@ -1,6 +1,7 @@
  import subsrt from "subsrt";
  import logger from "../lib/logger.js";
  import { callPathway } from "../lib/pathwayTools.js";
+ import { publishRequestProgress } from "../lib/redisSubscription.js";

  function preprocessStr(str) {
  try {
@@ -18,64 +19,25 @@ function preprocessStr(str) {
  }
  }

- function getContextLines(captions, startIndex, direction, wordLimit = 100) {
- let context = "";
- let wordCount = 0;
- let i = startIndex;
-
- while (i >= 0 && i < captions.length && wordCount < wordLimit) {
- const words = captions[i].content.split(/\s+/);
- if (wordCount + words.length <= wordLimit) {
- context =
- direction === "prev"
- ? captions[i].content + " " + context
- : context + " " + captions[i].content;
- wordCount += words.length;
- } else {
- const remainingWords = wordLimit - wordCount;
- const partialContent =
- direction === "prev"
- ? words.slice(-remainingWords).join(" ")
- : words.slice(0, remainingWords).join(" ");
- context =
- direction === "prev"
- ? partialContent + " " + context
- : context + " " + partialContent;
- break;
- }
- i += direction === "prev" ? -1 : 1;
- }
-
- return context.trim();
- }
-
- async function processBatch(batch, args, captions, batchStartIndex) {
+ async function processBatch(batch, args) {
  const batchText = batch
  .map((caption, index) => `LINE#${index + 1}: ${caption.content}`)
  .join("\n");
- const prevLines = getContextLines(captions, batchStartIndex - 1, "prev");
- const nextLines = getContextLines(
- captions,
- batchStartIndex + batch.length,
- "next"
- );

  const translatedText = await callPathway("translate_subtitle_helper", {
  ...args,
  text: batchText,
- prevLines,
- nextLines,
  async: false,
  });

  // Remove LINE# and LINE() labels
- const translatedLines = translatedText.split("\n");
+ const translatedLines = translatedText.split("\n");
  translatedLines.forEach((line, i) => {
  translatedLines[i] = line.replace(/^LINE#\d+:\s*/, "").trim();
- });
+ });
  //make sure translatedLines.length===batch.length
  if (translatedLines.length < batch.length) {
- const emptyLines = Array(batch.length - translatedLines.length).fill("");
+ const emptyLines = Array(batch.length - translatedLines.length).fill("-");
  translatedLines.push(...emptyLines);
  } else if (translatedLines.length > batch.length) {
  //first remove the empty lines
@@ -88,7 +50,7 @@ async function processBatch(batch, args, captions, batchStartIndex) {
  mergedLines.unshift(lastLine);
  translatedLines.splice(batch.length - 1, translatedLines.length - batch.length + 1, mergedLines.join(" "));
  }else {
- const emptyLines = Array(batch.length - translatedLines.length).fill("");
+ const emptyLines = Array(batch.length - translatedLines.length).fill("-");
  translatedLines.push(...emptyLines);
  }
  }
@@ -114,7 +76,7 @@ async function processBatch(batch, args, captions, batchStartIndex) {
  }));
  }

- async function myResolver(args) {
+ async function myResolver(args, requestId) {
  try {
  const { text, format } = args;
  const captions = subsrt.parse(preprocessStr(text), {
@@ -123,11 +85,30 @@ async function myResolver(args) {
  eol: "\n",
  });
  const maxLineCount = 100;
- const maxWordCount = 1000;
+ const maxWordCount = 300;
  let translatedCaptions = [];
  let currentBatch = [];
  let currentWordCount = 0;
- let batchStartIndex = 0;
+
+ const totalCount = captions.length;
+ let completedCount = 0;
+
+ const sendProgress = () => {
+ if (completedCount >= totalCount) return;
+ if(!requestId) {
+ logger.warn(`No requestId found for progress update`);
+ return;
+ }
+
+ const progress = completedCount / totalCount;
+ logger.info(`Progress for ${requestId}: ${progress}`);
+
+ publishRequestProgress({
+ requestId,
+ progress,
+ data: null,
+ });
+ };

  for (let i = 0; i < captions.length; i++) {
  const caption = captions[i];
@@ -137,16 +118,15 @@ async function myResolver(args) {
  currentBatch.length >= maxLineCount) &&
  currentBatch.length > 0
  ) {
+ completedCount=i;
+ sendProgress();
  const translatedBatch = await processBatch(
  currentBatch,
  args,
- captions,
- batchStartIndex
  );
  translatedCaptions = translatedCaptions.concat(translatedBatch);
  currentBatch = [];
  currentWordCount = 0;
- batchStartIndex = i;
  }
  currentBatch.push(caption);
  currentWordCount += captionWordCount;
@@ -156,8 +136,6 @@ async function myResolver(args) {
  const translatedBatch = await processBatch(
  currentBatch,
  args,
- captions,
- batchStartIndex
  );
  translatedCaptions = translatedCaptions.concat(translatedBatch);
  }
@@ -171,11 +149,11 @@ async function myResolver(args) {
  .trim() + "\n"
  );
  } catch (e) {
- logger.error(
- `An error occurred in subtitle translation, trying direct translation next: ${e}`
+ logger.warn(
+ `${e} - could be that there are no subtitles, so attempting block translation.`
  );
  try {
- return await callPathway("translate_gpt4", {...args, async: false});
+ return await callPathway("translate_gpt4_omni", {...args, async: false});
  } catch (e) {
  logger.error(`An error occurred in subtitle translation: ${e}`);
  return "";
@@ -191,11 +169,13 @@ export default {
  prevLines: ``,
  nextLines: ``,
  },
- inputChunkSize: 500,
+ useInputChunking: false,
  model: "oai-gpt4o",
  enableDuplicateRequests: false,
  timeout: 3600,
- executePathway: async ({ args }) => {
- return await myResolver(args);
+ executePathway: async (executePathwayArgs) => {
+ const { args } = executePathwayArgs;
+ const requestId = executePathwayArgs?.resolver?.requestId;
+ return await myResolver(args, requestId);
  },
  };
@@ -1,6 +1,5 @@
  import { Prompt } from '../server/prompt.js';

-
  export default {
  prompt: [
  new Prompt({
@@ -8,11 +7,25 @@ export default {
  {
  role: "system",
  content:
- `Assistant is a highly skilled multilingual translator for a prestigious news agency. When the user posts any text in any language, assistant will create a translation of that text in {{to}}. User will most probably produce previous and next lines for context with "PreviousLines" and "NextLines" labels, and you are asked to translate current lines one by one in given sequence with "CurrentLines" label. CurrentLines might have numbered labels as LINE#{lineNo} e.g. LINE#1, LINE#2. If currentline is a word only translate that word. You must keep input and output number of lines same, so do not merge translation of lines, single line must always map to single line. Assistant's output translated number of lines must always be equal to the input number of currentlines. For output, Assistant will produce only the translated text, ignore all LINE#{lineNo} and "CurrentLines" labels, and give no additional notes or commentary.`,
+ `Expert translator: Convert ALL text to {{to}}. Unbreakable rules:
+
+ 1. Translate EVERY SINGLE LINE. Zero exceptions.
+ 2. Output MUST have EXACTLY the same line count as input.
+ 3. One input line = One output line. Always.
+ 4. Only translations. Nothing extra.
+ 5. Non-translatable stays unchanged.
+ 6. Keep all formatting and characters.
+ 7. Prefix: "LINE#lineNumber:".
+ 8. Untranslatable: Copy as-is with prefix.
+ 9. Internal checks: Verify line count and content after each line.
+ 10. Final verification: Recount, check numbering, confirm content, cross-check with input.
+
+ Translate ALL lines. Constant vigilance. Exhaustive final cross-check.`
  },
  {
  role: "user",
- content: `"PreviousLines":\n{{{prevLine}}}\n\n"CurrentLines":\n{{{text}}}\n"NextLines":\n{{{nextLine}}}\n\n`,
+ // content: `"PreviousLines":\n{{{prevLine}}}\n\n"CurrentLines":\n{{{text}}}\n"NextLines":\n{{{nextLine}}}\n\n`,
+ content: `{{{text}}}`,
  },
  ],
  }),
@@ -24,8 +37,8 @@ export default {
  prevLine: ``,
  nextLine: ``,
  },
- inputChunkSize: 500,
+ useInputChunking: false,
  model: 'oai-gpt4o',
  enableDuplicateRequests: false,
-
+ timeout: 3600,
  }
@@ -206,7 +206,7 @@ class PathwayResolver {

  async executePathway(args) {
  if (this.pathway.executePathway && typeof this.pathway.executePathway === 'function') {
- return await this.pathway.executePathway({ args, runAllPrompts: this.promptAndParse.bind(this) });
+ return await this.pathway.executePathway({ args, runAllPrompts: this.promptAndParse.bind(this), resolver: this });
  }
  else {
  return await this.promptAndParse(args);
@@ -37,14 +37,13 @@ class OpenAIWhisperPlugin extends ModelPlugin {
  chunks.push(chunk);

  const { language, responseFormat } = parameters;
- cortexRequest.url = this.requestUrl(text);
  const params = {};
  const { modelPromptText } = this.getCompiledPrompt(text, parameters, prompt);
  const response_format = responseFormat || 'text';

  const formData = new FormData();
  formData.append('file', fs.createReadStream(chunk));
- formData.append('model', this.model.params.model);
+ formData.append('model', cortexRequest.params.model);
  formData.append('response_format', response_format);
  language && formData.append('language', language);
  modelPromptText && formData.append('prompt', modelPromptText);
@@ -405,6 +405,13 @@ Aseel is mommy
  });

  test('test translate_srt endpoint with long srt file', async t => {
+ t.timeout(400000);
  const text = fs.readFileSync(path.join(__dirname, 'sublong.srt'), 'utf8');
  await testTranslateSrt(t, text, 'English');
+ });
+
+ test('test translate_srt endpoint with horizontal srt file', async t => {
+ t.timeout(400000);
+ const text = fs.readFileSync(path.join(__dirname, 'subhorizontal.srt'), 'utf8');
+ await testTranslateSrt(t, text, 'Turkish');
  });