@aj-archipelago/cortex 1.1.37 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +60 -0
- package/package.json +1 -1
- package/pathways/flux_image.js +2 -1
- package/pathways/index.js +6 -1
- package/pathways/sys_parse_numbered_object_list.js +19 -0
- package/pathways/sys_repair_json.js +17 -0
- package/server/chunker.js +156 -113
- package/server/modelExecutor.js +9 -1
- package/server/parser.js +18 -36
- package/server/pathwayResolver.js +1 -1
- package/server/pathwayResponseParser.js +3 -3
- package/server/plugins/azureCognitivePlugin.js +1 -1
- package/server/plugins/azureVideoTranslatePlugin.js +163 -0
- package/server/plugins/openAiVisionPlugin.js +0 -3
- package/server/plugins/{runwareAIPlugin.js → runwareAiPlugin.js} +1 -1
- package/tests/chunkfunction.test.js +270 -4
- package/tests/main.test.js +0 -55
- package/tests/parser.test.js +255 -0
- package/tests/translate_srt.test.js +82 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import test from 'ava';
|
|
2
|
+
import * as parser from '../server/parser.js';
|
|
3
|
+
import * as pathwayTools from '../lib/pathwayTools.js';
|
|
4
|
+
import serverFactory from '../index.js';
|
|
5
|
+
|
|
6
|
+
// Shared server handle: assigned in test.before and stopped in test.after.always.
let testServer;

// Build the server once for the whole file.
test.before(async () => {
    const { server: createdServer, startServer: start } = await serverFactory();
    // Only invoke startServer when the factory returned one.
    if (start) {
        await start();
    }
    testServer = createdServer;
});

// Runs even when tests fail; nothing to stop if setup never assigned a server.
test.after.always('cleanup', async () => {
    if (!testServer) {
        return;
    }
    await testServer.stop();
});
|
|
19
|
+
|
|
20
|
+
// Splitting on a whitespace pattern should yield the individual words,
// including across the embedded newline.
test('regexParser should split text correctly', (t) => {
    const words = parser.regexParser('Hello world\nHow are you', /\s+/);

    t.deepEqual(words, ['Hello', 'world', 'How', 'are', 'you']);
});
|
|
26
|
+
|
|
27
|
+
test('parseNumberedList should parse different numbered list formats', (t) => {
    // One item per marker style: "1.", "2)", "3-" and "4:".
    const mixedMarkers = `1. First item
2) Second item
3- Third item
4: Fourth item`;
    const expectedItems = ['First item', 'Second item', 'Third item', 'Fourth item'];

    t.deepEqual(parser.parseNumberedList(mixedMarkers), expectedItems);
});
|
|
35
|
+
|
|
36
|
+
test('parseNumberedObjectList should parse numbered object list correctly', async (t) => {
    const listing = `1. name: John, age: 30
2. name: Jane, age: 25`;
    // Note that the expected ages are numbers, not strings.
    const expectedPeople = [
        { name: 'John', age: 30 },
        { name: 'Jane', age: 25 },
    ];

    const people = await parser.parseNumberedObjectList(listing, 'name age');

    t.deepEqual(people, expectedPeople);
});
|
|
46
|
+
|
|
47
|
+
// Each comma-delimited entry should come back as its own element, without the
// space that follows each comma in the input.
test('parseCommaSeparatedList should parse comma-separated list correctly', (t) => {
    const fruits = parser.parseCommaSeparatedList('apple, banana, cherry, date');

    t.deepEqual(fruits, ['apple', 'banana', 'cherry', 'date']);
});
|
|
52
|
+
|
|
53
|
+
test('isCommaSeparatedList should correctly identify comma-separated lists', (t) => {
    const commaDelimited = 'a, b, c';
    const newlineDelimited = 'a\nb\nc';

    // Comma-delimited text is accepted; newline-delimited text is rejected.
    t.true(parser.isCommaSeparatedList(commaDelimited));
    t.false(parser.isCommaSeparatedList(newlineDelimited));
});
|
|
57
|
+
|
|
58
|
+
test('isNumberedList should correctly identify numbered lists', (t) => {
    const dotMarkers = '1. First\n2. Second';
    const parenMarkers = '1) First\n2) Second';
    const noMarkers = 'First\nSecond';

    // Both "N." and "N)" markers count as numbered lists.
    t.true(parser.isNumberedList(dotMarkers));
    t.true(parser.isNumberedList(parenMarkers));
    // Lines without any number marker do not.
    t.false(parser.isNumberedList(noMarkers));
});
|
|
63
|
+
|
|
64
|
+
test('parseJson should parse valid JSON', async (t) => {
    const source = '{"name": "John", "age": 30}';

    const output = await parser.parseJson(source);

    // Compare the parsed values rather than the raw strings.
    const actual = JSON.parse(output);
    const expected = JSON.parse(source);
    t.deepEqual(actual, expected);
});
|
|
69
|
+
|
|
70
|
+
test('parseJson should extract JSON from text', async (t) => {
    // The JSON object is surrounded by plain prose on both sides.
    const embedded = 'Here is some JSON: {"name": "John", "age": 30} and some more text';

    const extracted = await parser.parseJson(embedded);

    t.deepEqual(JSON.parse(extracted), { name: 'John', age: 30 });
});
|
|
75
|
+
|
|
76
|
+
test('parseJson should handle JSON arrays', async (t) => {
    const arrayText = '[1, 2, 3, 4, 5]';

    // A top-level array is expected to come back as the identical string.
    t.is(await parser.parseJson(arrayText), arrayText);
});
|
|
81
|
+
|
|
82
|
+
test('parseJson should handle nested JSON', async (t) => {
    // Contains both a nested object and a nested array.
    const nestedText = '{"person": {"name": "John", "age": 30}, "hobbies": ["reading", "swimming"]}';

    const output = await parser.parseJson(nestedText);

    t.is(output, nestedText);
});
|
|
87
|
+
|
|
88
|
+
test('parseJson should attempt to repair invalid JSON', async (t) => {
    // The trailing comma makes this input invalid JSON.
    const invalidJson = '{"name": "John", "age": 30,}';

    const result = await parser.parseJson(invalidJson);

    // Both assertions always run: a null result must fail the test rather than
    // silently skip the content check.
    t.not(result, null);
    t.deepEqual(JSON.parse(result), { name: 'John', age: 30 });
});
|
|
102
|
+
|
|
103
|
+
// The asserted outcome for input containing no JSON at all is the string '{}'
// (an empty JSON object), not null, so the title states exactly that.
test('parseJson should return an empty JSON object for unrepairable JSON', async (t) => {
    const unrepairableJson = 'This is not JSON at all';

    const result = await parser.parseJson(unrepairableJson);

    t.is(result, '{}');
});
|
|
108
|
+
|
|
109
|
+
test('parseJson should handle JSON with special characters', async (t) => {
    // Punctuation and symbol characters inside string values.
    const symbolText = '{"message": "Hello, world!", "symbols": "#$%^&*()"}';

    const output = await parser.parseJson(symbolText);

    t.is(output, symbolText);
});
|
|
114
|
+
|
|
115
|
+
test('parseJson should handle JSON with Unicode characters', async (t) => {
    // Non-ASCII content: Japanese text and an emoji.
    const unicodeText = '{"greeting": "こんにちは", "emoji": "😊"}';

    const output = await parser.parseJson(unicodeText);

    t.is(output, unicodeText);
});
|
|
120
|
+
|
|
121
|
+
test('parseJson should handle large JSON objects', async (t) => {
    // 1000 generated key/value entries make up the bulk of the payload.
    const entries = Array.from({ length: 1000 }, (_, index) => ({
        key: `item${index}`,
        value: `value${index}`,
    }));
    const serialized = JSON.stringify({
        id: 1,
        name: 'Large Object',
        data: entries,
    });

    const output = await parser.parseJson(serialized);

    t.is(output, serialized);
});
|
|
130
|
+
|
|
131
|
+
test('parseJson should handle JSON with different number formats', async (t) => {
    // Integer, float, scientific-notation and negative literals.
    const numericText = '{"integer": 42, "float": 3.14, "scientific": 1.23e-4, "negative": -10}';

    const output = await parser.parseJson(numericText);

    t.is(output, numericText);
});
|
|
136
|
+
|
|
137
|
+
test('parseJson should handle JSON with boolean and null values', async (t) => {
    // Covers the true, false and null literals.
    const literalText = '{"active": true, "inactive": false, "data": null}';

    const output = await parser.parseJson(literalText);

    t.is(output, literalText);
});
|
|
142
|
+
|
|
143
|
+
test('parseNumberedObjectList should handle mixed separators', async (t) => {
    // Key/value pairs use ":" in some places and "-" in others.
    const listing = `1. name: John, age-30
2. name - Jane, age: 25`;
    const expected = [
        { name: 'John', age: 30 },
        { name: 'Jane', age: 25 },
    ];

    const actual = await parser.parseNumberedObjectList(listing, 'name age');

    t.deepEqual(actual, expected);
});
|
|
153
|
+
|
|
154
|
+
test('parseNumberedObjectList should handle n fields', async (t) => {
    // Four fields are requested; the second item has no city.
    const fields = 'name age city country';
    const listing = `1. name: John, age: 30, city: New York, country: USA
2. name: Jane, age: 25, country: Canada`;
    const expected = [
        { name: 'John', age: 30, city: 'New York', country: 'USA' },
        { name: 'Jane', age: 25, country: 'Canada' },
    ];

    const actual = await parser.parseNumberedObjectList(listing, fields);

    t.deepEqual(actual, expected);
});
|
|
164
|
+
|
|
165
|
+
test('parseNumberedObjectList should ignore extra fields', async (t) => {
    // "city" and "country" appear in the input but are not in the format.
    const listing = `1. name: John, age: 30, city: New York
2. name: Jane, age: 25, country: Canada`;
    const expected = [
        { name: 'John', age: 30 },
        { name: 'Jane', age: 25 },
    ];

    const actual = await parser.parseNumberedObjectList(listing, 'name age');

    t.deepEqual(actual, expected);
});
|
|
175
|
+
|
|
176
|
+
test('parseNumberedObjectList should handle missing fields', async (t) => {
    // Each item supplies only one of the two requested fields.
    const listing = `1. name: John
2. age: 25`;
    const expected = [
        { name: 'John' },
        { age: 25 },
    ];

    const actual = await parser.parseNumberedObjectList(listing, 'name age');

    t.deepEqual(actual, expected);
});
|
|
186
|
+
|
|
187
|
+
test('parseNumberedObjectList should be case-insensitive for field names', async (t) => {
    // Labels are upper case and title case; the format is lower case and the
    // expected keys follow the format.
    const listing = `1. NAME: John, AGE: 30
2. Name: Jane, Age: 25`;
    const expected = [
        { name: 'John', age: 30 },
        { name: 'Jane', age: 25 },
    ];

    const actual = await parser.parseNumberedObjectList(listing, 'name age');

    t.deepEqual(actual, expected);
});
|
|
197
|
+
|
|
198
|
+
test('parseNumberedObjectList should handle whitespace variations', async (t) => {
    // Spacing around ":" and "," ranges from none to several characters.
    const listing = `1. name:John,age: 30
2. name : Jane , age:25`;
    const expected = [
        { name: 'John', age: 30 },
        { name: 'Jane', age: 25 },
    ];

    const actual = await parser.parseNumberedObjectList(listing, 'name age');

    t.deepEqual(actual, expected);
});
|
|
208
|
+
|
|
209
|
+
// An empty input string should produce an empty list.
test('parseNumberedObjectList should handle empty input', async (t) => {
    const actual = await parser.parseNumberedObjectList('', 'name age');

    t.deepEqual(actual, []);
});
|
|
215
|
+
|
|
216
|
+
test('parseNumberedObjectList should handle input with no valid fields', async (t) => {
    // None of the labels in the input match the requested "name" / "age".
    const listing = `1. foo: bar, baz: qux
2. quux: corge`;

    const actual = await parser.parseNumberedObjectList(listing, 'name age');

    t.deepEqual(actual, []);
});
|
|
223
|
+
|
|
224
|
+
test('parseNumberedObjectList should handle values with splitters in them', async (t) => {
    // The birth dates contain "-", which is also used as a key/value separator
    // elsewhere; the dates must survive intact as strings.
    const listing = `1. name: John Doe, birth: 1990-01-01
2. name: Jane Smith, birth: 1985-05-05`;
    const expected = [
        { name: 'John Doe', birth: '1990-01-01' },
        { name: 'Jane Smith', birth: '1985-05-05' },
    ];

    const actual = await parser.parseNumberedObjectList(listing, 'name birth');

    t.deepEqual(actual, expected);
});
|
|
234
|
+
|
|
235
|
+
test('parseNumberedObjectList should infer field names when given a list of separated values', async (t) => {
    // The items carry bare values with no labels; the keys come from the format.
    const listing = `1. John Doe, 1990-01-01
2. Jane Smith, 1985-05-05`;
    const expected = [
        { name: 'John Doe', birth: '1990-01-01' },
        { name: 'Jane Smith', birth: '1985-05-05' },
    ];

    const actual = await parser.parseNumberedObjectList(listing, 'name birth');

    t.deepEqual(actual, expected);
});
|
|
245
|
+
|
|
246
|
+
test('parseNumberedObjectList should match simple string output to objects', async (t) => {
    // Each item is "<word>: <definition>"; the definitions themselves contain
    // commas and semicolons.
    const listing = "1. World: The Earth and all its inhabitants, considered as a single entity.\n2. Dear: Loved or cherished by someone; regarded with deep affection.\n3. Hello: Used as a greeting or to begin a conversation.";
    const expected = [
        { name: 'World', definition: 'The Earth and all its inhabitants, considered as a single entity.' },
        { name: 'Dear', definition: 'Loved or cherished by someone; regarded with deep affection.' },
        { name: 'Hello', definition: 'Used as a greeting or to begin a conversation.' },
    ];

    const actual = await parser.parseNumberedObjectList(listing, 'name definition');

    t.deepEqual(actual, expected);
});
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import test from 'ava';
|
|
2
|
+
import serverFactory from '../index.js';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import { dirname } from 'path';
|
|
5
|
+
import fs from 'fs';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
|
|
8
|
+
// ES modules have no built-in __filename/__dirname; derive them from
// import.meta.url so fixture files can be resolved relative to this test file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Shared server handle: assigned in test.before and stopped in test.after.always.
let testServer;

// Build the server once for the whole file.
test.before(async () => {
    const { server: createdServer, startServer: start } = await serverFactory();
    // Only invoke startServer when the factory returned one.
    if (start) {
        await start();
    }
    testServer = createdServer;
});

// Runs even when tests fail; nothing to stop if setup never assigned a server.
test.after.always('cleanup', async () => {
    if (!testServer) {
        return;
    }
    await testServer.stop();
});
|
|
24
|
+
|
|
25
|
+
/**
 * Runs the translate_subtitle query against the shared test server and checks
 * that the returned text still has the structure of the input subtitles.
 *
 * Checks performed on the AVA context:
 *  - the response carries no errors;
 *  - a string result was returned (otherwise the test fails and stops here);
 *  - the result is longer than half of the input;
 *  - the timestamp lines match the input's, in the same order;
 *  - the number of lines is unchanged.
 *
 * @param {object} t - AVA execution context used for the assertions.
 * @param {string} text - Subtitle text sent as the `text` variable.
 * @param {string} [language='English'] - Value sent as the `to` variable.
 * @returns {Promise<void>}
 */
async function testTranslateSrt(t, text, language = 'English') {
    // Timing line of the form "00:00:03,069 --> 00:00:04,771". String#match
    // with a global regex restarts from index 0, so one instance can be shared.
    const timestampPattern = /\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/g;

    const response = await testServer.executeOperation({
        query: 'query translate_subtitle($text: String!, $to:String) { translate_subtitle(text: $text, to:$to) { result } }',
        variables: {
            to: language,
            text,
        },
    });

    const singleResult = response.body?.singleResult;
    t.falsy(singleResult?.errors);

    const result = singleResult?.data?.translate_subtitle?.result;
    // Without a string result the checks below would throw a TypeError and hide
    // the real problem; report an explicit failure instead.
    if (typeof result !== 'string') {
        t.fail('translate_subtitle did not return a string result');
        return;
    }

    t.true(result.length > text.length * 0.5);

    // Check all timestamps are still there and not translated.
    const originalTimestamps = text.match(timestampPattern);
    const translatedTimestamps = result.match(timestampPattern);
    t.deepEqual(originalTimestamps, translatedTimestamps, 'All timestamps should be present and unchanged');

    const originalLineCount = text.split('\n').length;
    const translatedLineCount = result.split('\n').length;
    t.is(originalLineCount, translatedLineCount, 'Total number of lines should be the same');
}
|
|
50
|
+
|
|
51
|
+
test('test translate_srt endpoint with simple srt', async (t) => {
    // Four short cues, inlined so this case needs no fixture file.
    const simpleSrt = `1
00:00:03,069 --> 00:00:04,771
Who's that?

2
00:00:04,771 --> 00:00:06,039
Aseel.

3
00:00:06,039 --> 00:00:07,474
Who is Aseel a mom to?

4
00:00:07,474 --> 00:00:09,376
Aseel is mommy
`;

    await testTranslateSrt(t, simpleSrt, 'Spanish');
});
|
|
71
|
+
|
|
72
|
+
test('test translate_srt endpoint with long srt file', async (t) => {
    // Raise the per-test timeout for this case.
    t.timeout(400000);

    // The fixture sits next to this test file.
    const fixturePath = path.join(__dirname, 'sublong.srt');
    const srtContent = fs.readFileSync(fixturePath, 'utf8');

    await testTranslateSrt(t, srtContent, 'English');
});
|
|
77
|
+
|
|
78
|
+
test('test translate_srt endpoint with horizontal srt file', async (t) => {
    // Raise the per-test timeout for this case.
    t.timeout(400000);

    // The fixture sits next to this test file.
    const fixturePath = path.join(__dirname, 'subhorizontal.srt');
    const srtContent = fs.readFileSync(fixturePath, 'utf8');

    await testTranslateSrt(t, srtContent, 'Turkish');
});
|