@aj-archipelago/cortex 1.3.49 → 1.3.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-browser/Dockerfile +19 -31
- package/helper-apps/cortex-browser/function_app.py +708 -181
- package/helper-apps/cortex-browser/requirements.txt +4 -4
- package/helper-apps/cortex-file-handler/blobHandler.js +850 -429
- package/helper-apps/cortex-file-handler/constants.js +64 -48
- package/helper-apps/cortex-file-handler/docHelper.js +7 -114
- package/helper-apps/cortex-file-handler/fileChunker.js +96 -51
- package/helper-apps/cortex-file-handler/function.json +2 -6
- package/helper-apps/cortex-file-handler/helper.js +34 -25
- package/helper-apps/cortex-file-handler/index.js +324 -136
- package/helper-apps/cortex-file-handler/localFileHandler.js +56 -57
- package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
- package/helper-apps/cortex-file-handler/package.json +8 -4
- package/helper-apps/cortex-file-handler/redis.js +23 -17
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +1 -1
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
- package/helper-apps/cortex-file-handler/services/ConversionService.js +288 -0
- package/helper-apps/cortex-file-handler/services/FileConversionService.js +53 -0
- package/helper-apps/cortex-file-handler/start.js +63 -38
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +88 -64
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +114 -91
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +351 -0
- package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
- package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +943 -642
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +31 -0
- package/helper-apps/cortex-markitdown/.funcignore +1 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
- package/helper-apps/cortex-markitdown/README.md +94 -0
- package/helper-apps/cortex-markitdown/host.json +15 -0
- package/helper-apps/cortex-markitdown/requirements.txt +2 -0
- package/lib/requestExecutor.js +44 -36
- package/package.json +1 -1
- package/pathways/system/entity/tools/sys_tool_cognitive_search.js +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
- package/server/plugins/openAiWhisperPlugin.js +59 -87
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
import
|
|
2
|
-
import { fileURLToPath } from 'url';
|
|
3
|
-
import { dirname, join } from 'path';
|
|
4
|
-
import fs from 'fs/promises';
|
|
1
|
+
import { execSync } from 'child_process';
|
|
5
2
|
import { existsSync } from 'fs';
|
|
6
|
-
import
|
|
7
|
-
import nock from 'nock';
|
|
3
|
+
import fs from 'fs/promises';
|
|
8
4
|
import os from 'os';
|
|
9
|
-
import {
|
|
5
|
+
import { dirname, join } from 'path';
|
|
10
6
|
import { performance } from 'perf_hooks';
|
|
7
|
+
import { fileURLToPath } from 'url';
|
|
8
|
+
|
|
9
|
+
import test from 'ava';
|
|
10
|
+
import nock from 'nock';
|
|
11
|
+
|
|
12
|
+
import { splitMediaFile, downloadFile } from '../fileChunker.js';
|
|
11
13
|
|
|
12
14
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
15
|
|
|
@@ -16,16 +18,21 @@ async function createTestMediaFile(filepath, durationSeconds = 10) {
|
|
|
16
18
|
try {
|
|
17
19
|
console.log(`Creating test file: ${filepath} (${durationSeconds}s)`);
|
|
18
20
|
// Generate silence using ffmpeg
|
|
19
|
-
execSync(
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
execSync(
|
|
22
|
+
`ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t ${durationSeconds} -q:a 9 -acodec libmp3lame "${filepath}"`,
|
|
23
|
+
{
|
|
24
|
+
stdio: ['ignore', 'pipe', 'pipe'], // Capture stdout and stderr
|
|
25
|
+
},
|
|
26
|
+
);
|
|
27
|
+
|
|
23
28
|
// Verify the file was created and has content
|
|
24
29
|
const stats = await fs.stat(filepath);
|
|
25
30
|
if (stats.size === 0) {
|
|
26
31
|
throw new Error('Generated file is empty');
|
|
27
32
|
}
|
|
28
|
-
console.log(
|
|
33
|
+
console.log(
|
|
34
|
+
`Successfully created ${filepath} (${(stats.size / 1024 / 1024).toFixed(2)}MB)`,
|
|
35
|
+
);
|
|
29
36
|
} catch (error) {
|
|
30
37
|
console.error(`Error creating test file ${filepath}:`, error.message);
|
|
31
38
|
if (error.stderr) console.error('ffmpeg error:', error.stderr.toString());
|
|
@@ -34,43 +41,45 @@ async function createTestMediaFile(filepath, durationSeconds = 10) {
|
|
|
34
41
|
}
|
|
35
42
|
|
|
36
43
|
// Setup: Create test files and mock external services
|
|
37
|
-
test.before(async t => {
|
|
44
|
+
test.before(async (t) => {
|
|
38
45
|
// Check if ffmpeg is available
|
|
39
46
|
try {
|
|
40
47
|
execSync('ffmpeg -version', { stdio: 'ignore' });
|
|
41
48
|
} catch (error) {
|
|
42
|
-
console.error(
|
|
49
|
+
console.error(
|
|
50
|
+
'ffmpeg is not installed. Please install it to run these tests.',
|
|
51
|
+
);
|
|
43
52
|
process.exit(1);
|
|
44
53
|
}
|
|
45
54
|
|
|
46
55
|
const testDir = join(__dirname, 'test-files');
|
|
47
56
|
await fs.mkdir(testDir, { recursive: true });
|
|
48
|
-
|
|
57
|
+
|
|
49
58
|
try {
|
|
50
|
-
|
|
59
|
+
// Create test files of different durations
|
|
51
60
|
const testFile1s = join(testDir, 'test-1s.mp3');
|
|
52
61
|
const testFile10s = join(testDir, 'test-10s.mp3');
|
|
53
62
|
const testFile600s = join(testDir, 'test-600s.mp3');
|
|
54
|
-
|
|
63
|
+
|
|
55
64
|
await createTestMediaFile(testFile1s, 1);
|
|
56
65
|
await createTestMediaFile(testFile10s, 10);
|
|
57
66
|
await createTestMediaFile(testFile600s, 600);
|
|
58
|
-
|
|
67
|
+
|
|
59
68
|
// Create large test files
|
|
60
69
|
const testFile1h = join(testDir, 'test-1h.mp3');
|
|
61
70
|
const testFile4h = join(testDir, 'test-4h.mp3');
|
|
62
|
-
|
|
71
|
+
|
|
63
72
|
console.log('\nCreating large test files (this may take a while)...');
|
|
64
73
|
await createTestMediaFile(testFile1h, 3600);
|
|
65
74
|
await createTestMediaFile(testFile4h, 14400);
|
|
66
|
-
|
|
75
|
+
|
|
67
76
|
t.context = {
|
|
68
77
|
testDir,
|
|
69
78
|
testFile1s,
|
|
70
79
|
testFile10s,
|
|
71
80
|
testFile600s,
|
|
72
81
|
testFile1h,
|
|
73
|
-
testFile4h
|
|
82
|
+
testFile4h,
|
|
74
83
|
};
|
|
75
84
|
|
|
76
85
|
// Setup nock for URL tests with proper headers
|
|
@@ -78,7 +87,7 @@ test.before(async t => {
|
|
|
78
87
|
.get('/media/test.mp3')
|
|
79
88
|
.replyWithFile(200, testFile10s, {
|
|
80
89
|
'Content-Type': 'audio/mpeg',
|
|
81
|
-
'Content-Length': (await fs.stat(testFile10s)).size.toString()
|
|
90
|
+
'Content-Length': (await fs.stat(testFile10s)).size.toString(),
|
|
82
91
|
})
|
|
83
92
|
.persist();
|
|
84
93
|
} catch (error) {
|
|
@@ -94,7 +103,7 @@ test.before(async t => {
|
|
|
94
103
|
});
|
|
95
104
|
|
|
96
105
|
// Cleanup: Remove test files
|
|
97
|
-
test.after.always(async t => {
|
|
106
|
+
test.after.always(async (t) => {
|
|
98
107
|
// Clean up test files
|
|
99
108
|
if (t.context.testDir) {
|
|
100
109
|
try {
|
|
@@ -110,118 +119,126 @@ test.after.always(async t => {
|
|
|
110
119
|
});
|
|
111
120
|
|
|
112
121
|
// Test successful chunking of a short file
|
|
113
|
-
test('successfully chunks short media file', async t => {
|
|
114
|
-
const { chunkPromises, chunkOffsets, uniqueOutputPath } =
|
|
115
|
-
|
|
122
|
+
test('successfully chunks short media file', async (t) => {
|
|
123
|
+
const { chunkPromises, chunkOffsets, uniqueOutputPath } =
|
|
124
|
+
await splitMediaFile(t.context.testFile1s);
|
|
125
|
+
|
|
116
126
|
t.true(Array.isArray(chunkPromises), 'Should return array of promises');
|
|
117
127
|
t.true(Array.isArray(chunkOffsets), 'Should return array of offsets');
|
|
118
128
|
t.true(typeof uniqueOutputPath === 'string', 'Should return output path');
|
|
119
|
-
|
|
129
|
+
|
|
120
130
|
// Should only create one chunk for 1s file
|
|
121
131
|
t.is(chunkPromises.length, 1, 'Should create single chunk for short file');
|
|
122
|
-
|
|
132
|
+
|
|
123
133
|
// Wait for chunks to process
|
|
124
134
|
const chunkPaths = await Promise.all(chunkPromises);
|
|
125
|
-
|
|
135
|
+
|
|
126
136
|
// Verify chunk exists
|
|
127
137
|
t.true(existsSync(chunkPaths[0]), 'Chunk file should exist');
|
|
128
|
-
|
|
138
|
+
|
|
129
139
|
// Cleanup
|
|
130
140
|
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
|
|
131
141
|
});
|
|
132
142
|
|
|
133
143
|
// Test chunking of a longer file
|
|
134
|
-
test('correctly chunks longer media file', async t => {
|
|
135
|
-
const { chunkPromises, chunkOffsets, uniqueOutputPath } =
|
|
136
|
-
|
|
144
|
+
test('correctly chunks longer media file', async (t) => {
|
|
145
|
+
const { chunkPromises, chunkOffsets, uniqueOutputPath } =
|
|
146
|
+
await splitMediaFile(t.context.testFile600s);
|
|
147
|
+
|
|
137
148
|
// For 600s file with 500s chunks, should create 2 chunks
|
|
138
149
|
t.is(chunkPromises.length, 2, 'Should create correct number of chunks');
|
|
139
150
|
t.is(chunkOffsets.length, 2, 'Should create correct number of offsets');
|
|
140
|
-
|
|
151
|
+
|
|
141
152
|
// Verify offsets
|
|
142
153
|
t.is(chunkOffsets[0], 0, 'First chunk should start at 0');
|
|
143
154
|
t.is(chunkOffsets[1], 500, 'Second chunk should start at 500s');
|
|
144
|
-
|
|
155
|
+
|
|
145
156
|
// Wait for chunks to process
|
|
146
157
|
const chunkPaths = await Promise.all(chunkPromises);
|
|
147
|
-
|
|
158
|
+
|
|
148
159
|
// Verify all chunks exist
|
|
149
160
|
for (const chunkPath of chunkPaths) {
|
|
150
161
|
t.true(existsSync(chunkPath), 'Each chunk file should exist');
|
|
151
162
|
}
|
|
152
|
-
|
|
163
|
+
|
|
153
164
|
// Cleanup
|
|
154
165
|
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
|
|
155
166
|
});
|
|
156
167
|
|
|
157
168
|
// Test custom chunk duration
|
|
158
|
-
test('respects custom chunk duration', async t => {
|
|
169
|
+
test('respects custom chunk duration', async (t) => {
|
|
159
170
|
const customDuration = 5; // 5 seconds
|
|
160
|
-
const { chunkPromises, chunkOffsets } = await splitMediaFile(
|
|
161
|
-
|
|
171
|
+
const { chunkPromises, chunkOffsets } = await splitMediaFile(
|
|
172
|
+
t.context.testFile10s,
|
|
173
|
+
customDuration,
|
|
174
|
+
);
|
|
175
|
+
|
|
162
176
|
// For 10s file with 5s chunks, should create 2 chunks
|
|
163
|
-
t.is(
|
|
177
|
+
t.is(
|
|
178
|
+
chunkPromises.length,
|
|
179
|
+
2,
|
|
180
|
+
'Should create correct number of chunks for custom duration',
|
|
181
|
+
);
|
|
164
182
|
t.deepEqual(chunkOffsets, [0, 5], 'Should have correct offset points');
|
|
165
183
|
});
|
|
166
184
|
|
|
167
185
|
// Test URL-based file processing
|
|
168
|
-
test('processes media file from URL', async t => {
|
|
186
|
+
test('processes media file from URL', async (t) => {
|
|
169
187
|
const url = 'https://example.com/media/test.mp3';
|
|
170
188
|
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(url);
|
|
171
|
-
|
|
189
|
+
|
|
172
190
|
// Wait for chunks to process
|
|
173
191
|
const chunkPaths = await Promise.all(chunkPromises);
|
|
174
|
-
|
|
192
|
+
|
|
175
193
|
// Verify chunks were created
|
|
176
194
|
for (const chunkPath of chunkPaths) {
|
|
177
|
-
t.true(
|
|
195
|
+
t.true(
|
|
196
|
+
existsSync(chunkPath),
|
|
197
|
+
'Chunk files should exist for URL-based media',
|
|
198
|
+
);
|
|
178
199
|
}
|
|
179
|
-
|
|
200
|
+
|
|
180
201
|
// Cleanup
|
|
181
202
|
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
|
|
182
203
|
});
|
|
183
204
|
|
|
184
205
|
// Test error handling for invalid files
|
|
185
|
-
test('handles invalid media files gracefully', async t => {
|
|
206
|
+
test('handles invalid media files gracefully', async (t) => {
|
|
186
207
|
const invalidFile = join(t.context.testDir, 'invalid.mp3');
|
|
187
208
|
await fs.writeFile(invalidFile, 'not a valid mp3 file');
|
|
188
|
-
|
|
189
|
-
await t.throwsAsync(
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
);
|
|
209
|
+
|
|
210
|
+
await t.throwsAsync(async () => splitMediaFile(invalidFile), {
|
|
211
|
+
message: /Error processing media file/,
|
|
212
|
+
});
|
|
193
213
|
});
|
|
194
214
|
|
|
195
215
|
// Test error handling for non-existent files
|
|
196
|
-
test('handles non-existent files gracefully', async t => {
|
|
216
|
+
test('handles non-existent files gracefully', async (t) => {
|
|
197
217
|
const nonExistentFile = join(t.context.testDir, 'non-existent.mp3');
|
|
198
|
-
|
|
199
|
-
await t.throwsAsync(
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
);
|
|
218
|
+
|
|
219
|
+
await t.throwsAsync(async () => splitMediaFile(nonExistentFile), {
|
|
220
|
+
message: /Error processing media file/,
|
|
221
|
+
});
|
|
203
222
|
});
|
|
204
223
|
|
|
205
224
|
// Test file download functionality
|
|
206
|
-
test('successfully downloads file from URL', async t => {
|
|
225
|
+
test('successfully downloads file from URL', async (t) => {
|
|
207
226
|
const url = 'https://example.com/media/test.mp3';
|
|
208
227
|
const outputPath = join(os.tmpdir(), 'downloaded-test.mp3');
|
|
209
|
-
|
|
228
|
+
|
|
210
229
|
await downloadFile(url, outputPath);
|
|
211
230
|
t.true(existsSync(outputPath), 'Downloaded file should exist');
|
|
212
|
-
|
|
231
|
+
|
|
213
232
|
// Cleanup
|
|
214
233
|
await fs.unlink(outputPath);
|
|
215
234
|
});
|
|
216
235
|
|
|
217
236
|
// Test error handling for invalid URLs in download
|
|
218
|
-
test('handles invalid URLs in download gracefully', async t => {
|
|
237
|
+
test('handles invalid URLs in download gracefully', async (t) => {
|
|
219
238
|
const invalidUrl = 'https://invalid-url-that-does-not-exist.com/test.mp3';
|
|
220
239
|
const outputPath = join(os.tmpdir(), 'should-not-exist.mp3');
|
|
221
|
-
|
|
222
|
-
await t.throwsAsync(
|
|
223
|
-
async () => downloadFile(invalidUrl, outputPath)
|
|
224
|
-
);
|
|
240
|
+
|
|
241
|
+
await t.throwsAsync(async () => downloadFile(invalidUrl, outputPath));
|
|
225
242
|
});
|
|
226
243
|
|
|
227
244
|
// Helper to format duration nicely
|
|
@@ -236,76 +253,82 @@ function formatDuration(ms) {
|
|
|
236
253
|
}
|
|
237
254
|
|
|
238
255
|
// Test performance with 1-hour file
|
|
239
|
-
test('performance test - 1 hour file', async t => {
|
|
256
|
+
test('performance test - 1 hour file', async (t) => {
|
|
240
257
|
const start = performance.now();
|
|
241
|
-
|
|
242
|
-
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
|
|
243
|
-
|
|
258
|
+
|
|
259
|
+
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
|
|
260
|
+
t.context.testFile1h,
|
|
261
|
+
);
|
|
262
|
+
|
|
244
263
|
// Wait for all chunks to complete
|
|
245
264
|
const chunkPaths = await Promise.all(chunkPromises);
|
|
246
265
|
const end = performance.now();
|
|
247
266
|
const duration = end - start;
|
|
248
|
-
|
|
267
|
+
|
|
249
268
|
console.log(`\n1 hour file processing stats:
|
|
250
269
|
- Total time: ${formatDuration(duration)}
|
|
251
270
|
- Chunks created: ${chunkPaths.length}
|
|
252
271
|
- Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
|
|
253
|
-
- Processing speed: ${(
|
|
254
|
-
|
|
272
|
+
- Processing speed: ${(3600 / (duration / 1000)).toFixed(2)}x realtime`);
|
|
273
|
+
|
|
255
274
|
t.true(chunkPaths.length > 0, 'Should create chunks');
|
|
256
275
|
t.true(duration > 0, 'Should measure time');
|
|
257
|
-
|
|
276
|
+
|
|
258
277
|
// Cleanup
|
|
259
278
|
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
|
|
260
279
|
});
|
|
261
280
|
|
|
262
281
|
// Test performance with 4-hour file
|
|
263
|
-
test('performance test - 4 hour file', async t => {
|
|
282
|
+
test('performance test - 4 hour file', async (t) => {
|
|
264
283
|
const start = performance.now();
|
|
265
|
-
|
|
266
|
-
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
|
|
267
|
-
|
|
284
|
+
|
|
285
|
+
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
|
|
286
|
+
t.context.testFile4h,
|
|
287
|
+
);
|
|
288
|
+
|
|
268
289
|
// Wait for all chunks to complete
|
|
269
290
|
const chunkPaths = await Promise.all(chunkPromises);
|
|
270
291
|
const end = performance.now();
|
|
271
292
|
const duration = end - start;
|
|
272
|
-
|
|
293
|
+
|
|
273
294
|
console.log(`\n4 hour file processing stats:
|
|
274
295
|
- Total time: ${formatDuration(duration)}
|
|
275
296
|
- Chunks created: ${chunkPaths.length}
|
|
276
297
|
- Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
|
|
277
|
-
- Processing speed: ${(
|
|
278
|
-
|
|
298
|
+
- Processing speed: ${(14400 / (duration / 1000)).toFixed(2)}x realtime`);
|
|
299
|
+
|
|
279
300
|
t.true(chunkPaths.length > 0, 'Should create chunks');
|
|
280
301
|
t.true(duration > 0, 'Should measure time');
|
|
281
|
-
|
|
302
|
+
|
|
282
303
|
// Cleanup
|
|
283
304
|
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
|
|
284
305
|
});
|
|
285
306
|
|
|
286
307
|
// Test memory usage during large file processing
|
|
287
|
-
test('memory usage during large file processing', async t => {
|
|
308
|
+
test('memory usage during large file processing', async (t) => {
|
|
288
309
|
const initialMemory = process.memoryUsage().heapUsed;
|
|
289
310
|
let peakMemory = initialMemory;
|
|
290
|
-
|
|
311
|
+
|
|
291
312
|
const interval = setInterval(() => {
|
|
292
313
|
const used = process.memoryUsage().heapUsed;
|
|
293
314
|
peakMemory = Math.max(peakMemory, used);
|
|
294
315
|
}, 100);
|
|
295
|
-
|
|
296
|
-
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
|
|
316
|
+
|
|
317
|
+
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
|
|
318
|
+
t.context.testFile4h,
|
|
319
|
+
);
|
|
297
320
|
await Promise.all(chunkPromises);
|
|
298
|
-
|
|
321
|
+
|
|
299
322
|
clearInterval(interval);
|
|
300
|
-
|
|
323
|
+
|
|
301
324
|
const memoryIncrease = (peakMemory - initialMemory) / 1024 / 1024; // Convert to MB
|
|
302
325
|
console.log(`\nMemory usage stats:
|
|
303
326
|
- Initial memory: ${(initialMemory / 1024 / 1024).toFixed(2)}MB
|
|
304
327
|
- Peak memory: ${(peakMemory / 1024 / 1024).toFixed(2)}MB
|
|
305
328
|
- Memory increase: ${memoryIncrease.toFixed(2)}MB`);
|
|
306
|
-
|
|
329
|
+
|
|
307
330
|
t.true(memoryIncrease >= 0, 'Should track memory usage');
|
|
308
|
-
|
|
331
|
+
|
|
309
332
|
// Cleanup
|
|
310
333
|
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
|
|
311
|
-
});
|
|
334
|
+
});
|