@aj-archipelago/cortex 1.3.49 → 1.3.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/config.js +1 -1
  2. package/helper-apps/cortex-browser/Dockerfile +19 -31
  3. package/helper-apps/cortex-browser/function_app.py +708 -181
  4. package/helper-apps/cortex-browser/requirements.txt +4 -4
  5. package/helper-apps/cortex-file-handler/blobHandler.js +850 -429
  6. package/helper-apps/cortex-file-handler/constants.js +64 -48
  7. package/helper-apps/cortex-file-handler/docHelper.js +7 -114
  8. package/helper-apps/cortex-file-handler/fileChunker.js +96 -51
  9. package/helper-apps/cortex-file-handler/function.json +2 -6
  10. package/helper-apps/cortex-file-handler/helper.js +34 -25
  11. package/helper-apps/cortex-file-handler/index.js +324 -136
  12. package/helper-apps/cortex-file-handler/localFileHandler.js +56 -57
  13. package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
  14. package/helper-apps/cortex-file-handler/package.json +8 -4
  15. package/helper-apps/cortex-file-handler/redis.js +23 -17
  16. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
  17. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
  18. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +1 -1
  19. package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
  20. package/helper-apps/cortex-file-handler/services/ConversionService.js +288 -0
  21. package/helper-apps/cortex-file-handler/services/FileConversionService.js +53 -0
  22. package/helper-apps/cortex-file-handler/start.js +63 -38
  23. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
  24. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +88 -64
  25. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +114 -91
  26. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +351 -0
  27. package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
  28. package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
  29. package/helper-apps/cortex-file-handler/tests/start.test.js +943 -642
  30. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +31 -0
  31. package/helper-apps/cortex-markitdown/.funcignore +1 -0
  32. package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
  33. package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
  34. package/helper-apps/cortex-markitdown/README.md +94 -0
  35. package/helper-apps/cortex-markitdown/host.json +15 -0
  36. package/helper-apps/cortex-markitdown/requirements.txt +2 -0
  37. package/lib/requestExecutor.js +44 -36
  38. package/package.json +1 -1
  39. package/pathways/system/entity/tools/sys_tool_cognitive_search.js +1 -1
  40. package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
  41. package/server/plugins/openAiWhisperPlugin.js +59 -87
  42. package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
@@ -1,13 +1,15 @@
1
- import test from 'ava';
2
- import { fileURLToPath } from 'url';
3
- import { dirname, join } from 'path';
4
- import fs from 'fs/promises';
1
+ import { execSync } from 'child_process';
5
2
  import { existsSync } from 'fs';
6
- import { splitMediaFile, downloadFile } from '../fileChunker.js';
7
- import nock from 'nock';
3
+ import fs from 'fs/promises';
8
4
  import os from 'os';
9
- import { execSync } from 'child_process';
5
+ import { dirname, join } from 'path';
10
6
  import { performance } from 'perf_hooks';
7
+ import { fileURLToPath } from 'url';
8
+
9
+ import test from 'ava';
10
+ import nock from 'nock';
11
+
12
+ import { splitMediaFile, downloadFile } from '../fileChunker.js';
11
13
 
12
14
  const __dirname = dirname(fileURLToPath(import.meta.url));
13
15
 
@@ -16,16 +18,21 @@ async function createTestMediaFile(filepath, durationSeconds = 10) {
16
18
  try {
17
19
  console.log(`Creating test file: ${filepath} (${durationSeconds}s)`);
18
20
  // Generate silence using ffmpeg
19
- execSync(`ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t ${durationSeconds} -q:a 9 -acodec libmp3lame "${filepath}"`, {
20
- stdio: ['ignore', 'pipe', 'pipe'] // Capture stdout and stderr
21
- });
22
-
21
+ execSync(
22
+ `ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t ${durationSeconds} -q:a 9 -acodec libmp3lame "${filepath}"`,
23
+ {
24
+ stdio: ['ignore', 'pipe', 'pipe'], // Capture stdout and stderr
25
+ },
26
+ );
27
+
23
28
  // Verify the file was created and has content
24
29
  const stats = await fs.stat(filepath);
25
30
  if (stats.size === 0) {
26
31
  throw new Error('Generated file is empty');
27
32
  }
28
- console.log(`Successfully created ${filepath} (${(stats.size / 1024 / 1024).toFixed(2)}MB)`);
33
+ console.log(
34
+ `Successfully created ${filepath} (${(stats.size / 1024 / 1024).toFixed(2)}MB)`,
35
+ );
29
36
  } catch (error) {
30
37
  console.error(`Error creating test file ${filepath}:`, error.message);
31
38
  if (error.stderr) console.error('ffmpeg error:', error.stderr.toString());
@@ -34,43 +41,45 @@ async function createTestMediaFile(filepath, durationSeconds = 10) {
34
41
  }
35
42
 
36
43
  // Setup: Create test files and mock external services
37
- test.before(async t => {
44
+ test.before(async (t) => {
38
45
  // Check if ffmpeg is available
39
46
  try {
40
47
  execSync('ffmpeg -version', { stdio: 'ignore' });
41
48
  } catch (error) {
42
- console.error('ffmpeg is not installed. Please install it to run these tests.');
49
+ console.error(
50
+ 'ffmpeg is not installed. Please install it to run these tests.',
51
+ );
43
52
  process.exit(1);
44
53
  }
45
54
 
46
55
  const testDir = join(__dirname, 'test-files');
47
56
  await fs.mkdir(testDir, { recursive: true });
48
-
57
+
49
58
  try {
50
- // Create test files of different durations
59
+ // Create test files of different durations
51
60
  const testFile1s = join(testDir, 'test-1s.mp3');
52
61
  const testFile10s = join(testDir, 'test-10s.mp3');
53
62
  const testFile600s = join(testDir, 'test-600s.mp3');
54
-
63
+
55
64
  await createTestMediaFile(testFile1s, 1);
56
65
  await createTestMediaFile(testFile10s, 10);
57
66
  await createTestMediaFile(testFile600s, 600);
58
-
67
+
59
68
  // Create large test files
60
69
  const testFile1h = join(testDir, 'test-1h.mp3');
61
70
  const testFile4h = join(testDir, 'test-4h.mp3');
62
-
71
+
63
72
  console.log('\nCreating large test files (this may take a while)...');
64
73
  await createTestMediaFile(testFile1h, 3600);
65
74
  await createTestMediaFile(testFile4h, 14400);
66
-
75
+
67
76
  t.context = {
68
77
  testDir,
69
78
  testFile1s,
70
79
  testFile10s,
71
80
  testFile600s,
72
81
  testFile1h,
73
- testFile4h
82
+ testFile4h,
74
83
  };
75
84
 
76
85
  // Setup nock for URL tests with proper headers
@@ -78,7 +87,7 @@ test.before(async t => {
78
87
  .get('/media/test.mp3')
79
88
  .replyWithFile(200, testFile10s, {
80
89
  'Content-Type': 'audio/mpeg',
81
- 'Content-Length': (await fs.stat(testFile10s)).size.toString()
90
+ 'Content-Length': (await fs.stat(testFile10s)).size.toString(),
82
91
  })
83
92
  .persist();
84
93
  } catch (error) {
@@ -94,7 +103,7 @@ test.before(async t => {
94
103
  });
95
104
 
96
105
  // Cleanup: Remove test files
97
- test.after.always(async t => {
106
+ test.after.always(async (t) => {
98
107
  // Clean up test files
99
108
  if (t.context.testDir) {
100
109
  try {
@@ -110,118 +119,126 @@ test.after.always(async t => {
110
119
  });
111
120
 
112
121
  // Test successful chunking of a short file
113
- test('successfully chunks short media file', async t => {
114
- const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(t.context.testFile1s);
115
-
122
+ test('successfully chunks short media file', async (t) => {
123
+ const { chunkPromises, chunkOffsets, uniqueOutputPath } =
124
+ await splitMediaFile(t.context.testFile1s);
125
+
116
126
  t.true(Array.isArray(chunkPromises), 'Should return array of promises');
117
127
  t.true(Array.isArray(chunkOffsets), 'Should return array of offsets');
118
128
  t.true(typeof uniqueOutputPath === 'string', 'Should return output path');
119
-
129
+
120
130
  // Should only create one chunk for 1s file
121
131
  t.is(chunkPromises.length, 1, 'Should create single chunk for short file');
122
-
132
+
123
133
  // Wait for chunks to process
124
134
  const chunkPaths = await Promise.all(chunkPromises);
125
-
135
+
126
136
  // Verify chunk exists
127
137
  t.true(existsSync(chunkPaths[0]), 'Chunk file should exist');
128
-
138
+
129
139
  // Cleanup
130
140
  await fs.rm(uniqueOutputPath, { recursive: true, force: true });
131
141
  });
132
142
 
133
143
  // Test chunking of a longer file
134
- test('correctly chunks longer media file', async t => {
135
- const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(t.context.testFile600s);
136
-
144
+ test('correctly chunks longer media file', async (t) => {
145
+ const { chunkPromises, chunkOffsets, uniqueOutputPath } =
146
+ await splitMediaFile(t.context.testFile600s);
147
+
137
148
  // For 600s file with 500s chunks, should create 2 chunks
138
149
  t.is(chunkPromises.length, 2, 'Should create correct number of chunks');
139
150
  t.is(chunkOffsets.length, 2, 'Should create correct number of offsets');
140
-
151
+
141
152
  // Verify offsets
142
153
  t.is(chunkOffsets[0], 0, 'First chunk should start at 0');
143
154
  t.is(chunkOffsets[1], 500, 'Second chunk should start at 500s');
144
-
155
+
145
156
  // Wait for chunks to process
146
157
  const chunkPaths = await Promise.all(chunkPromises);
147
-
158
+
148
159
  // Verify all chunks exist
149
160
  for (const chunkPath of chunkPaths) {
150
161
  t.true(existsSync(chunkPath), 'Each chunk file should exist');
151
162
  }
152
-
163
+
153
164
  // Cleanup
154
165
  await fs.rm(uniqueOutputPath, { recursive: true, force: true });
155
166
  });
156
167
 
157
168
  // Test custom chunk duration
158
- test('respects custom chunk duration', async t => {
169
+ test('respects custom chunk duration', async (t) => {
159
170
  const customDuration = 5; // 5 seconds
160
- const { chunkPromises, chunkOffsets } = await splitMediaFile(t.context.testFile10s, customDuration);
161
-
171
+ const { chunkPromises, chunkOffsets } = await splitMediaFile(
172
+ t.context.testFile10s,
173
+ customDuration,
174
+ );
175
+
162
176
  // For 10s file with 5s chunks, should create 2 chunks
163
- t.is(chunkPromises.length, 2, 'Should create correct number of chunks for custom duration');
177
+ t.is(
178
+ chunkPromises.length,
179
+ 2,
180
+ 'Should create correct number of chunks for custom duration',
181
+ );
164
182
  t.deepEqual(chunkOffsets, [0, 5], 'Should have correct offset points');
165
183
  });
166
184
 
167
185
  // Test URL-based file processing
168
- test('processes media file from URL', async t => {
186
+ test('processes media file from URL', async (t) => {
169
187
  const url = 'https://example.com/media/test.mp3';
170
188
  const { chunkPromises, uniqueOutputPath } = await splitMediaFile(url);
171
-
189
+
172
190
  // Wait for chunks to process
173
191
  const chunkPaths = await Promise.all(chunkPromises);
174
-
192
+
175
193
  // Verify chunks were created
176
194
  for (const chunkPath of chunkPaths) {
177
- t.true(existsSync(chunkPath), 'Chunk files should exist for URL-based media');
195
+ t.true(
196
+ existsSync(chunkPath),
197
+ 'Chunk files should exist for URL-based media',
198
+ );
178
199
  }
179
-
200
+
180
201
  // Cleanup
181
202
  await fs.rm(uniqueOutputPath, { recursive: true, force: true });
182
203
  });
183
204
 
184
205
  // Test error handling for invalid files
185
- test('handles invalid media files gracefully', async t => {
206
+ test('handles invalid media files gracefully', async (t) => {
186
207
  const invalidFile = join(t.context.testDir, 'invalid.mp3');
187
208
  await fs.writeFile(invalidFile, 'not a valid mp3 file');
188
-
189
- await t.throwsAsync(
190
- async () => splitMediaFile(invalidFile),
191
- { message: /Error processing media file/ }
192
- );
209
+
210
+ await t.throwsAsync(async () => splitMediaFile(invalidFile), {
211
+ message: /Error processing media file/,
212
+ });
193
213
  });
194
214
 
195
215
  // Test error handling for non-existent files
196
- test('handles non-existent files gracefully', async t => {
216
+ test('handles non-existent files gracefully', async (t) => {
197
217
  const nonExistentFile = join(t.context.testDir, 'non-existent.mp3');
198
-
199
- await t.throwsAsync(
200
- async () => splitMediaFile(nonExistentFile),
201
- { message: /Error processing media file/ }
202
- );
218
+
219
+ await t.throwsAsync(async () => splitMediaFile(nonExistentFile), {
220
+ message: /Error processing media file/,
221
+ });
203
222
  });
204
223
 
205
224
  // Test file download functionality
206
- test('successfully downloads file from URL', async t => {
225
+ test('successfully downloads file from URL', async (t) => {
207
226
  const url = 'https://example.com/media/test.mp3';
208
227
  const outputPath = join(os.tmpdir(), 'downloaded-test.mp3');
209
-
228
+
210
229
  await downloadFile(url, outputPath);
211
230
  t.true(existsSync(outputPath), 'Downloaded file should exist');
212
-
231
+
213
232
  // Cleanup
214
233
  await fs.unlink(outputPath);
215
234
  });
216
235
 
217
236
  // Test error handling for invalid URLs in download
218
- test('handles invalid URLs in download gracefully', async t => {
237
+ test('handles invalid URLs in download gracefully', async (t) => {
219
238
  const invalidUrl = 'https://invalid-url-that-does-not-exist.com/test.mp3';
220
239
  const outputPath = join(os.tmpdir(), 'should-not-exist.mp3');
221
-
222
- await t.throwsAsync(
223
- async () => downloadFile(invalidUrl, outputPath)
224
- );
240
+
241
+ await t.throwsAsync(async () => downloadFile(invalidUrl, outputPath));
225
242
  });
226
243
 
227
244
  // Helper to format duration nicely
@@ -236,76 +253,82 @@ function formatDuration(ms) {
236
253
  }
237
254
 
238
255
  // Test performance with 1-hour file
239
- test('performance test - 1 hour file', async t => {
256
+ test('performance test - 1 hour file', async (t) => {
240
257
  const start = performance.now();
241
-
242
- const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile1h);
243
-
258
+
259
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
260
+ t.context.testFile1h,
261
+ );
262
+
244
263
  // Wait for all chunks to complete
245
264
  const chunkPaths = await Promise.all(chunkPromises);
246
265
  const end = performance.now();
247
266
  const duration = end - start;
248
-
267
+
249
268
  console.log(`\n1 hour file processing stats:
250
269
  - Total time: ${formatDuration(duration)}
251
270
  - Chunks created: ${chunkPaths.length}
252
271
  - Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
253
- - Processing speed: ${((3600 / (duration / 1000))).toFixed(2)}x realtime`);
254
-
272
+ - Processing speed: ${(3600 / (duration / 1000)).toFixed(2)}x realtime`);
273
+
255
274
  t.true(chunkPaths.length > 0, 'Should create chunks');
256
275
  t.true(duration > 0, 'Should measure time');
257
-
276
+
258
277
  // Cleanup
259
278
  await fs.rm(uniqueOutputPath, { recursive: true, force: true });
260
279
  });
261
280
 
262
281
  // Test performance with 4-hour file
263
- test('performance test - 4 hour file', async t => {
282
+ test('performance test - 4 hour file', async (t) => {
264
283
  const start = performance.now();
265
-
266
- const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile4h);
267
-
284
+
285
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
286
+ t.context.testFile4h,
287
+ );
288
+
268
289
  // Wait for all chunks to complete
269
290
  const chunkPaths = await Promise.all(chunkPromises);
270
291
  const end = performance.now();
271
292
  const duration = end - start;
272
-
293
+
273
294
  console.log(`\n4 hour file processing stats:
274
295
  - Total time: ${formatDuration(duration)}
275
296
  - Chunks created: ${chunkPaths.length}
276
297
  - Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
277
- - Processing speed: ${((14400 / (duration / 1000))).toFixed(2)}x realtime`);
278
-
298
+ - Processing speed: ${(14400 / (duration / 1000)).toFixed(2)}x realtime`);
299
+
279
300
  t.true(chunkPaths.length > 0, 'Should create chunks');
280
301
  t.true(duration > 0, 'Should measure time');
281
-
302
+
282
303
  // Cleanup
283
304
  await fs.rm(uniqueOutputPath, { recursive: true, force: true });
284
305
  });
285
306
 
286
307
  // Test memory usage during large file processing
287
- test('memory usage during large file processing', async t => {
308
+ test('memory usage during large file processing', async (t) => {
288
309
  const initialMemory = process.memoryUsage().heapUsed;
289
310
  let peakMemory = initialMemory;
290
-
311
+
291
312
  const interval = setInterval(() => {
292
313
  const used = process.memoryUsage().heapUsed;
293
314
  peakMemory = Math.max(peakMemory, used);
294
315
  }, 100);
295
-
296
- const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile4h);
316
+
317
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
318
+ t.context.testFile4h,
319
+ );
297
320
  await Promise.all(chunkPromises);
298
-
321
+
299
322
  clearInterval(interval);
300
-
323
+
301
324
  const memoryIncrease = (peakMemory - initialMemory) / 1024 / 1024; // Convert to MB
302
325
  console.log(`\nMemory usage stats:
303
326
  - Initial memory: ${(initialMemory / 1024 / 1024).toFixed(2)}MB
304
327
  - Peak memory: ${(peakMemory / 1024 / 1024).toFixed(2)}MB
305
328
  - Memory increase: ${memoryIncrease.toFixed(2)}MB`);
306
-
329
+
307
330
  t.true(memoryIncrease >= 0, 'Should track memory usage');
308
-
331
+
309
332
  // Cleanup
310
333
  await fs.rm(uniqueOutputPath, { recursive: true, force: true });
311
- });
334
+ });