@aj-archipelago/cortex 1.3.51 → 1.3.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample} +2 -1
  2. package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample} +2 -1
  3. package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample} +2 -1
  4. package/helper-apps/cortex-file-handler/Dockerfile +1 -1
  5. package/helper-apps/cortex-file-handler/INTERFACE.md +178 -0
  6. package/helper-apps/cortex-file-handler/package.json +4 -3
  7. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +3 -0
  8. package/helper-apps/cortex-file-handler/{blobHandler.js → src/blobHandler.js} +167 -99
  9. package/helper-apps/cortex-file-handler/{fileChunker.js → src/fileChunker.js} +11 -24
  10. package/helper-apps/cortex-file-handler/{index.js → src/index.js} +236 -256
  11. package/helper-apps/cortex-file-handler/{services → src/services}/ConversionService.js +39 -18
  12. package/helper-apps/cortex-file-handler/{services → src/services}/FileConversionService.js +7 -3
  13. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +177 -0
  14. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +258 -0
  15. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +182 -0
  16. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +86 -0
  17. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +53 -0
  18. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +259 -0
  19. package/helper-apps/cortex-file-handler/{start.js → src/start.js} +1 -1
  20. package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +28 -0
  21. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +1 -1
  22. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +4 -4
  23. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +152 -0
  24. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +2 -28
  25. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +134 -23
  26. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +307 -0
  27. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +291 -0
  28. package/helper-apps/cortex-file-handler/tests/start.test.js +50 -14
  29. package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +120 -0
  30. package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +193 -0
  31. package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +148 -0
  32. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +100 -0
  33. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +113 -0
  34. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +73 -19
  35. package/lib/entityConstants.js +17 -2
  36. package/package.json +1 -1
  37. package/helper-apps/cortex-file-handler/{constants.js → src/constants.js} +0 -0
  38. package/helper-apps/cortex-file-handler/{docHelper.js → src/docHelper.js} +0 -0
  39. package/helper-apps/cortex-file-handler/{helper.js → src/helper.js} +0 -0
  40. package/helper-apps/cortex-file-handler/{localFileHandler.js → src/localFileHandler.js} +0 -0
  41. package/helper-apps/cortex-file-handler/{redis.js → src/redis.js} +0 -0
@@ -0,0 +1,152 @@
1
+ import test from 'ava';
2
+ import axios from 'axios';
3
+ import fs from 'fs';
4
+ import path from 'path';
5
+ import { v4 as uuidv4 } from 'uuid';
6
+ import XLSX from 'xlsx';
7
+
8
+ import { port } from '../src/start.js';
9
+ import { gcs, GCS_BUCKETNAME } from '../src/blobHandler.js';
10
+ import { getFileStoreMap, setFileStoreMap } from '../src/redis.js';
11
+ import { cleanupHashAndFile } from './testUtils.helper.js';
12
+ import { gcsUrlExists } from '../src/blobHandler.js';
13
+
14
+ const baseUrl = `http://localhost:${port}/api/CortexFileHandler`;
15
+
16
+ // helper: create in-memory xlsx -> file
17
+ async function createXlsx(tmpDir) {
18
+ const wb = XLSX.utils.book_new();
19
+ const ws = XLSX.utils.aoa_to_sheet([
20
+ ['A', 'B'],
21
+ ['1', '2'],
22
+ ]);
23
+ XLSX.utils.book_append_sheet(wb, ws, 'Sheet1');
24
+ const filePath = path.join(tmpDir, `${uuidv4()}.xlsx`);
25
+ XLSX.writeFile(wb, filePath);
26
+ return filePath;
27
+ }
28
+
29
+ // Upload helper (multipart)
30
+ async function multipartUpload(filePath, hash) {
31
+ const FormData = (await import('form-data')).default;
32
+ const form = new FormData();
33
+ form.append('hash', hash);
34
+ form.append('file', fs.createReadStream(filePath));
35
+
36
+ const res = await axios.post(baseUrl, form, {
37
+ headers: form.getHeaders(),
38
+ validateStatus: () => true,
39
+ timeout: 30000,
40
+ maxContentLength: Infinity,
41
+ maxBodyLength: Infinity,
42
+ });
43
+ return res;
44
+ }
45
+
46
+ function isGcsConfigured() {
47
+ return !!gcs;
48
+ }
49
+
50
+ test.before(async t => {
51
+ const dir = path.join(fs.mkdtempSync(path.join(process.cwd(), 'conv-test-')));
52
+ t.context.tmpDir = dir;
53
+ });
54
+
55
+ test.after.always(async t => {
56
+ fs.rmSync(t.context.tmpDir, { recursive: true, force: true });
57
+ });
58
+
59
+ // 1. Remote-URL upload path should still return converted info
60
+
61
+ test.serial('remote URL save returns converted info', async t => {
62
+ const filePath = await createXlsx(t.context.tmpDir);
63
+ const hash = `hash-${uuidv4()}`;
64
+ // step 1: multipart upload
65
+ const up = await multipartUpload(filePath, hash);
66
+ t.is(up.status, 200);
67
+ t.truthy(up.data.converted?.url);
68
+ const publicUrl = up.data.url;
69
+
70
+ // step 2: call handler via ?uri= <publicUrl>&save=true
71
+ const saveRes = await axios.get(baseUrl, {
72
+ params: {
73
+ uri: publicUrl,
74
+ requestId: uuidv4(),
75
+ save: true,
76
+ },
77
+ validateStatus: () => true,
78
+ timeout: 30000,
79
+ });
80
+
81
+ t.is(saveRes.status, 200);
82
+
83
+ t.true(saveRes.data?.url?.includes('.csv'));
84
+
85
+ await cleanupHashAndFile(hash, up.data.url, baseUrl);
86
+ });
87
+
88
+ // 2. If converted.gcs is missing, checkHash should restore it
89
+
90
+ test.serial('checkHash recreates missing GCS converted file', async t => {
91
+ if (!isGcsConfigured()) {
92
+ t.pass();
93
+ return;
94
+ }
95
+
96
+ const filePath = await createXlsx(t.context.tmpDir);
97
+ const hash = `hash-${uuidv4()}`;
98
+ const up = await multipartUpload(filePath, hash);
99
+ t.truthy(up.data.converted?.gcs);
100
+
101
+ // delete the GCS object
102
+ const convertedGcsUrl = up.data.converted.gcs;
103
+ const bucket = gcs.bucket(GCS_BUCKETNAME);
104
+ const filename = convertedGcsUrl.replace(`gs://${GCS_BUCKETNAME}/`, '');
105
+ try {
106
+ await bucket.file(filename).delete({ ignoreNotFound: true });
107
+ } catch (_) {}
108
+
109
+ // call checkHash – should restore
110
+ const resp = await axios.get(baseUrl, {
111
+ params: { hash, checkHash: true },
112
+ validateStatus: () => true,
113
+ timeout: 30000,
114
+ });
115
+ t.is(resp.status, 200);
116
+ t.truthy(resp.data.converted?.gcs);
117
+
118
+ // verify restored GCS object exists using returned URL
119
+ const newGcsUrl = resp.data.converted.gcs;
120
+ const existsAfter = await gcsUrlExists(newGcsUrl, false);
121
+ t.true(existsAfter);
122
+
123
+ await cleanupHashAndFile(hash, up.data.url, baseUrl);
124
+ });
125
+
126
+ // 3. If converted section is removed from Redis, checkHash regenerates
127
+
128
+ test.serial('checkHash regenerates missing converted metadata', async t => {
129
+ const filePath = await createXlsx(t.context.tmpDir);
130
+ const hash = `hash-${uuidv4()}`;
131
+ const up = await multipartUpload(filePath, hash);
132
+ t.truthy(up.data.converted?.url);
133
+
134
+ // strip converted from Redis entry
135
+ const record = await getFileStoreMap(hash);
136
+ if (record) {
137
+ delete record.converted;
138
+ await setFileStoreMap(hash, record);
139
+ }
140
+
141
+ // call checkHash – should add converted back
142
+ const resp = await axios.get(baseUrl, {
143
+ params: { hash, checkHash: true },
144
+ validateStatus: () => true,
145
+ timeout: 30000,
146
+ });
147
+
148
+ t.is(resp.status, 200);
149
+ t.truthy(resp.data.converted?.url);
150
+
151
+ await cleanupHashAndFile(hash, up.data.url, baseUrl);
152
+ });
@@ -9,37 +9,11 @@ import { fileURLToPath } from 'url';
9
9
  import test from 'ava';
10
10
  import nock from 'nock';
11
11
 
12
- import { splitMediaFile, downloadFile } from '../fileChunker.js';
12
+ import { splitMediaFile, downloadFile } from '../src/fileChunker.js';
13
+ import { createTestMediaFile } from './testUtils.helper.js';
13
14
 
14
15
  const __dirname = dirname(fileURLToPath(import.meta.url));
15
16
 
16
- // Helper function to create a test media file of specified duration using ffmpeg
17
- async function createTestMediaFile(filepath, durationSeconds = 10) {
18
- try {
19
- console.log(`Creating test file: ${filepath} (${durationSeconds}s)`);
20
- // Generate silence using ffmpeg
21
- execSync(
22
- `ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t ${durationSeconds} -q:a 9 -acodec libmp3lame "${filepath}"`,
23
- {
24
- stdio: ['ignore', 'pipe', 'pipe'], // Capture stdout and stderr
25
- },
26
- );
27
-
28
- // Verify the file was created and has content
29
- const stats = await fs.stat(filepath);
30
- if (stats.size === 0) {
31
- throw new Error('Generated file is empty');
32
- }
33
- console.log(
34
- `Successfully created ${filepath} (${(stats.size / 1024 / 1024).toFixed(2)}MB)`,
35
- );
36
- } catch (error) {
37
- console.error(`Error creating test file ${filepath}:`, error.message);
38
- if (error.stderr) console.error('ffmpeg error:', error.stderr.toString());
39
- throw error;
40
- }
41
- }
42
-
43
17
  // Setup: Create test files and mock external services
44
18
  test.before(async (t) => {
45
19
  // Check if ffmpeg is available
@@ -5,9 +5,10 @@ import { fileURLToPath } from 'url';
5
5
  import { v4 as uuidv4 } from 'uuid';
6
6
  import axios from 'axios';
7
7
  import FormData from 'form-data';
8
- import { port } from '../start.js';
9
- import { gcs } from '../blobHandler.js';
10
- import { cleanupHashAndFile } from './testUtils.helper.js';
8
+ import { port } from '../src/start.js';
9
+ import { gcs } from '../src/blobHandler.js';
10
+ import { cleanupHashAndFile, getFolderNameFromUrl } from './testUtils.helper.js';
11
+ import XLSX from 'xlsx';
11
12
 
12
13
  const __filename = fileURLToPath(import.meta.url);
13
14
  const __dirname = path.dirname(__filename);
@@ -45,7 +46,7 @@ async function uploadFile(filePath, requestId = null, hash = null) {
45
46
  'Content-Type': 'multipart/form-data',
46
47
  },
47
48
  validateStatus: (status) => true,
48
- timeout: 5000,
49
+ timeout: 30000,
49
50
  maxContentLength: Infinity,
50
51
  maxBodyLength: Infinity,
51
52
  });
@@ -83,7 +84,25 @@ test.before(async (t) => {
83
84
 
84
85
  // Cleanup
85
86
  test.after.always(async (t) => {
87
+ // Clean up test directory
86
88
  await fs.promises.rm(t.context.testDir, { recursive: true, force: true });
89
+
90
+ // Clean up any remaining files in the files directory
91
+ const filesDir = path.join(__dirname, '..', 'files');
92
+ if (fs.existsSync(filesDir)) {
93
+ const dirs = await fs.promises.readdir(filesDir);
94
+ for (const dir of dirs) {
95
+ const dirPath = path.join(filesDir, dir);
96
+ try {
97
+ await fs.promises.rm(dirPath, { recursive: true, force: true });
98
+ } catch (e) {
99
+ console.error('Error cleaning up directory:', {
100
+ dir: dirPath,
101
+ error: e.message
102
+ });
103
+ }
104
+ }
105
+ }
87
106
  });
88
107
 
89
108
  // Basic File Upload Tests
@@ -91,9 +110,10 @@ test.serial('should handle basic file upload', async (t) => {
91
110
  const fileContent = 'test content';
92
111
  const filePath = await createTestFile(fileContent, 'txt');
93
112
  const requestId = uuidv4();
113
+ let response;
94
114
 
95
115
  try {
96
- const response = await uploadFile(filePath, requestId);
116
+ response = await uploadFile(filePath, requestId);
97
117
 
98
118
  t.is(response.status, 200);
99
119
  t.truthy(response.data.url);
@@ -104,6 +124,9 @@ test.serial('should handle basic file upload', async (t) => {
104
124
  t.deepEqual(uploadedContent, Buffer.from(fileContent), 'Uploaded file content should match');
105
125
  } finally {
106
126
  fs.unlinkSync(filePath);
127
+ if (response?.data?.url) {
128
+ await cleanupHashAndFile(null, response.data.url, baseUrl);
129
+ }
107
130
  }
108
131
  });
109
132
 
@@ -114,9 +137,11 @@ test.serial('should handle file upload with hash', async (t) => {
114
137
  const hash = 'test-hash-' + uuidv4();
115
138
  let uploadedUrl;
116
139
  let convertedUrl;
140
+ let response;
141
+
117
142
  try {
118
143
  // First upload the file
119
- const response = await uploadFile(filePath, requestId, hash);
144
+ response = await uploadFile(filePath, requestId, hash);
120
145
  t.is(response.status, 200);
121
146
  t.truthy(response.data.url);
122
147
  uploadedUrl = response.data.url;
@@ -153,9 +178,11 @@ test.serial('should handle file upload with hash', async (t) => {
153
178
  t.deepEqual(Buffer.from(fileResponse.data), Buffer.from(fileContent), 'Uploaded file content should match');
154
179
  } finally {
155
180
  fs.unlinkSync(filePath);
156
- await cleanupHashAndFile(hash, uploadedUrl, baseUrl);
181
+ if (uploadedUrl) {
182
+ await cleanupHashAndFile(hash, uploadedUrl, baseUrl);
183
+ }
157
184
  if (convertedUrl) {
158
- await cleanupHashAndFile(`${hash}_converted`, convertedUrl, baseUrl);
185
+ await cleanupHashAndFile(null, convertedUrl, baseUrl);
159
186
  }
160
187
  }
161
188
  });
@@ -166,9 +193,10 @@ test.serial('should handle PDF document upload and conversion', async (t) => {
166
193
  const fileContent = '%PDF-1.4\nTest PDF content';
167
194
  const filePath = await createTestFile(fileContent, 'pdf');
168
195
  const requestId = uuidv4();
196
+ let response;
169
197
 
170
198
  try {
171
- const response = await uploadFile(filePath, requestId);
199
+ response = await uploadFile(filePath, requestId);
172
200
  t.is(response.status, 200);
173
201
  t.truthy(response.data.url);
174
202
 
@@ -186,6 +214,12 @@ test.serial('should handle PDF document upload and conversion', async (t) => {
186
214
  }
187
215
  } finally {
188
216
  fs.unlinkSync(filePath);
217
+ if (response?.data?.url) {
218
+ await cleanupHashAndFile(null, response.data.url, baseUrl);
219
+ }
220
+ if (response?.data?.converted?.url) {
221
+ await cleanupHashAndFile(null, response.data.converted.url, baseUrl);
222
+ }
189
223
  }
190
224
  });
191
225
 
@@ -195,9 +229,10 @@ test.serial('should handle media file chunking', async (t) => {
195
229
  const chunkContent = 'x'.repeat(1024 * 1024);
196
230
  const filePath = await createTestFile(chunkContent, 'mp4');
197
231
  const requestId = uuidv4();
232
+ let response;
198
233
 
199
234
  try {
200
- const response = await uploadFile(filePath, requestId);
235
+ response = await uploadFile(filePath, requestId);
201
236
  t.is(response.status, 200);
202
237
  t.truthy(response.data);
203
238
 
@@ -232,6 +267,17 @@ test.serial('should handle media file chunking', async (t) => {
232
267
  }
233
268
  } finally {
234
269
  fs.unlinkSync(filePath);
270
+ if (response?.data) {
271
+ if (Array.isArray(response.data)) {
272
+ for (const chunk of response.data) {
273
+ if (chunk.uri) {
274
+ await cleanupHashAndFile(null, chunk.uri, baseUrl);
275
+ }
276
+ }
277
+ } else if (response.data.url) {
278
+ await cleanupHashAndFile(null, response.data.url, baseUrl);
279
+ }
280
+ }
235
281
  }
236
282
  });
237
283
 
@@ -249,7 +295,7 @@ test.serial('should handle invalid file upload', async (t) => {
249
295
  'Content-Type': 'multipart/form-data',
250
296
  },
251
297
  validateStatus: (status) => true,
252
- timeout: 5000,
298
+ timeout: 30000,
253
299
  });
254
300
 
255
301
  // Log the response for debugging
@@ -276,8 +322,7 @@ test.serial('should handle file deletion', async (t) => {
276
322
  await new Promise(resolve => setTimeout(resolve, 1000));
277
323
 
278
324
  // Extract the file identifier from the URL
279
- const fileUrl = new URL(uploadResponse.data.url);
280
- const fileIdentifier = fileUrl.pathname.split('/').pop().split('_')[0];
325
+ const fileIdentifier = getFolderNameFromUrl(uploadResponse.data.url);
281
326
  console.log('File identifier for deletion:', fileIdentifier);
282
327
 
283
328
  // Delete file using the correct identifier
@@ -307,13 +352,24 @@ test.serial('should handle file deletion', async (t) => {
307
352
 
308
353
  // Save Option Test
309
354
  test.serial('should handle document upload with save option', async (t) => {
310
- const fileContent = 'Sample DOCX content';
311
- const filePath = await createTestFile(fileContent, 'docx');
355
+ // Create a minimal XLSX workbook in-memory
356
+ const workbook = XLSX.utils.book_new();
357
+ const worksheet = XLSX.utils.aoa_to_sheet([
358
+ ['Name', 'Score'],
359
+ ['Alice', 10],
360
+ ['Bob', 8],
361
+ ]);
362
+ XLSX.utils.book_append_sheet(workbook, worksheet, 'Sheet1');
363
+
364
+ // Write it to a temp file inside the test directory
365
+ const filePath = path.join(t.context.testDir, `${uuidv4()}.xlsx`);
366
+ XLSX.writeFile(workbook, filePath);
312
367
 
313
368
  const initialRequestId = uuidv4();
314
369
  const saveRequestId = uuidv4();
315
370
 
316
371
  let uploadedUrl;
372
+ let savedUrl;
317
373
 
318
374
  try {
319
375
  // First, upload the document so we have a publicly reachable URL
@@ -333,19 +389,74 @@ test.serial('should handle document upload with save option', async (t) => {
333
389
  validateStatus: (status) => true,
334
390
  });
335
391
 
336
- // The current implementation returns an empty array but should still be 200
392
+ // The save operation should return a 200 status with a result object
337
393
  t.is(saveResponse.status, 200, 'Save request should succeed');
338
- t.true(Array.isArray(saveResponse.data), 'Response body should be an array');
394
+ t.truthy(saveResponse.data, 'Response should have data');
395
+ t.truthy(saveResponse.data.url, 'Response should include a URL');
396
+ t.true(saveResponse.data.url.includes('.csv'), 'Response should include a CSV URL');
397
+ savedUrl = saveResponse.data.url;
339
398
  } finally {
340
399
  fs.unlinkSync(filePath);
341
-
342
- // Cleanup the initially uploaded file
400
+ // Clean up both URLs
343
401
  if (uploadedUrl) {
344
- const identifier = new URL(uploadedUrl).pathname.split('/').pop().split('_')[0];
345
- await axios.delete(`${baseUrl}?operation=delete&requestId=${identifier}`).catch(() => {});
402
+ await cleanupHashAndFile(null, uploadedUrl, baseUrl);
403
+ }
404
+ if (savedUrl && savedUrl !== uploadedUrl) {
405
+ await cleanupHashAndFile(null, savedUrl, baseUrl);
346
406
  }
407
+ }
408
+ });
409
+
410
+ // Converted file persistence test – ensures needsConversion works for extension-only checks
411
+ test.serial('should preserve converted version when checking hash for convertible file', async (t) => {
412
+ // Create a minimal XLSX workbook in-memory
413
+ const workbook = XLSX.utils.book_new();
414
+ const worksheet = XLSX.utils.aoa_to_sheet([
415
+ ['Name', 'Score'],
416
+ ['Alice', 10],
417
+ ['Bob', 8],
418
+ ]);
419
+ XLSX.utils.book_append_sheet(workbook, worksheet, 'Sheet1');
347
420
 
348
- // Cleanup files created by save request
349
- await axios.delete(`${baseUrl}?operation=delete&requestId=${saveRequestId}`).catch(() => {});
421
+ // Write it to a temp file inside the test directory
422
+ const filePath = path.join(t.context.testDir, `${uuidv4()}.xlsx`);
423
+ XLSX.writeFile(workbook, filePath);
424
+
425
+ const requestId = uuidv4();
426
+ const hash = `test-hash-${uuidv4()}`;
427
+
428
+ let uploadedUrl;
429
+ let convertedUrl;
430
+
431
+ try {
432
+ // 1. Upload the XLSX file (conversion should run automatically)
433
+ const uploadResponse = await uploadFile(filePath, requestId, hash);
434
+ t.is(uploadResponse.status, 200, 'Upload should succeed');
435
+ t.truthy(uploadResponse.data.converted, 'Upload response must contain converted info');
436
+ t.truthy(uploadResponse.data.converted.url, 'Converted URL should be present');
437
+
438
+ uploadedUrl = uploadResponse.data.url;
439
+ convertedUrl = uploadResponse.data.converted.url;
440
+
441
+ // 2. Give Redis a moment to persist
442
+ await new Promise((resolve) => setTimeout(resolve, 4000));
443
+
444
+ // 3. Ask the handler for the hash – it will invoke ensureConvertedVersion
445
+ const checkResponse = await axios.get(baseUrl, {
446
+ params: { hash, checkHash: true },
447
+ validateStatus: (status) => true,
448
+ timeout: 30000,
449
+ });
450
+
451
+ t.is(checkResponse.status, 200, 'Hash check should succeed');
452
+ t.truthy(checkResponse.data.converted, 'Hash response should include converted info');
453
+ t.truthy(checkResponse.data.converted.url, 'Converted URL should still be present after hash check');
454
+ } finally {
455
+ // Clean up temp file and remote artifacts
456
+ fs.unlinkSync(filePath);
457
+ await cleanupHashAndFile(hash, uploadedUrl, baseUrl);
458
+ if (convertedUrl) {
459
+ await cleanupHashAndFile(null, convertedUrl, baseUrl);
460
+ }
350
461
  }
351
462
  });