@aj-archipelago/cortex 1.3.58 → 1.3.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/helper-apps/cortex-file-handler/INTERFACE.md +20 -9
  2. package/helper-apps/cortex-file-handler/package-lock.json +2 -2
  3. package/helper-apps/cortex-file-handler/package.json +1 -1
  4. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +17 -17
  5. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +35 -35
  6. package/helper-apps/cortex-file-handler/src/blobHandler.js +1010 -909
  7. package/helper-apps/cortex-file-handler/src/constants.js +98 -98
  8. package/helper-apps/cortex-file-handler/src/docHelper.js +27 -27
  9. package/helper-apps/cortex-file-handler/src/fileChunker.js +224 -214
  10. package/helper-apps/cortex-file-handler/src/helper.js +93 -93
  11. package/helper-apps/cortex-file-handler/src/index.js +584 -550
  12. package/helper-apps/cortex-file-handler/src/localFileHandler.js +86 -86
  13. package/helper-apps/cortex-file-handler/src/redis.js +186 -90
  14. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +301 -273
  15. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +55 -55
  16. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +174 -154
  17. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +239 -223
  18. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +161 -159
  19. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +73 -71
  20. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +46 -45
  21. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +256 -213
  22. package/helper-apps/cortex-file-handler/src/start.js +4 -1
  23. package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +59 -25
  24. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +119 -116
  25. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +257 -257
  26. package/helper-apps/cortex-file-handler/tests/cleanup.test.js +676 -0
  27. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +124 -124
  28. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +249 -208
  29. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +439 -380
  30. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +299 -263
  31. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +265 -239
  32. package/helper-apps/cortex-file-handler/tests/start.test.js +1230 -1201
  33. package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +110 -105
  34. package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +201 -175
  35. package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +128 -125
  36. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +78 -73
  37. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +99 -99
  38. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -70
  39. package/package.json +1 -1
  40. package/pathways/translate_subtitle.js +15 -8
@@ -1,152 +1,152 @@
1
- import test from 'ava';
2
- import axios from 'axios';
3
- import fs from 'fs';
4
- import path from 'path';
5
- import { v4 as uuidv4 } from 'uuid';
6
- import XLSX from 'xlsx';
7
-
8
- import { port } from '../src/start.js';
9
- import { gcs, GCS_BUCKETNAME } from '../src/blobHandler.js';
10
- import { getFileStoreMap, setFileStoreMap } from '../src/redis.js';
11
- import { cleanupHashAndFile } from './testUtils.helper.js';
12
- import { gcsUrlExists } from '../src/blobHandler.js';
1
+ import test from "ava";
2
+ import axios from "axios";
3
+ import fs from "fs";
4
+ import path from "path";
5
+ import { v4 as uuidv4 } from "uuid";
6
+ import XLSX from "xlsx";
7
+
8
+ import { port } from "../src/start.js";
9
+ import { gcs, GCS_BUCKETNAME } from "../src/blobHandler.js";
10
+ import { getFileStoreMap, setFileStoreMap } from "../src/redis.js";
11
+ import { cleanupHashAndFile } from "./testUtils.helper.js";
12
+ import { gcsUrlExists } from "../src/blobHandler.js";
13
13
 
14
14
  const baseUrl = `http://localhost:${port}/api/CortexFileHandler`;
15
15
 
16
16
  // helper: create in-memory xlsx -> file
17
17
  async function createXlsx(tmpDir) {
18
- const wb = XLSX.utils.book_new();
19
- const ws = XLSX.utils.aoa_to_sheet([
20
- ['A', 'B'],
21
- ['1', '2'],
22
- ]);
23
- XLSX.utils.book_append_sheet(wb, ws, 'Sheet1');
24
- const filePath = path.join(tmpDir, `${uuidv4()}.xlsx`);
25
- XLSX.writeFile(wb, filePath);
26
- return filePath;
18
+ const wb = XLSX.utils.book_new();
19
+ const ws = XLSX.utils.aoa_to_sheet([
20
+ ["A", "B"],
21
+ ["1", "2"],
22
+ ]);
23
+ XLSX.utils.book_append_sheet(wb, ws, "Sheet1");
24
+ const filePath = path.join(tmpDir, `${uuidv4()}.xlsx`);
25
+ XLSX.writeFile(wb, filePath);
26
+ return filePath;
27
27
  }
28
28
 
29
29
  // Upload helper (multipart)
30
30
  async function multipartUpload(filePath, hash) {
31
- const FormData = (await import('form-data')).default;
32
- const form = new FormData();
33
- form.append('hash', hash);
34
- form.append('file', fs.createReadStream(filePath));
35
-
36
- const res = await axios.post(baseUrl, form, {
37
- headers: form.getHeaders(),
38
- validateStatus: () => true,
39
- timeout: 30000,
40
- maxContentLength: Infinity,
41
- maxBodyLength: Infinity,
42
- });
43
- return res;
31
+ const FormData = (await import("form-data")).default;
32
+ const form = new FormData();
33
+ form.append("hash", hash);
34
+ form.append("file", fs.createReadStream(filePath));
35
+
36
+ const res = await axios.post(baseUrl, form, {
37
+ headers: form.getHeaders(),
38
+ validateStatus: () => true,
39
+ timeout: 30000,
40
+ maxContentLength: Infinity,
41
+ maxBodyLength: Infinity,
42
+ });
43
+ return res;
44
44
  }
45
45
 
46
46
  function isGcsConfigured() {
47
- return !!gcs;
47
+ return !!gcs;
48
48
  }
49
49
 
50
- test.before(async t => {
51
- const dir = path.join(fs.mkdtempSync(path.join(process.cwd(), 'conv-test-')));
52
- t.context.tmpDir = dir;
50
+ test.before(async (t) => {
51
+ const dir = path.join(fs.mkdtempSync(path.join(process.cwd(), "conv-test-")));
52
+ t.context.tmpDir = dir;
53
53
  });
54
54
 
55
- test.after.always(async t => {
56
- fs.rmSync(t.context.tmpDir, { recursive: true, force: true });
55
+ test.after.always(async (t) => {
56
+ fs.rmSync(t.context.tmpDir, { recursive: true, force: true });
57
57
  });
58
58
 
59
59
  // 1. Remote-URL upload path should still return converted info
60
60
 
61
- test.serial('remote URL save returns converted info', async t => {
62
- const filePath = await createXlsx(t.context.tmpDir);
63
- const hash = `hash-${uuidv4()}`;
64
- // step 1: multipart upload
65
- const up = await multipartUpload(filePath, hash);
66
- t.is(up.status, 200);
67
- t.truthy(up.data.converted?.url);
68
- const publicUrl = up.data.url;
69
-
70
- // step 2: call handler via ?uri= <publicUrl>&save=true
71
- const saveRes = await axios.get(baseUrl, {
72
- params: {
73
- uri: publicUrl,
74
- requestId: uuidv4(),
75
- save: true,
76
- },
77
- validateStatus: () => true,
78
- timeout: 30000,
79
- });
80
-
81
- t.is(saveRes.status, 200);
82
-
83
- t.true(saveRes.data?.url?.includes('.csv'));
84
-
85
- await cleanupHashAndFile(hash, up.data.url, baseUrl);
61
+ test.serial("remote URL save returns converted info", async (t) => {
62
+ const filePath = await createXlsx(t.context.tmpDir);
63
+ const hash = `hash-${uuidv4()}`;
64
+ // step 1: multipart upload
65
+ const up = await multipartUpload(filePath, hash);
66
+ t.is(up.status, 200);
67
+ t.truthy(up.data.converted?.url);
68
+ const publicUrl = up.data.url;
69
+
70
+ // step 2: call handler via ?uri= <publicUrl>&save=true
71
+ const saveRes = await axios.get(baseUrl, {
72
+ params: {
73
+ uri: publicUrl,
74
+ requestId: uuidv4(),
75
+ save: true,
76
+ },
77
+ validateStatus: () => true,
78
+ timeout: 30000,
79
+ });
80
+
81
+ t.is(saveRes.status, 200);
82
+
83
+ t.true(saveRes.data?.url?.includes(".csv"));
84
+
85
+ await cleanupHashAndFile(hash, up.data.url, baseUrl);
86
86
  });
87
87
 
88
88
  // 2. If converted.gcs is missing, checkHash should restore it
89
89
 
90
- test.serial('checkHash recreates missing GCS converted file', async t => {
91
- if (!isGcsConfigured()) {
92
- t.pass();
93
- return;
94
- }
95
-
96
- const filePath = await createXlsx(t.context.tmpDir);
97
- const hash = `hash-${uuidv4()}`;
98
- const up = await multipartUpload(filePath, hash);
99
- t.truthy(up.data.converted?.gcs);
100
-
101
- // delete the GCS object
102
- const convertedGcsUrl = up.data.converted.gcs;
103
- const bucket = gcs.bucket(GCS_BUCKETNAME);
104
- const filename = convertedGcsUrl.replace(`gs://${GCS_BUCKETNAME}/`, '');
105
- try {
106
- await bucket.file(filename).delete({ ignoreNotFound: true });
107
- } catch (_) {}
108
-
109
- // call checkHash – should restore
110
- const resp = await axios.get(baseUrl, {
111
- params: { hash, checkHash: true },
112
- validateStatus: () => true,
113
- timeout: 30000,
114
- });
115
- t.is(resp.status, 200);
116
- t.truthy(resp.data.converted?.gcs);
117
-
118
- // verify restored GCS object exists using returned URL
119
- const newGcsUrl = resp.data.converted.gcs;
120
- const existsAfter = await gcsUrlExists(newGcsUrl, false);
121
- t.true(existsAfter);
122
-
123
- await cleanupHashAndFile(hash, up.data.url, baseUrl);
90
+ test.serial("checkHash recreates missing GCS converted file", async (t) => {
91
+ if (!isGcsConfigured()) {
92
+ t.pass();
93
+ return;
94
+ }
95
+
96
+ const filePath = await createXlsx(t.context.tmpDir);
97
+ const hash = `hash-${uuidv4()}`;
98
+ const up = await multipartUpload(filePath, hash);
99
+ t.truthy(up.data.converted?.gcs);
100
+
101
+ // delete the GCS object
102
+ const convertedGcsUrl = up.data.converted.gcs;
103
+ const bucket = gcs.bucket(GCS_BUCKETNAME);
104
+ const filename = convertedGcsUrl.replace(`gs://${GCS_BUCKETNAME}/`, "");
105
+ try {
106
+ await bucket.file(filename).delete({ ignoreNotFound: true });
107
+ } catch (_) {}
108
+
109
+ // call checkHash – should restore
110
+ const resp = await axios.get(baseUrl, {
111
+ params: { hash, checkHash: true },
112
+ validateStatus: () => true,
113
+ timeout: 30000,
114
+ });
115
+ t.is(resp.status, 200);
116
+ t.truthy(resp.data.converted?.gcs);
117
+
118
+ // verify restored GCS object exists using returned URL
119
+ const newGcsUrl = resp.data.converted.gcs;
120
+ const existsAfter = await gcsUrlExists(newGcsUrl, false);
121
+ t.true(existsAfter);
122
+
123
+ await cleanupHashAndFile(hash, up.data.url, baseUrl);
124
124
  });
125
125
 
126
126
  // 3. If converted section is removed from Redis, checkHash regenerates
127
127
 
128
- test.serial('checkHash regenerates missing converted metadata', async t => {
129
- const filePath = await createXlsx(t.context.tmpDir);
130
- const hash = `hash-${uuidv4()}`;
131
- const up = await multipartUpload(filePath, hash);
132
- t.truthy(up.data.converted?.url);
133
-
134
- // strip converted from Redis entry
135
- const record = await getFileStoreMap(hash);
136
- if (record) {
137
- delete record.converted;
138
- await setFileStoreMap(hash, record);
139
- }
140
-
141
- // call checkHash – should add converted back
142
- const resp = await axios.get(baseUrl, {
143
- params: { hash, checkHash: true },
144
- validateStatus: () => true,
145
- timeout: 30000,
146
- });
147
-
148
- t.is(resp.status, 200);
149
- t.truthy(resp.data.converted?.url);
150
-
151
- await cleanupHashAndFile(hash, up.data.url, baseUrl);
152
- });
128
+ test.serial("checkHash regenerates missing converted metadata", async (t) => {
129
+ const filePath = await createXlsx(t.context.tmpDir);
130
+ const hash = `hash-${uuidv4()}`;
131
+ const up = await multipartUpload(filePath, hash);
132
+ t.truthy(up.data.converted?.url);
133
+
134
+ // strip converted from Redis entry
135
+ const record = await getFileStoreMap(hash);
136
+ if (record) {
137
+ delete record.converted;
138
+ await setFileStoreMap(hash, record);
139
+ }
140
+
141
+ // call checkHash – should add converted back
142
+ const resp = await axios.get(baseUrl, {
143
+ params: { hash, checkHash: true },
144
+ validateStatus: () => true,
145
+ timeout: 30000,
146
+ });
147
+
148
+ t.is(resp.status, 200);
149
+ t.truthy(resp.data.converted?.url);
150
+
151
+ await cleanupHashAndFile(hash, up.data.url, baseUrl);
152
+ });