@aj-archipelago/cortex 1.3.58 → 1.3.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-file-handler/INTERFACE.md +20 -9
- package/helper-apps/cortex-file-handler/package-lock.json +2 -2
- package/helper-apps/cortex-file-handler/package.json +1 -1
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +17 -17
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +35 -35
- package/helper-apps/cortex-file-handler/src/blobHandler.js +1010 -909
- package/helper-apps/cortex-file-handler/src/constants.js +98 -98
- package/helper-apps/cortex-file-handler/src/docHelper.js +27 -27
- package/helper-apps/cortex-file-handler/src/fileChunker.js +224 -214
- package/helper-apps/cortex-file-handler/src/helper.js +93 -93
- package/helper-apps/cortex-file-handler/src/index.js +584 -550
- package/helper-apps/cortex-file-handler/src/localFileHandler.js +86 -86
- package/helper-apps/cortex-file-handler/src/redis.js +186 -90
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +301 -273
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +55 -55
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +174 -154
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +239 -223
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +161 -159
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +73 -71
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +46 -45
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +256 -213
- package/helper-apps/cortex-file-handler/src/start.js +4 -1
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +59 -25
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +119 -116
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +257 -257
- package/helper-apps/cortex-file-handler/tests/cleanup.test.js +676 -0
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +124 -124
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +249 -208
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +439 -380
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +299 -263
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +265 -239
- package/helper-apps/cortex-file-handler/tests/start.test.js +1230 -1201
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +110 -105
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +201 -175
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +128 -125
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +78 -73
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +99 -99
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -70
- package/package.json +1 -1
- package/pathways/translate_subtitle.js +15 -8
|
@@ -1,152 +1,152 @@
|
|
|
1
|
-
import test from
|
|
2
|
-
import axios from
|
|
3
|
-
import fs from
|
|
4
|
-
import path from
|
|
5
|
-
import { v4 as uuidv4 } from
|
|
6
|
-
import XLSX from
|
|
7
|
-
|
|
8
|
-
import { port } from
|
|
9
|
-
import { gcs, GCS_BUCKETNAME } from
|
|
10
|
-
import { getFileStoreMap, setFileStoreMap } from
|
|
11
|
-
import { cleanupHashAndFile } from
|
|
12
|
-
import { gcsUrlExists } from
|
|
1
|
+
import test from "ava";
|
|
2
|
+
import axios from "axios";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import { v4 as uuidv4 } from "uuid";
|
|
6
|
+
import XLSX from "xlsx";
|
|
7
|
+
|
|
8
|
+
import { port } from "../src/start.js";
|
|
9
|
+
import { gcs, GCS_BUCKETNAME } from "../src/blobHandler.js";
|
|
10
|
+
import { getFileStoreMap, setFileStoreMap } from "../src/redis.js";
|
|
11
|
+
import { cleanupHashAndFile } from "./testUtils.helper.js";
|
|
12
|
+
import { gcsUrlExists } from "../src/blobHandler.js";
|
|
13
13
|
|
|
14
14
|
// Endpoint every request in this file targets; `port` is imported from ../src/start.js.
const baseUrl = `http://localhost:${port}/api/CortexFileHandler`;
|
|
15
15
|
|
|
16
16
|
// helper: create in-memory xlsx -> file
|
|
17
17
|
/**
 * Writes a tiny two-row workbook to a uniquely named .xlsx file.
 *
 * @param {string} tmpDir - Directory that receives the generated file.
 * @returns {Promise<string>} Path of the workbook written to disk.
 */
async function createXlsx(tmpDir) {
  const rows = [
    ["A", "B"],
    ["1", "2"],
  ];
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(
    workbook,
    XLSX.utils.aoa_to_sheet(rows),
    "Sheet1",
  );

  // A random file name keeps repeated calls from overwriting each other.
  const target = path.join(tmpDir, `${uuidv4()}.xlsx`);
  XLSX.writeFile(workbook, target);
  return target;
}
|
|
28
28
|
|
|
29
29
|
// Upload helper (multipart)
|
|
30
30
|
/**
 * Posts a file to the handler as multipart/form-data together with its hash.
 *
 * @param {string} filePath - Path of the file streamed into the request body.
 * @param {string} hash - Value sent in the "hash" form field.
 * @returns {Promise<object>} The axios response. HTTP error statuses do not
 *   reject because `validateStatus` accepts every status code.
 */
async function multipartUpload(filePath, hash) {
  const { default: FormData } = await import("form-data");

  const payload = new FormData();
  payload.append("hash", hash);
  payload.append("file", fs.createReadStream(filePath));

  const requestOptions = {
    headers: payload.getHeaders(),
    validateStatus: () => true,
    timeout: 30000,
    maxContentLength: Infinity,
    maxBodyLength: Infinity,
  };
  return axios.post(baseUrl, payload, requestOptions);
}
|
|
45
45
|
|
|
46
46
|
/**
 * Reports whether the `gcs` client imported from blobHandler is truthy.
 *
 * @returns {boolean} true when `gcs` is set, false otherwise.
 */
function isGcsConfigured() {
  return Boolean(gcs);
}
|
|
49
49
|
|
|
50
|
-
// Create one scratch directory under the current working directory and share
// it with the tests through t.context.
test.before(async (t) => {
  const prefix = path.join(process.cwd(), "conv-test-");
  t.context.tmpDir = fs.mkdtempSync(prefix);
});
|
|
54
54
|
|
|
55
|
-
// Remove the scratch directory created by the before hook.
test.after.always(async (t) => {
  const { tmpDir } = t.context;
  // This hook also runs when the before hook failed, in which case no
  // directory was recorded. fs.rmSync rejects a non-string path even with
  // `force: true`, so skip the removal instead of raising a second error.
  if (tmpDir) {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
});
|
|
58
58
|
|
|
59
59
|
// 1. Remote-URL upload path should still return converted info
|
|
60
60
|
|
|
61
|
-
// Uploads an xlsx file, then re-submits its public URL with save=true and
// expects the saved result to point at a .csv file.
test.serial("remote URL save returns converted info", async (t) => {
  const filePath = await createXlsx(t.context.tmpDir);
  const hash = `hash-${uuidv4()}`;
  let up;

  try {
    // step 1: multipart upload
    up = await multipartUpload(filePath, hash);
    t.is(up.status, 200);
    t.truthy(up.data.converted?.url);
    const publicUrl = up.data.url;

    // step 2: call the handler with ?uri=<publicUrl>&save=true
    const saveRes = await axios.get(baseUrl, {
      params: {
        uri: publicUrl,
        requestId: uuidv4(),
        save: true,
      },
      validateStatus: () => true,
      timeout: 30000,
    });

    t.is(saveRes.status, 200);

    t.true(saveRes.data?.url?.includes(".csv"));
  } finally {
    // Clean up even when a request throws (e.g. a timeout), so a failed run
    // does not leave the uploaded file and hash entry behind.
    if (up) {
      await cleanupHashAndFile(hash, up.data.url, baseUrl);
    }
  }
});
|
|
87
87
|
|
|
88
88
|
// 2. If converted.gcs is missing, checkHash should restore it
|
|
89
89
|
|
|
90
|
-
// Deletes the converted object from GCS after an upload and expects a
// checkHash request to return a converted.gcs URL that exists again.
test.serial("checkHash recreates missing GCS converted file", async (t) => {
  // Without a GCS client there is nothing to verify; count the test as passed.
  if (!isGcsConfigured()) {
    t.pass();
    return;
  }

  const filePath = await createXlsx(t.context.tmpDir);
  const hash = `hash-${uuidv4()}`;
  let up;

  try {
    up = await multipartUpload(filePath, hash);
    t.truthy(up.data.converted?.gcs);

    // delete the GCS object
    const convertedGcsUrl = up.data.converted.gcs;
    const bucket = gcs.bucket(GCS_BUCKETNAME);
    const filename = convertedGcsUrl.replace(`gs://${GCS_BUCKETNAME}/`, "");
    try {
      await bucket.file(filename).delete({ ignoreNotFound: true });
    } catch (err) {
      // Best effort: keep going, but record why the delete failed so a later
      // assertion failure can be diagnosed.
      t.log(`GCS delete failed (continuing): ${err?.message ?? err}`);
    }

    // call checkHash – should restore
    const resp = await axios.get(baseUrl, {
      params: { hash, checkHash: true },
      validateStatus: () => true,
      timeout: 30000,
    });
    t.is(resp.status, 200);
    t.truthy(resp.data.converted?.gcs);

    // verify restored GCS object exists using returned URL
    const newGcsUrl = resp.data.converted.gcs;
    const existsAfter = await gcsUrlExists(newGcsUrl, false);
    t.true(existsAfter);
  } finally {
    // Clean up even when a step above throws, so a failed run does not leave
    // the uploaded file and hash entry behind.
    if (up) {
      await cleanupHashAndFile(hash, up.data.url, baseUrl);
    }
  }
});
|
|
125
125
|
|
|
126
126
|
// 3. If converted section is removed from Redis, checkHash regenerates
|
|
127
127
|
|
|
128
|
-
// Removes the `converted` section from the stored record after an upload and
// expects a checkHash request to return converted info again.
test.serial("checkHash regenerates missing converted metadata", async (t) => {
  const filePath = await createXlsx(t.context.tmpDir);
  const hash = `hash-${uuidv4()}`;
  let up;

  try {
    up = await multipartUpload(filePath, hash);
    t.truthy(up.data.converted?.url);

    // strip converted from Redis entry
    const record = await getFileStoreMap(hash);
    if (record) {
      delete record.converted;
      await setFileStoreMap(hash, record);
    } else {
      // Make it visible that the regeneration path was not exercised.
      t.log(`no stored record found for ${hash}; converted was not stripped`);
    }

    // call checkHash – should add converted back
    const resp = await axios.get(baseUrl, {
      params: { hash, checkHash: true },
      validateStatus: () => true,
      timeout: 30000,
    });

    t.is(resp.status, 200);
    t.truthy(resp.data.converted?.url);
  } finally {
    // Clean up even when a step above throws, so a failed run does not leave
    // the uploaded file and hash entry behind.
    if (up) {
      await cleanupHashAndFile(hash, up.data.url, baseUrl);
    }
  }
});
|