@aj-archipelago/cortex 1.3.50 → 1.3.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-browser/Dockerfile +19 -31
- package/helper-apps/cortex-browser/function_app.py +708 -181
- package/helper-apps/cortex-browser/requirements.txt +4 -4
- package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample} +2 -1
- package/helper-apps/cortex-file-handler/Dockerfile +1 -1
- package/helper-apps/cortex-file-handler/INTERFACE.md +178 -0
- package/helper-apps/cortex-file-handler/function.json +2 -6
- package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
- package/helper-apps/cortex-file-handler/package.json +11 -6
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +4 -1
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
- package/helper-apps/cortex-file-handler/src/blobHandler.js +1056 -0
- package/helper-apps/cortex-file-handler/{constants.js → src/constants.js} +64 -48
- package/helper-apps/cortex-file-handler/src/docHelper.js +37 -0
- package/helper-apps/cortex-file-handler/{fileChunker.js → src/fileChunker.js} +97 -65
- package/helper-apps/cortex-file-handler/{helper.js → src/helper.js} +34 -25
- package/helper-apps/cortex-file-handler/src/index.js +608 -0
- package/helper-apps/cortex-file-handler/src/localFileHandler.js +107 -0
- package/helper-apps/cortex-file-handler/{redis.js → src/redis.js} +23 -17
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +309 -0
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +57 -0
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +177 -0
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +258 -0
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +182 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +86 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +53 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +259 -0
- package/helper-apps/cortex-file-handler/src/start.js +88 -0
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +28 -0
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +90 -66
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +152 -0
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +105 -108
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +462 -0
- package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
- package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +307 -0
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +291 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +984 -647
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +120 -0
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +193 -0
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +148 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +100 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +113 -0
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +85 -0
- package/helper-apps/cortex-markitdown/.funcignore +1 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
- package/helper-apps/cortex-markitdown/README.md +94 -0
- package/helper-apps/cortex-markitdown/host.json +15 -0
- package/helper-apps/cortex-markitdown/requirements.txt +2 -0
- package/lib/entityConstants.js +1 -1
- package/lib/requestExecutor.js +44 -36
- package/package.json +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
- package/server/plugins/openAiWhisperPlugin.js +59 -87
- package/helper-apps/cortex-file-handler/blobHandler.js +0 -567
- package/helper-apps/cortex-file-handler/docHelper.js +0 -144
- package/helper-apps/cortex-file-handler/index.js +0 -440
- package/helper-apps/cortex-file-handler/localFileHandler.js +0 -108
- package/helper-apps/cortex-file-handler/start.js +0 -63
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
|
@@ -1,440 +0,0 @@
|
|
|
1
|
-
import { downloadFile, splitMediaFile } from './fileChunker.js';
|
|
2
|
-
import { saveFileToBlob, deleteBlob, deleteGCS, uploadBlob, cleanup, cleanupGCS, gcsUrlExists, ensureGCSUpload, gcs, AZURE_STORAGE_CONTAINER_NAME, uploadChunkToGCS } from './blobHandler.js';
|
|
3
|
-
import { cleanupRedisFileStoreMap, getFileStoreMap, publishRequestProgress, removeFromFileStoreMap, setFileStoreMap } from './redis.js';
|
|
4
|
-
import { ensureEncoded, ensureFileExtension, urlExists } from './helper.js';
|
|
5
|
-
import { moveFileToPublicFolder, deleteFolder, cleanupLocal } from './localFileHandler.js';
|
|
6
|
-
import { documentToText, easyChunker } from './docHelper.js';
|
|
7
|
-
import { DOC_EXTENSIONS } from './constants.js';
|
|
8
|
-
import path from 'path';
|
|
9
|
-
import os from 'os';
|
|
10
|
-
import { v4 as uuidv4 } from 'uuid';
|
|
11
|
-
import fs from 'fs';
|
|
12
|
-
|
|
13
|
-
// Azure Blob Storage is selected when a connection string is configured;
// the log line below labels the alternative as 'Local' storage.
const useAzure = Boolean(process.env.AZURE_STORAGE_CONNECTION_STRING);

// Google Cloud Storage is enabled when either form of the service account key is set.
const useGCS = Boolean(process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 || process.env.GCP_SERVICE_ACCOUNT_KEY);

console.log(`Storage configuration - ${useAzure ? 'Azure' : 'Local'} Storage${useGCS ? ' and Google Cloud Storage' : ''}`);
|
|
17
|
-
|
|
18
|
-
// True while a cleanup pass is in flight; used to skip overlapping passes.
let isCleanupRunning = false;

/**
 * Removes stored files whose entries were dropped from the Redis file store map.
 *
 * Calls cleanupRedisFileStoreMap(), sorts the returned entries' URLs into
 * Azure, GCS and local buckets, and hands each non-empty bucket to the
 * matching cleanup helper. Every failure is logged and swallowed so the
 * caller can trigger this without awaiting it.
 *
 * @param {object} context - Handler context, forwarded to the Azure cleanup helper.
 * @returns {Promise<void>}
 */
async function cleanupInactive(context) {
  // The guard must stay outside the try/finally below: a skipped call that
  // returned from inside the try would run the finally block and clear the
  // flag while the first pass is still running.
  if (isCleanupRunning) {
    return; // no need to cleanup every call
  }
  isCleanupRunning = true;

  try {
    const cleaned = await cleanupRedisFileStoreMap();

    const cleanedAzure = [];
    const cleanedLocal = [];
    const cleanedGCS = [];

    for (const item of Object.values(cleaned ?? {})) {
      // Skip empty entries instead of letting one bad entry abort the whole pass.
      if (!item) {
        continue;
      }

      const { url, gcs: gcsUrl } = item;
      if (url) {
        if (url.includes('.blob.core.windows.net/')) {
          cleanedAzure.push(url);
        } else if (url.startsWith('gs://')) {
          cleanedGCS.push(url);
        } else {
          cleanedLocal.push(url);
        }
      }

      // Avoid queueing the same GCS object twice when url and gcs coincide.
      if (gcsUrl && !cleanedGCS.includes(gcsUrl)) {
        cleanedGCS.push(gcsUrl);
      }
    }

    // Each backend is cleaned independently so one failure does not block the others.
    try {
      if (cleanedAzure.length > 0) {
        await cleanup(context, cleanedAzure);
      }
    } catch (error) {
      console.log('Error occurred during azure cleanup:', error);
    }

    try {
      if (cleanedLocal.length > 0) {
        await cleanupLocal(cleanedLocal);
      }
    } catch (err) {
      console.log('Error occurred during local cleanup:', err);
    }

    try {
      if (cleanedGCS.length > 0) {
        await cleanupGCS(cleanedGCS);
      }
    } catch (err) {
      console.log('Error occurred during GCS cleanup:', err);
    }
  } catch (error) {
    console.log('Error occurred during cleanup:', error);
  } finally {
    isCleanupRunning = false;
  }
}
|
|
77
|
-
|
|
78
|
-
/**
 * Request handler for the file service. The outcome is reported by assigning
 * `context.res` ({ status, body }); the function itself returns nothing.
 *
 * Parameters are read from `req.body.params` when present, otherwise from
 * `req.query`. Depending on them the handler:
 *  - deletes everything stored for a requestId (DELETE, or `operation=delete`);
 *  - downloads a remote file (`fetch` / `load` / `restore`, GET only) and stores it;
 *  - removes a hash entry (`hash` + `clearHash`);
 *  - looks a hash up and re-syncs primary and GCS copies (`hash` + `checkHash`);
 *  - stores an uploaded file (POST);
 *  - otherwise processes `uri`: documents are converted to text (saved when
 *    `save` is set, chunked otherwise); anything else is split into media chunks.
 *
 * @param {object} context - Provides `log` and receives the response in `res`.
 * @param {object} req - Request with `method` and optional `query` / `body.params`.
 * @returns {Promise<void>}
 */
async function CortexFileHandler(context, req) {
  // The internal restore call below passes a request without `query`.
  const query = req.query || {};
  const { uri, requestId, save, hash, checkHash, clearHash, fetch, load, restore } = req.body?.params || query;
  const method = req.method.toLowerCase();

  // Only used for the log line below; dispatch happens in the branches that follow.
  const operation = save ? 'save' :
    checkHash ? 'checkHash' :
    clearHash ? 'clearHash' :
    fetch || load || restore ? 'remoteFile' :
    method === 'delete' || query.operation === 'delete' ? 'delete' :
    uri ? (DOC_EXTENSIONS.some(ext => uri.toLowerCase().endsWith(ext)) ? 'document_processing' : 'media_chunking') :
    'upload';

  context.log(`Processing ${req.method} request - ${requestId ? `requestId: ${requestId}, ` : ''}${uri ? `uri: ${uri}, ` : ''}${hash ? `hash: ${hash}, ` : ''}operation: ${operation}`);

  cleanupInactive(context); //trigger & no need to wait for it

  // Clean up blob when request delete which means processing marked completed
  if (operation === 'delete') {
    const deleteRequestId = query.requestId || requestId;
    if (!deleteRequestId) {
      context.res = {
        status: 400,
        body: "Please pass a requestId on the query string"
      };
      return;
    }

    // Delete from Azure/Local storage
    const azureResult = useAzure ? await deleteBlob(deleteRequestId) : await deleteFolder(deleteRequestId);
    const gcsResult = [];
    if (gcs) {
      gcsResult.push(...await deleteGCS(deleteRequestId));
    }

    context.res = {
      status: 200,
      body: { body: [...azureResult, ...gcsResult] }
    };
    return;
  }

  const remoteUrl = fetch || restore || load;
  if (method === `get` && remoteUrl) {
    context.log(`Remote file: ${remoteUrl}`);
    let downloadDir; // declared outside the try so the finally block can remove it
    try {
      // Validate URL format and accessibility
      const urlCheck = await urlExists(remoteUrl);
      if (!urlCheck.valid) {
        context.res = {
          status: 400,
          body: 'Invalid or inaccessible URL'
        };
        return;
      }

      // Check if the file was already stored (the remote URL is the map key)
      const exists = await getFileStoreMap(remoteUrl);
      if (exists) {
        context.res = {
          status: 200,
          body: exists
        };
        //update redis timestamp with current time
        await setFileStoreMap(remoteUrl, exists);
        return;
      }

      // Derive a local file name from the URL, falling back to the host name
      const urlObj = new URL(remoteUrl);
      let originalFileName = path.basename(urlObj.pathname);
      if (!originalFileName || originalFileName === '') {
        originalFileName = urlObj.hostname;
      }

      // Ensure the filename has the correct extension based on content type
      originalFileName = ensureFileExtension(originalFileName, urlCheck.contentType);

      const maxLength = 200; // Set the maximum length for the filename
      let truncatedFileName = originalFileName;
      if (originalFileName.length > maxLength) {
        const extension = path.extname(originalFileName);
        const basename = path.basename(originalFileName, extension);
        truncatedFileName = basename.substring(0, maxLength - extension.length) + extension;
      }

      // Download into a per-request directory so concurrent requests for files
      // that share a name cannot overwrite or delete each other's download.
      downloadDir = path.join(os.tmpdir(), `${uuidv4()}`);
      fs.mkdirSync(downloadDir, { recursive: true });
      const filename = path.join(downloadDir, truncatedFileName);
      await downloadFile(remoteUrl, filename);

      // Now upload the downloaded file
      const res = await uploadBlob(context, null, !useAzure, filename, remoteUrl);

      //Update Redis (the remote URL is the map key)
      await setFileStoreMap(remoteUrl, res);

      // Return the file URL
      context.res = {
        status: 200,
        body: res,
      };
    } catch (error) {
      context.log("Error processing remote file request:", error);
      context.res = {
        status: 500,
        body: `Error processing file: ${error.message}`
      };
    } finally {
      // Cleanup the temp download if it exists
      try {
        if (downloadDir) {
          fs.rmSync(downloadDir, { recursive: true, force: true });
        }
      } catch (err) {
        context.log("Error cleaning up temp file:", err);
      }
    }
    return;
  }

  if (hash && clearHash) {
    try {
      const hashValue = await getFileStoreMap(hash);
      if (hashValue) {
        await removeFromFileStoreMap(hash);
        context.res = {
          status: 200,
          body: `Hash ${hash} removed`
        };
      } else {
        context.res = {
          status: 404,
          body: `Hash ${hash} not found`
        };
      }
    } catch (error) {
      context.res = {
        status: 500,
        body: `Error occurred during hash cleanup: ${error}`
      };
      console.log('Error occurred during hash cleanup:', error);
    }
    return;
  }

  if (hash && checkHash) { //check if hash exists
    let hashResult = await getFileStoreMap(hash);

    if (hashResult) {
      context.log(`File exists in map: ${hash}`);

      // Check primary storage (Azure/Local) first
      const primaryExists = await urlExists(hashResult?.url);
      const gcsExists = gcs ? await gcsUrlExists(hashResult?.gcs) : false;

      // If neither storage has the file, remove from map and return not found
      if (!primaryExists.valid && !gcsExists) {
        context.log(`File not found in any storage. Removing from map: ${hash}`);
        await removeFromFileStoreMap(hash);
        context.res = {
          status: 404,
          body: `Hash ${hash} not found in storage`
        };
        return;
      }

      // If primary is missing but GCS exists, restore from GCS
      if (!primaryExists.valid && gcsExists) {
        context.log(`Primary storage file missing, restoring from GCS: ${hash}`);
        try {
          await CortexFileHandler(context, {
            method: 'GET',
            body: { params: { fetch: hashResult.gcs } }
          });
          // The handler returns nothing; its outcome is whatever it left in context.res.
          const restored = context.res;
          if (restored?.status === 200 && restored.body?.url) {
            hashResult.url = restored.body.url;
          }
        } catch (error) {
          console.error('Error restoring from GCS:', error);
        }
      }
      // If GCS is missing but primary exists, restore to GCS
      else if (primaryExists.valid && gcs && !gcsExists) {
        context.log(`GCS file missing, restoring from primary: ${hash}`);
        const { gcs: _, ...fileInfo } = hashResult; // eslint-disable-line no-unused-vars
        hashResult = await ensureGCSUpload(context, fileInfo);
      }

      // Final check to ensure we have at least one valid storage location
      const finalPrimaryCheck = await urlExists(hashResult?.url);
      if (!finalPrimaryCheck.valid && !await gcsUrlExists(hashResult?.gcs)) {
        context.log(`Failed to restore file. Removing from map: ${hash}`);
        await removeFromFileStoreMap(hash);
        context.res = {
          status: 404,
          body: `Hash ${hash} not found and restoration failed`
        };
        return;
      }

      //update redis timestamp with current time
      await setFileStoreMap(hash, hashResult);

      context.res = {
        status: 200,
        body: hashResult
      };
      return;
    }

    context.res = {
      status: 404,
      body: `Hash ${hash} not found`
    };
    return;
  }

  if (method === `post`) {
    await uploadBlob(context, req, !useAzure, null, hash);
    if (hash && context?.res?.body) {
      await setFileStoreMap(hash, context.res.body);
    }
    return;
  }

  if (!uri || !requestId) {
    context.res = {
      status: 400,
      body: "Please pass a uri and requestId on the query string or in the request body"
    };
    return;
  }

  let totalCount = 0;
  let completedCount = 0;
  let numberOfChunks;

  const file = ensureEncoded(uri); // encode url to handle special characters

  const result = [];

  // Publishes one completed step for this request.
  const sendProgress = async (data = null) => {
    completedCount++;
    const progress = completedCount / totalCount;
    await publishRequestProgress({ requestId, progress, completedCount, totalCount, numberOfChunks, data });
  };

  try {
    // Parse URL and get pathname without query parameters for extension check
    const urlObj = new URL(uri);
    const pathWithoutQuery = urlObj.pathname;

    if (DOC_EXTENSIONS.some(ext => pathWithoutQuery.toLowerCase().endsWith(ext))) {
      const extension = path.extname(pathWithoutQuery).toLowerCase();
      const tempDir = path.join(os.tmpdir(), `${uuidv4()}`);
      fs.mkdirSync(tempDir);
      const downloadedFile = path.join(tempDir, `${uuidv4()}${extension}`);
      let tmpPath;
      // The source upload is only deleted once it has been converted successfully.
      let converted = false;

      try {
        await downloadFile(uri, downloadedFile);
        const text = await documentToText(downloadedFile);
        converted = true;

        try {
          if (save) {
            const fileName = `${uuidv4()}.txt`; // generate unique file name
            const filePath = path.join(tempDir, fileName);
            tmpPath = filePath;
            fs.writeFileSync(filePath, text); // write text to file

            // save file to the cloud or local file system
            const saveResult = useAzure ? await saveFileToBlob(filePath, requestId) : await moveFileToPublicFolder(filePath, requestId);
            result.push(saveResult);
          } else {
            result.push(...easyChunker(text));
          }
        } catch (err) {
          console.log(`Error saving file ${uri} with request id ${requestId}:`, err);
        }
      } finally {
        try {
          // Remove the whole temp directory: the text file may already have been
          // moved away by the local save, which used to make a per-file unlink
          // throw before the downloaded file was deleted.
          fs.rmSync(tempDir, { recursive: true, force: true });
          console.log(`Cleaned temp files ${tmpPath}, ${downloadedFile}`);
        } catch (err) {
          console.log(`Error cleaning temp files ${tmpPath}, ${downloadedFile}:`, err);
        }

        if (converted) {
          try {
            //delete uploaded prev nontext file
            //check cleanup for uploaded files url
            const regex = new RegExp(`${AZURE_STORAGE_CONTAINER_NAME}/([a-z0-9-]+)`);
            const match = uri.match(regex);
            if (match && match[1]) {
              const extractedValue = match[1];
              useAzure ? await deleteBlob(extractedValue) : await deleteFolder(extractedValue);
              console.log(`Cleaned temp file ${uri} with request id ${extractedValue}`);
            }
          } catch (err) {
            console.log(`Error cleaning temp file ${uri}:`, err);
          }
        }
      }
    } else {
      const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(file);

      try {
        numberOfChunks = chunkPromises.length; // for progress reporting
        totalCount += chunkPromises.length * 4; // 4 steps for each chunk (download and upload)

        // sequential download of chunks
        const chunks = [];
        for (const chunkPromise of chunkPromises) {
          const chunkPath = await chunkPromise;
          chunks.push(chunkPath);
          await sendProgress();
        }

        // sequential processing of chunks
        for (let index = 0; index < chunks.length; index++) {
          const chunkPath = chunks[index];

          const blobName = useAzure
            ? await saveFileToBlob(chunkPath, requestId)
            : await moveFileToPublicFolder(chunkPath, requestId);

          // NOTE(review): called unconditionally; presumably a no-op when GCS is not configured - confirm in blobHandler.
          const gcsUrl = await uploadChunkToGCS(chunkPath, requestId);

          const chunkOffset = chunkOffsets[index];
          result.push({ uri: blobName, offset: chunkOffset, gcs: gcsUrl });
          console.log(`Saved chunk as: ${blobName}${gcsUrl ? ` and ${gcsUrl}` : ''}`);
          await sendProgress();
        }
      } finally {
        // Cleanup the temp directory, also when a chunk failed part-way through
        try {
          if (uniqueOutputPath && fs.existsSync(uniqueOutputPath)) {
            fs.rmSync(uniqueOutputPath, { recursive: true });
            console.log(`Cleaned temp directory: ${uniqueOutputPath}`);
          }
        } catch (err) {
          console.log(`Error cleaning temp directory ${uniqueOutputPath}:`, err);
        }
      }
    }
  } catch (error) {
    console.error("An error occurred:", error);
    context.res = {
      status: 500,
      body: error.message || error
    };
    return;
  }

  console.log('result:', result.map(item =>
    typeof item === 'object' ? JSON.stringify(item, null, 2) : item
  ).join('\n'));

  context.res = {
    body: result
  };
}
|
|
439
|
-
|
|
440
|
-
export default CortexFileHandler;
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
import { promises as fs } from 'fs';
|
|
2
|
-
import { join, basename } from 'path';
|
|
3
|
-
import { v4 as uuidv4 } from 'uuid';
|
|
4
|
-
|
|
5
|
-
import { publicFolder, port, ipAddress } from "./start.js";
|
|
6
|
-
|
|
7
|
-
/**
 * Moves a file into `<publicFolder>/<requestId>/` under a UUID-prefixed name
 * and returns the URL it is addressed by.
 *
 * @param {string} chunkPath - Path of the file to move.
 * @param {string} requestId - Name of the sub-folder that groups the request's files.
 * @returns {Promise<string>} URL of the form `http://<ipAddress>:<port>/files/<requestId>/<uuid>_<name>`.
 * @throws {Error} When the folder cannot be created or the file cannot be moved.
 */
async function moveFileToPublicFolder(chunkPath, requestId) {
  // Prefix the original name with a UUID so files with the same name do not clash
  const storedName = `${uuidv4()}_${basename(chunkPath)}`;

  // Create the target folder if it doesn't exist
  const targetFolder = join(publicFolder, requestId);
  await fs.mkdir(targetFolder, { recursive: true });

  // Move the file to the target folder
  const targetPath = join(targetFolder, storedName);
  try {
    await fs.rename(chunkPath, targetPath);
  } catch (error) {
    // rename() cannot cross filesystems (e.g. a temp dir on another mount);
    // fall back to copy + delete in that case only.
    if (error.code !== 'EXDEV') {
      throw error;
    }
    await fs.copyFile(chunkPath, targetPath);
    await fs.unlink(chunkPath);
  }

  // Return the complete URL of the file
  return `http://${ipAddress}:${port}/files/${requestId}/${storedName}`;
}
|
|
24
|
-
|
|
25
|
-
/**
 * Deletes the folder `<publicFolder>/<requestId>` and everything in it.
 *
 * @param {string} requestId - Name of the request folder to delete.
 * @returns {Promise<string[]>} `<requestId>/<file>` for each entry that was in
 *   the folder, or an empty array when the folder is missing or not a directory.
 * @throws {Error} When requestId is missing, resolves outside the public
 *   folder, or the filesystem reports an error other than ENOENT.
 */
async function deleteFolder(requestId) {
  if (!requestId) throw new Error('Missing requestId parameter');

  const targetFolder = join(publicFolder, requestId);
  // join() collapses `.` and `..`, so a requestId such as '../x' or '.' would
  // otherwise point at a sibling directory or at the public folder itself.
  // Only strict descendants of the public folder may be deleted.
  if (!targetFolder.startsWith(join(publicFolder, '/'))) {
    throw new Error('Invalid requestId parameter');
  }

  try {
    // Check if folder exists first
    const stats = await fs.stat(targetFolder);
    if (!stats.isDirectory()) {
      return [];
    }

    // Get list of files before deleting
    const files = await fs.readdir(targetFolder);
    const deletedFiles = files.map(file => join(requestId, file));

    // Delete the folder
    await fs.rm(targetFolder, { recursive: true });
    console.log(`Cleaned folder: ${targetFolder}`);
    return deletedFiles;
  } catch (error) {
    if (error.code === 'ENOENT') {
      // Folder doesn't exist, return empty array
      return [];
    }
    throw error;
  }
}
|
|
49
|
-
|
|
50
|
-
/**
 * Deletes files from the public folder.
 *
 * Without `urls`, every top-level entry of the public folder whose
 * modification time is more than one month old is removed (directories
 * recursively). With `urls`, the file behind each URL is removed; the part of
 * the URL after `/files/` is treated as the path inside the public folder.
 * Errors are logged, not thrown, and one failing URL does not stop the rest.
 *
 * @param {string[]|null} [urls=null] - File URLs to delete, or null for the age-based sweep.
 * @returns {Promise<string[]>} URLs of the files that were deleted (directories
 *   removed by the age-based sweep are not listed).
 */
async function cleanupLocal(urls = null) {
  const cleanedUrls = [];

  if (!urls) {
    try {
      // Read the directory
      const items = await fs.readdir(publicFolder);

      // Anything last modified before this date is considered stale
      const cutoff = new Date();
      cutoff.setMonth(cutoff.getMonth() - 1);

      for (const item of items) {
        const itemPath = join(publicFolder, item);
        const stats = await fs.stat(itemPath);

        // Keep anything modified within the last month
        if (!(stats.mtime < cutoff)) {
          continue;
        }

        if (stats.isDirectory()) {
          // If it's a directory, delete it recursively
          await fs.rm(itemPath, { recursive: true });
          console.log(`Cleaned directory: ${item}`);
        } else {
          // If it's a file, delete it
          await fs.unlink(itemPath);
          console.log(`Cleaned file: ${item}`);

          // Add the URL of the cleaned file to cleanedUrls array
          cleanedUrls.push(`http://${ipAddress}:${port}/files/${item}`);
        }
      }
    } catch (error) {
      console.error(`Error cleaning up files: ${error}`);
    }
    return cleanedUrls;
  }

  try {
    const marker = '/files/';
    const publicRoot = join(publicFolder, '/');

    for (const url of urls) {
      try {
        // Files may live in a per-request sub-folder, so keep the whole path
        // after `/files/` rather than only the last URL segment.
        const markerIndex = url.indexOf(marker);
        const relativePath = markerIndex === -1
          ? url.split('/').pop()
          : url.slice(markerIndex + marker.length);

        const itemPath = join(publicFolder, relativePath);
        // Never follow `..` segments out of the public folder.
        if (!itemPath.startsWith(publicRoot)) {
          throw new Error(`path is outside the public folder: ${url}`);
        }

        await fs.unlink(itemPath);
        cleanedUrls.push(url);
      } catch (error) {
        console.error(`Error cleaning up files: ${error}`);
      }
    }
  } catch (error) {
    console.error(`Error cleaning up files: ${error}`);
  }

  // Return the array of cleaned file URLs
  return cleanedUrls;
}
|
|
105
|
-
|
|
106
|
-
export {
|
|
107
|
-
moveFileToPublicFolder, deleteFolder, cleanupLocal
|
|
108
|
-
};
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
import CortexFileHandler from "./index.js";
|
|
2
|
-
import express from "express";
|
|
3
|
-
import { fileURLToPath } from 'url';
|
|
4
|
-
import { dirname, join } from 'path';
|
|
5
|
-
import cors from 'cors';
|
|
6
|
-
import { readFileSync } from 'fs';
|
|
7
|
-
|
|
8
|
-
import { publicIpv4 } from 'public-ip';
|
|
9
|
-
// Resolved once at startup; exported for use when building local file URLs.
const ipAddress = await publicIpv4();

const app = express();
const port = process.env.PORT || 7071;

// Directory containing this module; the public folder and package.json are resolved against it.
const moduleDir = dirname(fileURLToPath(import.meta.url));
const publicFolder = join(moduleDir, 'files');

// Get version from package.json
const packageJson = JSON.parse(readFileSync(join(moduleDir, 'package.json'), 'utf8'));
const version = packageJson.version;

app.use(cors());
// Serve static files from the public folder
app.use('/files', express.static(publicFolder));

// Health check endpoint
app.get('/health', (req, res) => {
  res.status(200).json({
    status: 'healthy',
    version: version
  });
});

/**
 * Runs CortexFileHandler for an Express request and relays the result.
 *
 * The handler reports its outcome by assigning `context.res`; that status
 * (default 200) and body are sent as the HTTP response. A thrown error is sent
 * as its `status` (default 500) and `message`.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @returns {Promise<void>}
 */
async function handleFileRequest(req, res) {
  const context = { req, res, log: console.log };
  try {
    await CortexFileHandler(context, req);
    context.log(context.res);
    res.status(context.res.status || 200).send(context.res.body);
  } catch (error) {
    const status = error.status || 500;
    const message = error.message || 'Internal server error';
    res.status(status).send(message);
  }
}

// New primary endpoint
app.all('/api/CortexFileHandler', handleFileRequest);

// Legacy endpoint for compatibility
app.all('/api/MediaFileChunker', handleFileRequest);

app.listen(port, () => {
  console.log(`Cortex File Handler v${version} running on port ${port} (includes legacy MediaFileChunker endpoint)`);
});
|
|
62
|
-
|
|
63
|
-
export { port, publicFolder, ipAddress };
|