@aj-archipelago/cortex 1.3.11 → 1.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-file-handler/.env.test +7 -0
- package/helper-apps/cortex-file-handler/.env.test.azure +6 -0
- package/helper-apps/cortex-file-handler/.env.test.gcs +9 -0
- package/helper-apps/cortex-file-handler/blobHandler.js +313 -204
- package/helper-apps/cortex-file-handler/constants.js +107 -0
- package/helper-apps/cortex-file-handler/docHelper.js +4 -1
- package/helper-apps/cortex-file-handler/fileChunker.js +170 -109
- package/helper-apps/cortex-file-handler/helper.js +82 -16
- package/helper-apps/cortex-file-handler/index.js +226 -146
- package/helper-apps/cortex-file-handler/localFileHandler.js +21 -3
- package/helper-apps/cortex-file-handler/package-lock.json +2622 -51
- package/helper-apps/cortex-file-handler/package.json +25 -4
- package/helper-apps/cortex-file-handler/redis.js +9 -18
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +22 -0
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +49 -0
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +34 -0
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +49 -0
- package/helper-apps/cortex-file-handler/start.js +39 -4
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +292 -0
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +148 -0
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +311 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +930 -0
- package/package.json +1 -1
- package/pathways/system/entity/sys_entity_continue.js +1 -1
- package/pathways/system/entity/sys_entity_start.js +1 -0
- package/pathways/system/entity/sys_generator_video_vision.js +2 -1
- package/pathways/system/entity/sys_router_tool.js +6 -4
- package/server/plugins/openAiWhisperPlugin.js +9 -13
- package/server/plugins/replicateApiPlugin.js +54 -2
|
@@ -1,28 +1,22 @@
|
|
|
1
|
-
import { downloadFile,
|
|
2
|
-
import { saveFileToBlob, deleteBlob, uploadBlob, cleanup, cleanupGCS, gcsUrlExists } from './blobHandler.js';
|
|
1
|
+
import { downloadFile, splitMediaFile } from './fileChunker.js';
|
|
2
|
+
import { saveFileToBlob, deleteBlob, deleteGCS, uploadBlob, cleanup, cleanupGCS, gcsUrlExists, ensureGCSUpload, gcs, AZURE_STORAGE_CONTAINER_NAME } from './blobHandler.js';
|
|
3
3
|
import { cleanupRedisFileStoreMap, getFileStoreMap, publishRequestProgress, removeFromFileStoreMap, setFileStoreMap } from './redis.js';
|
|
4
|
-
import {
|
|
4
|
+
import { ensureEncoded, ensureFileExtension, urlExists } from './helper.js';
|
|
5
5
|
import { moveFileToPublicFolder, deleteFolder, cleanupLocal } from './localFileHandler.js';
|
|
6
6
|
import { documentToText, easyChunker } from './docHelper.js';
|
|
7
|
+
import { DOC_EXTENSIONS } from './constants.js';
|
|
7
8
|
import path from 'path';
|
|
8
9
|
import os from 'os';
|
|
9
10
|
import { v4 as uuidv4 } from 'uuid';
|
|
10
11
|
import fs from 'fs';
|
|
11
|
-
import http from 'http';
|
|
12
|
-
import https from 'https';
|
|
13
|
-
import axios from "axios";
|
|
14
|
-
import { pipeline } from "stream";
|
|
15
|
-
import { promisify } from "util";
|
|
16
|
-
const pipelineUtility = promisify(pipeline); // To pipe streams using async/await
|
|
17
|
-
|
|
18
|
-
const DOC_EXTENSIONS = [".txt", ".json", ".csv", ".md", ".xml", ".js", ".html", ".css", '.pdf', '.docx', '.xlsx', '.csv'];
|
|
19
12
|
|
|
20
13
|
const useAzure = process.env.AZURE_STORAGE_CONNECTION_STRING ? true : false;
|
|
21
|
-
|
|
14
|
+
const useGCS = process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 || process.env.GCP_SERVICE_ACCOUNT_KEY ? true : false;
|
|
22
15
|
|
|
16
|
+
console.log(`Storage configuration - ${useAzure ? 'Azure' : 'Local'} Storage${useGCS ? ' and Google Cloud Storage' : ''}`);
|
|
23
17
|
|
|
24
18
|
let isCleanupRunning = false;
|
|
25
|
-
async function cleanupInactive() {
|
|
19
|
+
async function cleanupInactive(context) {
|
|
26
20
|
try {
|
|
27
21
|
if (isCleanupRunning) { return; } //no need to cleanup every call
|
|
28
22
|
isCleanupRunning = true;
|
|
@@ -52,7 +46,7 @@ async function cleanupInactive() {
|
|
|
52
46
|
|
|
53
47
|
try {
|
|
54
48
|
if (cleanedAzure && cleanedAzure.length > 0) {
|
|
55
|
-
await cleanup(cleanedAzure);
|
|
49
|
+
await cleanup(context, cleanedAzure);
|
|
56
50
|
}
|
|
57
51
|
} catch (error) {
|
|
58
52
|
console.log('Error occurred during azure cleanup:', error);
|
|
@@ -81,96 +75,141 @@ async function cleanupInactive() {
|
|
|
81
75
|
}
|
|
82
76
|
}
|
|
83
77
|
|
|
84
|
-
async function
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
resolve(false);
|
|
96
|
-
});
|
|
97
|
-
});
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
async function main(context, req) {
|
|
102
|
-
context.log('Starting req processing..');
|
|
78
|
+
async function CortexFileHandler(context, req) {
|
|
79
|
+
const { uri, requestId, save, hash, checkHash, clearHash, fetch, load, restore } = req.body?.params || req.query;
|
|
80
|
+
const operation = save ? 'save' :
|
|
81
|
+
checkHash ? 'checkHash' :
|
|
82
|
+
clearHash ? 'clearHash' :
|
|
83
|
+
fetch || load || restore ? 'remoteFile' :
|
|
84
|
+
req.method.toLowerCase() === 'delete' || req.query.operation === 'delete' ? 'delete' :
|
|
85
|
+
uri ? (DOC_EXTENSIONS.some(ext => uri.toLowerCase().endsWith(ext)) ? 'document_processing' : 'media_chunking') :
|
|
86
|
+
'upload';
|
|
87
|
+
|
|
88
|
+
context.log(`Processing ${req.method} request - ${requestId ? `requestId: ${requestId}, ` : ''}${uri ? `uri: ${uri}, ` : ''}${hash ? `hash: ${hash}, ` : ''}operation: ${operation}`);
|
|
103
89
|
|
|
104
|
-
cleanupInactive(); //trigger & no need to wait for it
|
|
90
|
+
cleanupInactive(context); //trigger & no need to wait for it
|
|
105
91
|
|
|
106
92
|
// Clean up blob when request delete which means processing marked completed
|
|
107
|
-
if (
|
|
108
|
-
const
|
|
109
|
-
if (!
|
|
93
|
+
if (operation === 'delete') {
|
|
94
|
+
const deleteRequestId = req.query.requestId || requestId;
|
|
95
|
+
if (!deleteRequestId) {
|
|
110
96
|
context.res = {
|
|
111
97
|
status: 400,
|
|
112
98
|
body: "Please pass a requestId on the query string"
|
|
113
99
|
};
|
|
114
100
|
return;
|
|
115
101
|
}
|
|
116
|
-
|
|
102
|
+
|
|
103
|
+
// Delete from Azure/Local storage
|
|
104
|
+
const azureResult = useAzure ? await deleteBlob(deleteRequestId) : await deleteFolder(deleteRequestId);
|
|
105
|
+
const gcsResult = [];
|
|
106
|
+
if (gcs) {
|
|
107
|
+
for (const blobName of azureResult) {
|
|
108
|
+
gcsResult.push(...await deleteGCS(blobName));
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
117
112
|
context.res = {
|
|
118
|
-
|
|
113
|
+
status: 200,
|
|
114
|
+
body: { body: [...azureResult, ...gcsResult] }
|
|
119
115
|
};
|
|
120
116
|
return;
|
|
121
117
|
}
|
|
122
118
|
|
|
123
|
-
const
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
119
|
+
const remoteUrl = fetch || restore || load;
|
|
120
|
+
if (req.method.toLowerCase() === `get` && remoteUrl) {
|
|
121
|
+
context.log(`Remote file: ${remoteUrl}`);
|
|
122
|
+
let filename; // Declare filename outside try block
|
|
123
|
+
try {
|
|
124
|
+
// Validate URL format and accessibility
|
|
125
|
+
const urlCheck = await urlExists(remoteUrl);
|
|
126
|
+
if (!urlCheck.valid) {
|
|
127
|
+
context.res = {
|
|
128
|
+
status: 400,
|
|
129
|
+
body: 'Invalid or inaccessible URL'
|
|
130
|
+
};
|
|
131
|
+
return;
|
|
132
|
+
}
|
|
137
133
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
134
|
+
// Check if file already exists (using hash as the key)
|
|
135
|
+
let exists = await getFileStoreMap(remoteUrl);
|
|
136
|
+
if(exists){
|
|
137
|
+
context.res = {
|
|
138
|
+
status: 200,
|
|
139
|
+
body: exists
|
|
140
|
+
};
|
|
141
|
+
//update redis timestamp with current time
|
|
142
|
+
await setFileStoreMap(remoteUrl, exists);
|
|
143
|
+
return;
|
|
144
|
+
}
|
|
149
145
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
146
|
+
// Download the file first
|
|
147
|
+
const urlObj = new URL(remoteUrl);
|
|
148
|
+
let originalFileName = path.basename(urlObj.pathname);
|
|
149
|
+
if (!originalFileName || originalFileName === '') {
|
|
150
|
+
originalFileName = urlObj.hostname;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
// Ensure the filename has the correct extension based on content type
|
|
154
|
+
originalFileName = ensureFileExtension(originalFileName, urlCheck.contentType);
|
|
155
|
+
|
|
156
|
+
const maxLength = 200; // Set the maximum length for the filename
|
|
157
|
+
let truncatedFileName = originalFileName;
|
|
158
|
+
if (originalFileName.length > maxLength) {
|
|
159
|
+
const extension = path.extname(originalFileName);
|
|
160
|
+
const basename = path.basename(originalFileName, extension);
|
|
161
|
+
truncatedFileName = basename.substring(0, maxLength - extension.length) + extension;
|
|
162
|
+
}
|
|
153
163
|
|
|
154
|
-
|
|
155
|
-
|
|
164
|
+
// Use the original-truncated file name when saving the downloaded file
|
|
165
|
+
filename = path.join(os.tmpdir(), truncatedFileName);
|
|
166
|
+
await downloadFile(remoteUrl, filename);
|
|
167
|
+
|
|
168
|
+
// Now upload the downloaded file
|
|
169
|
+
const res = await uploadBlob(context, null, !useAzure, filename, remoteUrl);
|
|
156
170
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
status: 200,
|
|
160
|
-
body: res,
|
|
161
|
-
};
|
|
171
|
+
//Update Redis (using hash as the key)
|
|
172
|
+
await setFileStoreMap(remoteUrl, res);
|
|
162
173
|
|
|
174
|
+
// Return the file URL
|
|
175
|
+
context.res = {
|
|
176
|
+
status: 200,
|
|
177
|
+
body: res,
|
|
178
|
+
};
|
|
179
|
+
} catch (error) {
|
|
180
|
+
context.log("Error processing remote file request:", error);
|
|
181
|
+
context.res = {
|
|
182
|
+
status: 500,
|
|
183
|
+
body: `Error processing file: ${error.message}`
|
|
184
|
+
};
|
|
185
|
+
} finally {
|
|
186
|
+
// Cleanup temp file if it exists
|
|
187
|
+
try {
|
|
188
|
+
if (filename && fs.existsSync(filename)) {
|
|
189
|
+
fs.unlinkSync(filename);
|
|
190
|
+
}
|
|
191
|
+
} catch (err) {
|
|
192
|
+
context.log("Error cleaning up temp file:", err);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
163
195
|
return;
|
|
164
196
|
}
|
|
165
197
|
|
|
166
198
|
if(hash && clearHash){
|
|
167
199
|
try {
|
|
168
200
|
const hashValue = await getFileStoreMap(hash);
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
201
|
+
if (hashValue) {
|
|
202
|
+
await removeFromFileStoreMap(hash);
|
|
203
|
+
context.res = {
|
|
204
|
+
status: 200,
|
|
205
|
+
body: `Hash ${hash} removed`
|
|
206
|
+
};
|
|
207
|
+
} else {
|
|
208
|
+
context.res = {
|
|
209
|
+
status: 404,
|
|
210
|
+
body: `Hash ${hash} not found`
|
|
211
|
+
};
|
|
212
|
+
}
|
|
174
213
|
} catch (error) {
|
|
175
214
|
context.res = {
|
|
176
215
|
status: 500,
|
|
@@ -178,37 +217,84 @@ async function main(context, req) {
|
|
|
178
217
|
};
|
|
179
218
|
console.log('Error occurred during hash cleanup:', error);
|
|
180
219
|
}
|
|
181
|
-
return
|
|
220
|
+
return;
|
|
182
221
|
}
|
|
183
222
|
|
|
184
223
|
if(hash && checkHash){ //check if hash exists
|
|
185
|
-
|
|
186
|
-
|
|
224
|
+
let hashResult = await getFileStoreMap(hash);
|
|
225
|
+
|
|
226
|
+
if(hashResult){
|
|
227
|
+
context.log(`File exists in map: ${hash}`);
|
|
228
|
+
|
|
229
|
+
// Check primary storage (Azure/Local) first
|
|
230
|
+
const primaryExists = await urlExists(hashResult?.url);
|
|
231
|
+
const gcsExists = gcs ? await gcsUrlExists(hashResult?.gcs) : false;
|
|
232
|
+
|
|
233
|
+
// If neither storage has the file, remove from map and return not found
|
|
234
|
+
if (!primaryExists.valid && !gcsExists) {
|
|
235
|
+
context.log(`File not found in any storage. Removing from map: ${hash}`);
|
|
236
|
+
await removeFromFileStoreMap(hash);
|
|
237
|
+
context.res = {
|
|
238
|
+
status: 404,
|
|
239
|
+
body: `Hash ${hash} not found in storage`
|
|
240
|
+
};
|
|
241
|
+
return;
|
|
242
|
+
}
|
|
187
243
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
244
|
+
// If primary is missing but GCS exists, restore from GCS
|
|
245
|
+
if (!primaryExists.valid && gcsExists) {
|
|
246
|
+
context.log(`Primary storage file missing, restoring from GCS: ${hash}`);
|
|
247
|
+
try {
|
|
248
|
+
const res = await CortexFileHandler(context, {
|
|
249
|
+
method: 'GET',
|
|
250
|
+
body: { params: { fetch: hashResult.gcs } }
|
|
251
|
+
});
|
|
252
|
+
if (res?.body?.url) {
|
|
253
|
+
hashResult.url = res.body.url;
|
|
254
|
+
}
|
|
255
|
+
} catch (error) {
|
|
256
|
+
console.error('Error restoring from GCS:', error);
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
// If GCS is missing but primary exists, restore to GCS
|
|
260
|
+
else if (primaryExists.valid && gcs && !gcsExists) {
|
|
261
|
+
context.log(`GCS file missing, restoring from primary: ${hash}`);
|
|
262
|
+
const { gcs: _, ...fileInfo } = hashResult; // eslint-disable-line no-unused-vars
|
|
263
|
+
hashResult = await ensureGCSUpload(context, fileInfo);
|
|
264
|
+
}
|
|
191
265
|
|
|
192
|
-
|
|
266
|
+
// Final check to ensure we have at least one valid storage location
|
|
267
|
+
const finalPrimaryCheck = await urlExists(hashResult?.url);
|
|
268
|
+
if (!finalPrimaryCheck.valid && !await gcsUrlExists(hashResult?.gcs)) {
|
|
269
|
+
context.log(`Failed to restore file. Removing from map: ${hash}`);
|
|
193
270
|
await removeFromFileStoreMap(hash);
|
|
271
|
+
context.res = {
|
|
272
|
+
status: 404,
|
|
273
|
+
body: `Hash ${hash} not found and restoration failed`
|
|
274
|
+
};
|
|
194
275
|
return;
|
|
195
276
|
}
|
|
196
277
|
|
|
197
|
-
context.log(`Hash exists: ${hash}`);
|
|
198
278
|
//update redis timestamp with current time
|
|
199
|
-
await setFileStoreMap(hash,
|
|
279
|
+
await setFileStoreMap(hash, hashResult);
|
|
280
|
+
|
|
281
|
+
context.res = {
|
|
282
|
+
status: 200,
|
|
283
|
+
body: hashResult
|
|
284
|
+
};
|
|
285
|
+
return;
|
|
200
286
|
}
|
|
287
|
+
|
|
201
288
|
context.res = {
|
|
202
|
-
|
|
289
|
+
status: 404,
|
|
290
|
+
body: `Hash ${hash} not found`
|
|
203
291
|
};
|
|
204
292
|
return;
|
|
205
293
|
}
|
|
206
294
|
|
|
207
295
|
if (req.method.toLowerCase() === `post`) {
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
context.log(`File url: ${url}`);
|
|
211
|
-
if(hash && context?.res?.body){ //save hash after upload
|
|
296
|
+
await uploadBlob(context, req, !useAzure, null, hash);
|
|
297
|
+
if(hash && context?.res?.body){
|
|
212
298
|
await setFileStoreMap(hash, context.res.body);
|
|
213
299
|
}
|
|
214
300
|
return
|
|
@@ -227,8 +313,6 @@ async function main(context, req) {
|
|
|
227
313
|
let numberOfChunks;
|
|
228
314
|
|
|
229
315
|
let file = ensureEncoded(uri); // encode url to handle special characters
|
|
230
|
-
let folder;
|
|
231
|
-
const isYoutubeUrl = isValidYoutubeUrl(uri);
|
|
232
316
|
|
|
233
317
|
const result = [];
|
|
234
318
|
|
|
@@ -238,20 +322,24 @@ async function main(context, req) {
|
|
|
238
322
|
await publishRequestProgress({ requestId, progress, completedCount, totalCount, numberOfChunks, data });
|
|
239
323
|
}
|
|
240
324
|
|
|
241
|
-
const isDocument = DOC_EXTENSIONS.some(ext => uri.toLowerCase().endsWith(ext));
|
|
242
|
-
|
|
243
325
|
try {
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
326
|
+
// Parse URL and get pathname without query parameters for extension check
|
|
327
|
+
const urlObj = new URL(uri);
|
|
328
|
+
const pathWithoutQuery = urlObj.pathname;
|
|
329
|
+
|
|
330
|
+
if (DOC_EXTENSIONS.some(ext => pathWithoutQuery.toLowerCase().endsWith(ext))) {
|
|
331
|
+
const extension = path.extname(pathWithoutQuery).toLowerCase();
|
|
332
|
+
const tempDir = path.join(os.tmpdir(), `${uuidv4()}`);
|
|
333
|
+
fs.mkdirSync(tempDir);
|
|
334
|
+
const downloadedFile = path.join(tempDir, `${uuidv4()}${extension}`);
|
|
335
|
+
await downloadFile(uri, downloadedFile);
|
|
336
|
+
const text = await documentToText(downloadedFile);
|
|
249
337
|
let tmpPath;
|
|
250
338
|
|
|
251
|
-
try{
|
|
339
|
+
try {
|
|
252
340
|
if (save) {
|
|
253
341
|
const fileName = `${uuidv4()}.txt`; // generate unique file name
|
|
254
|
-
const filePath = path.join(
|
|
342
|
+
const filePath = path.join(tempDir, fileName);
|
|
255
343
|
tmpPath = filePath;
|
|
256
344
|
fs.writeFileSync(filePath, text); // write text to file
|
|
257
345
|
|
|
@@ -262,79 +350,73 @@ async function main(context, req) {
|
|
|
262
350
|
} else {
|
|
263
351
|
result.push(...easyChunker(text));
|
|
264
352
|
}
|
|
265
|
-
}catch(err){
|
|
353
|
+
} catch(err) {
|
|
266
354
|
console.log(`Error saving file ${uri} with request id ${requestId}:`, err);
|
|
267
|
-
}finally{
|
|
268
|
-
try{
|
|
355
|
+
} finally {
|
|
356
|
+
try {
|
|
269
357
|
// delete temporary files
|
|
270
358
|
tmpPath && fs.unlinkSync(tmpPath);
|
|
271
|
-
|
|
272
|
-
console.log(`Cleaned temp files ${tmpPath}, ${
|
|
273
|
-
}catch(err){
|
|
274
|
-
console.log(`Error cleaning temp files ${tmpPath}, ${
|
|
359
|
+
downloadedFile && fs.unlinkSync(downloadedFile);
|
|
360
|
+
console.log(`Cleaned temp files ${tmpPath}, ${downloadedFile}`);
|
|
361
|
+
} catch(err) {
|
|
362
|
+
console.log(`Error cleaning temp files ${tmpPath}, ${downloadedFile}:`, err);
|
|
275
363
|
}
|
|
276
364
|
|
|
277
|
-
try{
|
|
365
|
+
try {
|
|
278
366
|
//delete uploaded prev nontext file
|
|
279
|
-
//check cleanup for
|
|
280
|
-
const regex = /
|
|
367
|
+
//check cleanup for uploaded files url
|
|
368
|
+
const regex = new RegExp(`${AZURE_STORAGE_CONTAINER_NAME}/([a-z0-9-]+)`);
|
|
281
369
|
const match = uri.match(regex);
|
|
282
370
|
if (match && match[1]) {
|
|
283
371
|
const extractedValue = match[1];
|
|
284
372
|
useAzure ? await deleteBlob(extractedValue) : await deleteFolder(extractedValue);
|
|
285
373
|
console.log(`Cleaned temp file ${uri} with request id ${extractedValue}`);
|
|
286
374
|
}
|
|
287
|
-
}catch(err){
|
|
375
|
+
} catch(err) {
|
|
288
376
|
console.log(`Error cleaning temp file ${uri}:`, err);
|
|
289
377
|
}
|
|
290
378
|
}
|
|
291
|
-
}else{
|
|
292
|
-
|
|
293
|
-
if (isYoutubeUrl) {
|
|
294
|
-
// totalCount += 1; // extra 1 step for youtube download
|
|
295
|
-
const processAsVideo = req.body?.params?.processAsVideo || req.query?.processAsVideo;
|
|
296
|
-
file = await processYoutubeUrl(file, processAsVideo);
|
|
297
|
-
}
|
|
298
|
-
|
|
379
|
+
} else {
|
|
299
380
|
const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(file);
|
|
300
|
-
folder = uniqueOutputPath;
|
|
301
381
|
|
|
302
382
|
numberOfChunks = chunkPromises.length; // for progress reporting
|
|
303
383
|
totalCount += chunkPromises.length * 4; // 4 steps for each chunk (download and upload)
|
|
304
|
-
// isYoutubeUrl && sendProgress(); // send progress for youtube download after total count is calculated
|
|
305
384
|
|
|
306
385
|
// sequential download of chunks
|
|
307
386
|
const chunks = [];
|
|
308
387
|
for (const chunkPromise of chunkPromises) {
|
|
309
|
-
|
|
310
|
-
|
|
388
|
+
const chunkPath = await chunkPromise;
|
|
389
|
+
chunks.push(chunkPath);
|
|
390
|
+
await sendProgress();
|
|
311
391
|
}
|
|
312
392
|
|
|
313
393
|
// sequential processing of chunks
|
|
314
394
|
for (let index = 0; index < chunks.length; index++) {
|
|
315
|
-
const
|
|
316
|
-
const blobName = useAzure ? await saveFileToBlob(
|
|
395
|
+
const chunkPath = chunks[index];
|
|
396
|
+
const blobName = useAzure ? await saveFileToBlob(chunkPath, requestId) : await moveFileToPublicFolder(chunkPath, requestId);
|
|
317
397
|
const chunkOffset = chunkOffsets[index];
|
|
318
|
-
result.push({ uri:blobName, offset:chunkOffset });
|
|
319
|
-
|
|
320
|
-
sendProgress();
|
|
398
|
+
result.push({ uri: blobName, offset: chunkOffset });
|
|
399
|
+
console.log(`Saved chunk as: ${blobName}`);
|
|
400
|
+
await sendProgress();
|
|
321
401
|
}
|
|
322
402
|
|
|
323
|
-
//
|
|
324
|
-
|
|
403
|
+
// Cleanup the temp directory
|
|
404
|
+
try {
|
|
405
|
+
if (uniqueOutputPath && fs.existsSync(uniqueOutputPath)) {
|
|
406
|
+
fs.rmSync(uniqueOutputPath, { recursive: true });
|
|
407
|
+
console.log(`Cleaned temp directory: ${uniqueOutputPath}`);
|
|
408
|
+
}
|
|
409
|
+
} catch (err) {
|
|
410
|
+
console.log(`Error cleaning temp directory ${uniqueOutputPath}:`, err);
|
|
411
|
+
}
|
|
325
412
|
}
|
|
326
413
|
} catch (error) {
|
|
327
414
|
console.error("An error occurred:", error);
|
|
328
|
-
context.res
|
|
329
|
-
|
|
415
|
+
context.res = {
|
|
416
|
+
status: 500,
|
|
417
|
+
body: error.message || error
|
|
418
|
+
};
|
|
330
419
|
return;
|
|
331
|
-
} finally {
|
|
332
|
-
try {
|
|
333
|
-
(isYoutubeUrl) && (await deleteTempPath(file));
|
|
334
|
-
folder && (await deleteTempPath(folder));
|
|
335
|
-
} catch (error) {
|
|
336
|
-
console.error("An error occurred while deleting:", error);
|
|
337
|
-
}
|
|
338
420
|
}
|
|
339
421
|
|
|
340
422
|
console.log('result:', result.map(item =>
|
|
@@ -344,8 +426,6 @@ async function main(context, req) {
|
|
|
344
426
|
context.res = {
|
|
345
427
|
body: result
|
|
346
428
|
};
|
|
347
|
-
|
|
348
429
|
}
|
|
349
430
|
|
|
350
|
-
|
|
351
|
-
export default main;
|
|
431
|
+
export default CortexFileHandler;
|
|
@@ -25,13 +25,31 @@ async function moveFileToPublicFolder(chunkPath, requestId) {
|
|
|
25
25
|
async function deleteFolder(requestId) {
|
|
26
26
|
if (!requestId) throw new Error('Missing requestId parameter');
|
|
27
27
|
const targetFolder = join(publicFolder, requestId);
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
try {
|
|
29
|
+
// Check if folder exists first
|
|
30
|
+
const stats = await fs.stat(targetFolder);
|
|
31
|
+
if (stats.isDirectory()) {
|
|
32
|
+
// Get list of files before deleting
|
|
33
|
+
const files = await fs.readdir(targetFolder);
|
|
34
|
+
const deletedFiles = files.map(file => join(requestId, file));
|
|
35
|
+
// Delete the folder
|
|
36
|
+
await fs.rm(targetFolder, { recursive: true });
|
|
37
|
+
console.log(`Cleaned folder: ${targetFolder}`);
|
|
38
|
+
return deletedFiles;
|
|
39
|
+
}
|
|
40
|
+
return [];
|
|
41
|
+
} catch (error) {
|
|
42
|
+
if (error.code === 'ENOENT') {
|
|
43
|
+
// Folder doesn't exist, return empty array
|
|
44
|
+
return [];
|
|
45
|
+
}
|
|
46
|
+
throw error;
|
|
47
|
+
}
|
|
30
48
|
}
|
|
31
49
|
|
|
32
50
|
async function cleanupLocal(urls=null) {
|
|
51
|
+
const cleanedUrls = [];
|
|
33
52
|
if(!urls){
|
|
34
|
-
const cleanedUrls = []; // initialize array for holding cleaned file URLs
|
|
35
53
|
try {
|
|
36
54
|
// Read the directory
|
|
37
55
|
const items = await fs.readdir(publicFolder);
|