@aj-archipelago/cortex 1.3.51 → 1.3.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample} +2 -1
- package/helper-apps/cortex-file-handler/Dockerfile +1 -1
- package/helper-apps/cortex-file-handler/INTERFACE.md +178 -0
- package/helper-apps/cortex-file-handler/package.json +4 -3
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +3 -0
- package/helper-apps/cortex-file-handler/{blobHandler.js → src/blobHandler.js} +167 -99
- package/helper-apps/cortex-file-handler/{fileChunker.js → src/fileChunker.js} +11 -24
- package/helper-apps/cortex-file-handler/{index.js → src/index.js} +236 -256
- package/helper-apps/cortex-file-handler/{services → src/services}/ConversionService.js +39 -18
- package/helper-apps/cortex-file-handler/{services → src/services}/FileConversionService.js +7 -3
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +177 -0
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +258 -0
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +182 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +86 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +53 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +259 -0
- package/helper-apps/cortex-file-handler/{start.js → src/start.js} +1 -1
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +28 -0
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +1 -1
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +4 -4
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +152 -0
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +2 -28
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +134 -23
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +307 -0
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +291 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +50 -14
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +120 -0
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +193 -0
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +148 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +100 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +113 -0
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +73 -19
- package/lib/entityConstants.js +1 -1
- package/package.json +1 -1
- /package/helper-apps/cortex-file-handler/{constants.js → src/constants.js} +0 -0
- /package/helper-apps/cortex-file-handler/{docHelper.js → src/docHelper.js} +0 -0
- /package/helper-apps/cortex-file-handler/{helper.js → src/helper.js} +0 -0
- /package/helper-apps/cortex-file-handler/{localFileHandler.js → src/localFileHandler.js} +0 -0
- /package/helper-apps/cortex-file-handler/{redis.js → src/redis.js} +0 -0
|
@@ -1,32 +1,12 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
2
|
import os from 'os';
|
|
3
3
|
import path from 'path';
|
|
4
|
-
|
|
5
4
|
import { v4 as uuidv4 } from 'uuid';
|
|
6
5
|
|
|
7
|
-
import {
|
|
8
|
-
saveFileToBlob,
|
|
9
|
-
deleteBlob,
|
|
10
|
-
deleteGCS,
|
|
11
|
-
uploadBlob,
|
|
12
|
-
cleanup,
|
|
13
|
-
cleanupGCS,
|
|
14
|
-
gcsUrlExists,
|
|
15
|
-
ensureGCSUpload,
|
|
16
|
-
gcs,
|
|
17
|
-
AZURE_STORAGE_CONTAINER_NAME,
|
|
18
|
-
uploadChunkToGCS,
|
|
19
|
-
downloadFromGCS,
|
|
20
|
-
} from './blobHandler.js';
|
|
21
|
-
import { DOC_EXTENSIONS, CONVERTED_EXTENSIONS } from './constants.js';
|
|
6
|
+
import { DOC_EXTENSIONS } from './constants.js';
|
|
22
7
|
import { easyChunker } from './docHelper.js';
|
|
23
8
|
import { downloadFile, splitMediaFile } from './fileChunker.js';
|
|
24
9
|
import { ensureEncoded, ensureFileExtension, urlExists } from './helper.js';
|
|
25
|
-
import {
|
|
26
|
-
moveFileToPublicFolder,
|
|
27
|
-
deleteFolder,
|
|
28
|
-
cleanupLocal,
|
|
29
|
-
} from './localFileHandler.js';
|
|
30
10
|
import {
|
|
31
11
|
cleanupRedisFileStoreMap,
|
|
32
12
|
getFileStoreMap,
|
|
@@ -35,71 +15,32 @@ import {
|
|
|
35
15
|
setFileStoreMap,
|
|
36
16
|
} from './redis.js';
|
|
37
17
|
import { FileConversionService } from './services/FileConversionService.js';
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
const useGCS =
|
|
41
|
-
process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 ||
|
|
42
|
-
process.env.GCP_SERVICE_ACCOUNT_KEY
|
|
43
|
-
? true
|
|
44
|
-
: false;
|
|
45
|
-
|
|
46
|
-
console.log(
|
|
47
|
-
`Storage configuration - ${useAzure ? 'Azure' : 'Local'} Storage${useGCS ? ' and Google Cloud Storage' : ''}`,
|
|
48
|
-
);
|
|
18
|
+
import { StorageService } from './services/storage/StorageService.js';
|
|
19
|
+
import { uploadBlob } from './blobHandler.js';
|
|
49
20
|
|
|
50
21
|
let isCleanupRunning = false;
|
|
51
22
|
async function cleanupInactive(context) {
|
|
52
23
|
try {
|
|
53
24
|
if (isCleanupRunning) {
|
|
54
25
|
return;
|
|
55
|
-
}
|
|
26
|
+
}
|
|
56
27
|
isCleanupRunning = true;
|
|
57
28
|
const cleaned = await cleanupRedisFileStoreMap();
|
|
58
29
|
|
|
59
|
-
const
|
|
60
|
-
const cleanedLocal = [];
|
|
61
|
-
const cleanedGCS = [];
|
|
62
|
-
|
|
30
|
+
const urls = [];
|
|
63
31
|
for (const key in cleaned) {
|
|
64
32
|
const item = cleaned[key];
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
if (url.includes('.blob.core.windows.net/')) {
|
|
68
|
-
cleanedAzure.push(url);
|
|
69
|
-
} else if (url.startsWith('gs://')) {
|
|
70
|
-
cleanedGCS.push(url);
|
|
71
|
-
} else {
|
|
72
|
-
cleanedLocal.push(url);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
if (item && item.gcs) {
|
|
77
|
-
cleanedGCS.push(gcs);
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
try {
|
|
82
|
-
if (cleanedAzure && cleanedAzure.length > 0) {
|
|
83
|
-
await cleanup(context, cleanedAzure);
|
|
33
|
+
if (item.url) {
|
|
34
|
+
urls.push(item.url);
|
|
84
35
|
}
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
try {
|
|
90
|
-
if (cleanedLocal && cleanedLocal.length > 0) {
|
|
91
|
-
await cleanupLocal(cleanedLocal);
|
|
36
|
+
if (item.gcs) {
|
|
37
|
+
urls.push(item.gcs);
|
|
92
38
|
}
|
|
93
|
-
} catch (err) {
|
|
94
|
-
console.log('Error occurred during local cleanup:', err);
|
|
95
39
|
}
|
|
96
40
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
}
|
|
101
|
-
} catch (err) {
|
|
102
|
-
console.log('Error occurred during GCS cleanup:', err);
|
|
41
|
+
if (urls.length > 0) {
|
|
42
|
+
const storageService = new StorageService();
|
|
43
|
+
await storageService.cleanup(urls);
|
|
103
44
|
}
|
|
104
45
|
} catch (error) {
|
|
105
46
|
console.log('Error occurred during cleanup:', error);
|
|
@@ -120,13 +61,20 @@ async function CortexFileHandler(context, req) {
|
|
|
120
61
|
load,
|
|
121
62
|
restore,
|
|
122
63
|
} = req.body?.params || req.query;
|
|
123
|
-
|
|
64
|
+
|
|
65
|
+
// Normalize boolean parameters
|
|
66
|
+
const shouldSave = save === true || save === 'true';
|
|
67
|
+
const shouldCheckHash = checkHash === true || checkHash === 'true';
|
|
68
|
+
const shouldClearHash = clearHash === true || clearHash === 'true';
|
|
69
|
+
const shouldFetchRemote = fetch || load || restore;
|
|
70
|
+
|
|
71
|
+
const operation = shouldSave
|
|
124
72
|
? 'save'
|
|
125
|
-
:
|
|
73
|
+
: shouldCheckHash
|
|
126
74
|
? 'checkHash'
|
|
127
|
-
:
|
|
75
|
+
: shouldClearHash
|
|
128
76
|
? 'clearHash'
|
|
129
|
-
:
|
|
77
|
+
: shouldFetchRemote
|
|
130
78
|
? 'remoteFile'
|
|
131
79
|
: req.method.toLowerCase() === 'delete' ||
|
|
132
80
|
req.query.operation === 'delete'
|
|
@@ -143,12 +91,42 @@ async function CortexFileHandler(context, req) {
|
|
|
143
91
|
|
|
144
92
|
cleanupInactive(context); //trigger & no need to wait for it
|
|
145
93
|
|
|
146
|
-
// Initialize
|
|
147
|
-
const
|
|
94
|
+
// Initialize services
|
|
95
|
+
const storageService = new StorageService();
|
|
96
|
+
const conversionService = new FileConversionService(context, storageService.primaryProvider.constructor.name === 'AzureStorageProvider');
|
|
148
97
|
|
|
149
|
-
//
|
|
98
|
+
// Validate URL for document processing and media chunking operations
|
|
99
|
+
if (operation === 'document_processing' || operation === 'media_chunking') {
|
|
100
|
+
try {
|
|
101
|
+
const urlObj = new URL(uri);
|
|
102
|
+
if (!['http:', 'https:', 'gs:'].includes(urlObj.protocol)) {
|
|
103
|
+
context.res = {
|
|
104
|
+
status: 400,
|
|
105
|
+
body: 'Invalid URL protocol - only HTTP, HTTPS, and GCS URLs are supported',
|
|
106
|
+
};
|
|
107
|
+
return;
|
|
108
|
+
}
|
|
109
|
+
// Check if the pathname is too long (e.g., > 1024 characters)
|
|
110
|
+
if (urlObj.pathname.length > 1024) {
|
|
111
|
+
context.res = {
|
|
112
|
+
status: 400,
|
|
113
|
+
body: 'URL pathname is too long',
|
|
114
|
+
};
|
|
115
|
+
return;
|
|
116
|
+
}
|
|
117
|
+
} catch (error) {
|
|
118
|
+
context.res = {
|
|
119
|
+
status: 400,
|
|
120
|
+
body: 'Invalid URL format',
|
|
121
|
+
};
|
|
122
|
+
return;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Clean up files when request delete which means processing marked completed
|
|
150
127
|
if (operation === 'delete') {
|
|
151
128
|
const deleteRequestId = req.query.requestId || requestId;
|
|
129
|
+
const deleteHash = req.query.hash || hash;
|
|
152
130
|
if (!deleteRequestId) {
|
|
153
131
|
context.res = {
|
|
154
132
|
status: 400,
|
|
@@ -157,26 +135,27 @@ async function CortexFileHandler(context, req) {
|
|
|
157
135
|
return;
|
|
158
136
|
}
|
|
159
137
|
|
|
160
|
-
//
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
138
|
+
// First, get the hash from the map if it exists
|
|
139
|
+
if (deleteHash) {
|
|
140
|
+
const hashResult = await getFileStoreMap(deleteHash);
|
|
141
|
+
if (hashResult) {
|
|
142
|
+
context.log(`Found hash in map for deletion: ${deleteHash}`);
|
|
143
|
+
await removeFromFileStoreMap(deleteHash);
|
|
144
|
+
}
|
|
167
145
|
}
|
|
168
146
|
|
|
147
|
+
const deleted = await storageService.deleteFiles(deleteRequestId);
|
|
169
148
|
context.res = {
|
|
170
149
|
status: 200,
|
|
171
|
-
body: { body:
|
|
150
|
+
body: { body: deleted },
|
|
172
151
|
};
|
|
173
152
|
return;
|
|
174
153
|
}
|
|
175
154
|
|
|
176
|
-
const remoteUrl =
|
|
155
|
+
const remoteUrl = shouldFetchRemote;
|
|
177
156
|
if (req.method.toLowerCase() === 'get' && remoteUrl) {
|
|
178
157
|
context.log(`Remote file: ${remoteUrl}`);
|
|
179
|
-
let filename;
|
|
158
|
+
let filename;
|
|
180
159
|
try {
|
|
181
160
|
// Validate URL format and accessibility
|
|
182
161
|
const urlCheck = await urlExists(remoteUrl);
|
|
@@ -227,10 +206,8 @@ async function CortexFileHandler(context, req) {
|
|
|
227
206
|
await downloadFile(remoteUrl, filename);
|
|
228
207
|
|
|
229
208
|
// Now upload the downloaded file
|
|
230
|
-
const res = await
|
|
209
|
+
const res = await storageService.uploadFile(
|
|
231
210
|
context,
|
|
232
|
-
null,
|
|
233
|
-
!useAzure,
|
|
234
211
|
filename,
|
|
235
212
|
remoteUrl,
|
|
236
213
|
);
|
|
@@ -296,147 +273,140 @@ async function CortexFileHandler(context, req) {
|
|
|
296
273
|
// Log the URL retrieved from Redis before checking existence
|
|
297
274
|
context.log(`Checking existence of URL from Redis: ${hashResult?.url}`);
|
|
298
275
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
const
|
|
302
|
-
const
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
);
|
|
276
|
+
try {
|
|
277
|
+
// Check primary storage first
|
|
278
|
+
const primaryExists = hashResult?.url ? await storageService.fileExists(hashResult.url) : false;
|
|
279
|
+
const gcsExists = hashResult?.gcs ? await storageService.fileExists(hashResult.gcs) : false;
|
|
280
|
+
|
|
281
|
+
// If neither storage has the file, remove from map and return not found
|
|
282
|
+
if (!primaryExists && !gcsExists) {
|
|
283
|
+
context.log(`File not found in any storage. Removing from map: ${hash}`);
|
|
307
284
|
await removeFromFileStoreMap(hash);
|
|
308
285
|
context.res = {
|
|
309
286
|
status: 404,
|
|
310
|
-
body: `Hash ${hash}
|
|
287
|
+
body: `Hash ${hash} not found in storage`,
|
|
311
288
|
};
|
|
312
289
|
return;
|
|
313
290
|
}
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
// Check primary storage (Azure/Local) first
|
|
317
|
-
const primaryExists = await urlExists(hashResult?.url);
|
|
318
|
-
const gcsExists = gcs ? await gcsUrlExists(hashResult?.gcs) : false;
|
|
319
|
-
|
|
320
|
-
// If neither storage has the file, remove from map and return not found
|
|
321
|
-
if (!primaryExists.valid && !gcsExists) {
|
|
322
|
-
context.log(
|
|
323
|
-
`File not found in any storage. Removing from map: ${hash}`,
|
|
324
|
-
);
|
|
325
|
-
await removeFromFileStoreMap(hash);
|
|
326
|
-
context.res = {
|
|
327
|
-
status: 404,
|
|
328
|
-
body: `Hash ${hash} not found in storage`,
|
|
329
|
-
};
|
|
330
|
-
return;
|
|
331
|
-
}
|
|
332
291
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
// Download from GCS using the new function
|
|
350
|
-
await downloadFromGCS(hashResult.gcs, downloadedFile);
|
|
351
|
-
|
|
352
|
-
// Upload to primary storage
|
|
353
|
-
const res = await uploadBlob(
|
|
354
|
-
context,
|
|
355
|
-
null,
|
|
356
|
-
!useAzure,
|
|
357
|
-
downloadedFile,
|
|
358
|
-
hash
|
|
359
|
-
);
|
|
360
|
-
|
|
361
|
-
// Update the hash result with the new primary storage URL
|
|
362
|
-
hashResult.url = res.url;
|
|
292
|
+
// If GCS is missing but primary exists, restore to GCS
|
|
293
|
+
if (primaryExists && !gcsExists && hashResult?.url) {
|
|
294
|
+
context.log(`GCS file missing, restoring from primary: ${hash}`);
|
|
295
|
+
try {
|
|
296
|
+
hashResult = await storageService.ensureGCSUpload(context, hashResult);
|
|
297
|
+
} catch (error) {
|
|
298
|
+
context.log(`Error restoring to GCS: ${error}`);
|
|
299
|
+
// If restoration fails, remove the hash from the map
|
|
300
|
+
await removeFromFileStoreMap(hash);
|
|
301
|
+
context.res = {
|
|
302
|
+
status: 404,
|
|
303
|
+
body: `Hash ${hash} not found`,
|
|
304
|
+
};
|
|
305
|
+
return;
|
|
306
|
+
}
|
|
307
|
+
}
|
|
363
308
|
|
|
364
|
-
|
|
309
|
+
// If primary is missing but GCS exists, restore from GCS
|
|
310
|
+
if (!primaryExists && gcsExists && hashResult?.gcs && storageService.backupProvider?.isConfigured()) {
|
|
311
|
+
context.log(`Primary storage file missing, restoring from GCS: ${hash}`);
|
|
365
312
|
try {
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
313
|
+
// Create a temporary file to store the downloaded content
|
|
314
|
+
const tempDir = path.join(os.tmpdir(), `${uuidv4()}`);
|
|
315
|
+
fs.mkdirSync(tempDir);
|
|
316
|
+
const downloadedFile = path.join(tempDir, path.basename(hashResult.gcs));
|
|
317
|
+
|
|
318
|
+
// Download from GCS
|
|
319
|
+
await storageService.downloadFile(hashResult.gcs, downloadedFile);
|
|
320
|
+
|
|
321
|
+
// Upload to primary storage
|
|
322
|
+
const res = await storageService.uploadFile(
|
|
323
|
+
context,
|
|
324
|
+
downloadedFile,
|
|
325
|
+
hash
|
|
326
|
+
);
|
|
327
|
+
|
|
328
|
+
// Update the hash result with the new primary storage URL
|
|
329
|
+
hashResult.url = res.url;
|
|
330
|
+
|
|
331
|
+
// Clean up temp file
|
|
332
|
+
try {
|
|
333
|
+
if (downloadedFile && fs.existsSync(downloadedFile)) {
|
|
334
|
+
fs.unlinkSync(downloadedFile);
|
|
335
|
+
}
|
|
336
|
+
if (tempDir && fs.existsSync(tempDir)) {
|
|
337
|
+
fs.rmSync(tempDir, { recursive: true });
|
|
338
|
+
}
|
|
339
|
+
} catch (err) {
|
|
340
|
+
console.log('Error cleaning up temp files:', err);
|
|
371
341
|
}
|
|
372
|
-
} catch (
|
|
373
|
-
console.
|
|
342
|
+
} catch (error) {
|
|
343
|
+
console.error('Error restoring from GCS:', error);
|
|
344
|
+
// If restoration fails, remove the hash from the map
|
|
345
|
+
await removeFromFileStoreMap(hash);
|
|
346
|
+
context.res = {
|
|
347
|
+
status: 404,
|
|
348
|
+
body: `Hash ${hash} not found`,
|
|
349
|
+
};
|
|
350
|
+
return;
|
|
374
351
|
}
|
|
375
|
-
} catch (error) {
|
|
376
|
-
console.error('Error restoring from GCS:', error);
|
|
377
352
|
}
|
|
378
|
-
}
|
|
379
353
|
|
|
380
|
-
|
|
381
|
-
|
|
354
|
+
// Final check to ensure we have at least one valid storage location
|
|
355
|
+
const finalPrimaryCheck = hashResult?.url ? await storageService.fileExists(hashResult.url) : false;
|
|
356
|
+
const finalGCSCheck = hashResult?.gcs ? await storageService.fileExists(hashResult.gcs) : false;
|
|
357
|
+
if (!finalPrimaryCheck && !finalGCSCheck) {
|
|
358
|
+
context.log(`Failed to restore file. Removing from map: ${hash}`);
|
|
359
|
+
await removeFromFileStoreMap(hash);
|
|
360
|
+
context.res = {
|
|
361
|
+
status: 404,
|
|
362
|
+
body: `Hash ${hash} not found`,
|
|
363
|
+
};
|
|
364
|
+
return;
|
|
365
|
+
}
|
|
382
366
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
367
|
+
// Create the response object
|
|
368
|
+
const response = {
|
|
369
|
+
message: `File '${hashResult.filename}' uploaded successfully.`,
|
|
370
|
+
filename: hashResult.filename,
|
|
371
|
+
url: hashResult.url,
|
|
372
|
+
gcs: hashResult.gcs,
|
|
373
|
+
hash: hashResult.hash,
|
|
374
|
+
timestamp: new Date().toISOString()
|
|
391
375
|
};
|
|
392
|
-
return;
|
|
393
|
-
}
|
|
394
376
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
hash: hashResult.hash,
|
|
402
|
-
timestamp: new Date().toISOString()
|
|
403
|
-
};
|
|
377
|
+
// Ensure converted version exists and is synced across storage providers
|
|
378
|
+
try {
|
|
379
|
+
hashResult = await conversionService.ensureConvertedVersion(hashResult, requestId);
|
|
380
|
+
} catch (error) {
|
|
381
|
+
context.log(`Error ensuring converted version: ${error}`);
|
|
382
|
+
}
|
|
404
383
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
context.log(`Adding converted info to final response`);
|
|
408
|
-
response.converted = {
|
|
409
|
-
url: hashResult.converted.url,
|
|
410
|
-
gcs: hashResult.converted.gcs
|
|
411
|
-
};
|
|
412
|
-
} else if (hashResult.converted?.gcs) {
|
|
413
|
-
// If we only have GCS URL, trigger conversion
|
|
414
|
-
context.log(`Only GCS URL exists for converted file, triggering conversion`);
|
|
415
|
-
const convertedResult = await conversionService.convertFile(
|
|
416
|
-
await downloadFile(hashResult.url, path.join(os.tmpdir(), path.basename(hashResult.url))),
|
|
417
|
-
hashResult.url
|
|
418
|
-
);
|
|
419
|
-
if (convertedResult.converted) {
|
|
420
|
-
const convertedSaveResult = await conversionService._saveConvertedFile(convertedResult.convertedPath, requestId);
|
|
384
|
+
// Attach converted info to response if present
|
|
385
|
+
if (hashResult.converted) {
|
|
421
386
|
response.converted = {
|
|
422
|
-
url:
|
|
387
|
+
url: hashResult.converted.url,
|
|
423
388
|
gcs: hashResult.converted.gcs
|
|
424
389
|
};
|
|
425
|
-
// Update the hash map with the new converted info
|
|
426
|
-
await setFileStoreMap(`${hashResult.hash}_converted`, response.converted);
|
|
427
390
|
}
|
|
428
|
-
} else {
|
|
429
|
-
context.log(`No converted info to add to final response`);
|
|
430
|
-
}
|
|
431
391
|
|
|
432
|
-
|
|
433
|
-
|
|
392
|
+
//update redis timestamp with current time
|
|
393
|
+
await setFileStoreMap(hash, hashResult);
|
|
434
394
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
395
|
+
context.res = {
|
|
396
|
+
status: 200,
|
|
397
|
+
body: response
|
|
398
|
+
};
|
|
399
|
+
return;
|
|
400
|
+
} catch (error) {
|
|
401
|
+
context.log(`Error checking file existence: ${error}`);
|
|
402
|
+
// If there's an error checking file existence, remove the hash from the map
|
|
403
|
+
await removeFromFileStoreMap(hash);
|
|
404
|
+
context.res = {
|
|
405
|
+
status: 404,
|
|
406
|
+
body: `Hash ${hash} not found`,
|
|
407
|
+
};
|
|
408
|
+
return;
|
|
409
|
+
}
|
|
440
410
|
}
|
|
441
411
|
|
|
442
412
|
context.res = {
|
|
@@ -447,7 +417,10 @@ async function CortexFileHandler(context, req) {
|
|
|
447
417
|
}
|
|
448
418
|
|
|
449
419
|
if (req.method.toLowerCase() === 'post') {
|
|
450
|
-
|
|
420
|
+
// Determine if we should save to local storage based on primary provider
|
|
421
|
+
const saveToLocal = storageService.primaryProvider.constructor.name === 'LocalStorageProvider';
|
|
422
|
+
// Use uploadBlob to handle multipart/form-data
|
|
423
|
+
const result = await uploadBlob(context, req, saveToLocal, null, hash);
|
|
451
424
|
if (result?.hash && context?.res?.body) {
|
|
452
425
|
await setFileStoreMap(result.hash, context.res.body);
|
|
453
426
|
}
|
|
@@ -484,7 +457,7 @@ async function CortexFileHandler(context, req) {
|
|
|
484
457
|
};
|
|
485
458
|
|
|
486
459
|
try {
|
|
487
|
-
|
|
460
|
+
// Parse URL and get pathname without query parameters for extension check
|
|
488
461
|
const urlObj = new URL(uri);
|
|
489
462
|
const pathWithoutQuery = urlObj.pathname;
|
|
490
463
|
|
|
@@ -498,22 +471,48 @@ async function CortexFileHandler(context, req) {
|
|
|
498
471
|
await downloadFile(uri, downloadedFile);
|
|
499
472
|
|
|
500
473
|
try {
|
|
501
|
-
if (
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
474
|
+
if (shouldSave) {
|
|
475
|
+
// Check if file needs conversion first
|
|
476
|
+
if (conversionService.needsConversion(downloadedFile)) {
|
|
477
|
+
// Convert the file
|
|
478
|
+
const conversion = await conversionService.convertFile(downloadedFile, uri);
|
|
479
|
+
if (!conversion.converted) {
|
|
480
|
+
throw new Error('File conversion failed');
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
// Save the converted file
|
|
484
|
+
const convertedSaveResult = await conversionService._saveConvertedFile(conversion.convertedPath, requestId);
|
|
485
|
+
|
|
486
|
+
// Return the converted file URL
|
|
487
|
+
context.res = {
|
|
488
|
+
status: 200,
|
|
489
|
+
body: {
|
|
490
|
+
url: convertedSaveResult.url,
|
|
491
|
+
blobName: path.basename(convertedSaveResult.url)
|
|
492
|
+
}
|
|
493
|
+
};
|
|
510
494
|
} else {
|
|
511
|
-
|
|
495
|
+
// File doesn't need conversion, save the original file
|
|
496
|
+
const saveResult = await conversionService._saveConvertedFile(downloadedFile, requestId);
|
|
497
|
+
|
|
498
|
+
// Return the original file URL
|
|
499
|
+
context.res = {
|
|
500
|
+
status: 200,
|
|
501
|
+
body: {
|
|
502
|
+
url: saveResult.url,
|
|
503
|
+
blobName: path.basename(saveResult.url)
|
|
504
|
+
}
|
|
505
|
+
};
|
|
512
506
|
}
|
|
513
|
-
|
|
514
|
-
result.push(fileUrl);
|
|
507
|
+
return;
|
|
515
508
|
} else {
|
|
516
|
-
|
|
509
|
+
let text;
|
|
510
|
+
if (conversionService.needsConversion(downloadedFile)) {
|
|
511
|
+
text = await conversionService.convertFile(downloadedFile, uri, true);
|
|
512
|
+
} else {
|
|
513
|
+
// For files that don't need conversion, read the file contents directly
|
|
514
|
+
text = await fs.promises.readFile(downloadedFile, 'utf-8');
|
|
515
|
+
}
|
|
517
516
|
result.push(...easyChunker(text));
|
|
518
517
|
}
|
|
519
518
|
} catch (err) {
|
|
@@ -533,24 +532,16 @@ async function CortexFileHandler(context, req) {
|
|
|
533
532
|
console.log(`Error cleaning temp file ${downloadedFile}:`, err);
|
|
534
533
|
}
|
|
535
534
|
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
535
|
+
// Delete uploaded files only if we're NOT saving the converted version.
|
|
536
|
+
// When save=true we need to keep the converted file (which is stored under the same requestId prefix),
|
|
537
|
+
// so skip the cleanup in that case.
|
|
538
|
+
if (!shouldSave) {
|
|
539
|
+
await storageService.deleteFiles(requestId);
|
|
540
|
+
console.log(
|
|
541
|
+
`Cleaned temp files for request id ${requestId}`,
|
|
541
542
|
);
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
const extractedValue = match[1];
|
|
545
|
-
useAzure
|
|
546
|
-
? await deleteBlob(extractedValue)
|
|
547
|
-
: await deleteFolder(extractedValue);
|
|
548
|
-
console.log(
|
|
549
|
-
`Cleaned temp file ${uri} with request id ${extractedValue}`,
|
|
550
|
-
);
|
|
551
|
-
}
|
|
552
|
-
} catch (err) {
|
|
553
|
-
console.log(`Error cleaning temp file ${uri}:`, err);
|
|
543
|
+
} else {
|
|
544
|
+
console.log(`Skip cleanup for request id ${requestId} because save flag is set`);
|
|
554
545
|
}
|
|
555
546
|
}
|
|
556
547
|
} else {
|
|
@@ -571,23 +562,12 @@ async function CortexFileHandler(context, req) {
|
|
|
571
562
|
// sequential processing of chunks
|
|
572
563
|
for (let index = 0; index < chunks.length; index++) {
|
|
573
564
|
const chunkPath = chunks[index];
|
|
574
|
-
|
|
575
|
-
let chunkGcsUrl;
|
|
576
|
-
|
|
577
|
-
if (useAzure) {
|
|
578
|
-
const savedBlob = await saveFileToBlob(chunkPath, requestId);
|
|
579
|
-
chunkUrl = savedBlob.url;
|
|
580
|
-
} else {
|
|
581
|
-
chunkUrl = await moveFileToPublicFolder(chunkPath, requestId);
|
|
582
|
-
}
|
|
583
|
-
|
|
584
|
-
// If GCS is configured, save to GCS
|
|
585
|
-
chunkGcsUrl = await uploadChunkToGCS(chunkPath, requestId);
|
|
565
|
+
const chunkResult = await storageService.uploadFile(context, chunkPath, requestId);
|
|
586
566
|
|
|
587
567
|
const chunkOffset = chunkOffsets[index];
|
|
588
|
-
result.push({ uri:
|
|
568
|
+
result.push({ uri: chunkResult.url, offset: chunkOffset, gcs: chunkResult.gcs });
|
|
589
569
|
console.log(
|
|
590
|
-
`Saved chunk as: ${
|
|
570
|
+
`Saved chunk as: ${chunkResult.url}${chunkResult.gcs ? ` and ${chunkResult.gcs}` : ''}`,
|
|
591
571
|
);
|
|
592
572
|
await sendProgress();
|
|
593
573
|
}
|
|
@@ -625,4 +605,4 @@ async function CortexFileHandler(context, req) {
|
|
|
625
605
|
};
|
|
626
606
|
}
|
|
627
607
|
|
|
628
|
-
export default CortexFileHandler;
|
|
608
|
+
export default CortexFileHandler;
|