@aj-archipelago/cortex 1.3.51 → 1.3.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample} +2 -1
  2. package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample} +2 -1
  3. package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample} +2 -1
  4. package/helper-apps/cortex-file-handler/Dockerfile +1 -1
  5. package/helper-apps/cortex-file-handler/INTERFACE.md +178 -0
  6. package/helper-apps/cortex-file-handler/package.json +4 -3
  7. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +3 -0
  8. package/helper-apps/cortex-file-handler/{blobHandler.js → src/blobHandler.js} +167 -99
  9. package/helper-apps/cortex-file-handler/{fileChunker.js → src/fileChunker.js} +11 -24
  10. package/helper-apps/cortex-file-handler/{index.js → src/index.js} +236 -256
  11. package/helper-apps/cortex-file-handler/{services → src/services}/ConversionService.js +39 -18
  12. package/helper-apps/cortex-file-handler/{services → src/services}/FileConversionService.js +7 -3
  13. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +177 -0
  14. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +258 -0
  15. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +182 -0
  16. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +86 -0
  17. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +53 -0
  18. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +259 -0
  19. package/helper-apps/cortex-file-handler/{start.js → src/start.js} +1 -1
  20. package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +28 -0
  21. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +1 -1
  22. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +4 -4
  23. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +152 -0
  24. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +2 -28
  25. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +134 -23
  26. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +307 -0
  27. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +291 -0
  28. package/helper-apps/cortex-file-handler/tests/start.test.js +50 -14
  29. package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +120 -0
  30. package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +193 -0
  31. package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +148 -0
  32. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +100 -0
  33. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +113 -0
  34. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +73 -19
  35. package/lib/entityConstants.js +1 -1
  36. package/package.json +1 -1
  37. /package/helper-apps/cortex-file-handler/{constants.js → src/constants.js} +0 -0
  38. /package/helper-apps/cortex-file-handler/{docHelper.js → src/docHelper.js} +0 -0
  39. /package/helper-apps/cortex-file-handler/{helper.js → src/helper.js} +0 -0
  40. /package/helper-apps/cortex-file-handler/{localFileHandler.js → src/localFileHandler.js} +0 -0
  41. /package/helper-apps/cortex-file-handler/{redis.js → src/redis.js} +0 -0
@@ -1,32 +1,12 @@
1
1
  import fs from 'fs';
2
2
  import os from 'os';
3
3
  import path from 'path';
4
-
5
4
  import { v4 as uuidv4 } from 'uuid';
6
5
 
7
- import {
8
- saveFileToBlob,
9
- deleteBlob,
10
- deleteGCS,
11
- uploadBlob,
12
- cleanup,
13
- cleanupGCS,
14
- gcsUrlExists,
15
- ensureGCSUpload,
16
- gcs,
17
- AZURE_STORAGE_CONTAINER_NAME,
18
- uploadChunkToGCS,
19
- downloadFromGCS,
20
- } from './blobHandler.js';
21
- import { DOC_EXTENSIONS, CONVERTED_EXTENSIONS } from './constants.js';
6
+ import { DOC_EXTENSIONS } from './constants.js';
22
7
  import { easyChunker } from './docHelper.js';
23
8
  import { downloadFile, splitMediaFile } from './fileChunker.js';
24
9
  import { ensureEncoded, ensureFileExtension, urlExists } from './helper.js';
25
- import {
26
- moveFileToPublicFolder,
27
- deleteFolder,
28
- cleanupLocal,
29
- } from './localFileHandler.js';
30
10
  import {
31
11
  cleanupRedisFileStoreMap,
32
12
  getFileStoreMap,
@@ -35,71 +15,32 @@ import {
35
15
  setFileStoreMap,
36
16
  } from './redis.js';
37
17
  import { FileConversionService } from './services/FileConversionService.js';
38
-
39
- const useAzure = process.env.AZURE_STORAGE_CONNECTION_STRING ? true : false;
40
- const useGCS =
41
- process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 ||
42
- process.env.GCP_SERVICE_ACCOUNT_KEY
43
- ? true
44
- : false;
45
-
46
- console.log(
47
- `Storage configuration - ${useAzure ? 'Azure' : 'Local'} Storage${useGCS ? ' and Google Cloud Storage' : ''}`,
48
- );
18
+ import { StorageService } from './services/storage/StorageService.js';
19
+ import { uploadBlob } from './blobHandler.js';
49
20
 
50
21
  let isCleanupRunning = false;
51
22
  async function cleanupInactive(context) {
52
23
  try {
53
24
  if (isCleanupRunning) {
54
25
  return;
55
- } //no need to cleanup every call
26
+ }
56
27
  isCleanupRunning = true;
57
28
  const cleaned = await cleanupRedisFileStoreMap();
58
29
 
59
- const cleanedAzure = [];
60
- const cleanedLocal = [];
61
- const cleanedGCS = [];
62
-
30
+ const urls = [];
63
31
  for (const key in cleaned) {
64
32
  const item = cleaned[key];
65
- const { url, gcs } = item;
66
- if (url) {
67
- if (url.includes('.blob.core.windows.net/')) {
68
- cleanedAzure.push(url);
69
- } else if (url.startsWith('gs://')) {
70
- cleanedGCS.push(url);
71
- } else {
72
- cleanedLocal.push(url);
73
- }
74
- }
75
-
76
- if (item && item.gcs) {
77
- cleanedGCS.push(gcs);
78
- }
79
- }
80
-
81
- try {
82
- if (cleanedAzure && cleanedAzure.length > 0) {
83
- await cleanup(context, cleanedAzure);
33
+ if (item.url) {
34
+ urls.push(item.url);
84
35
  }
85
- } catch (error) {
86
- console.log('Error occurred during azure cleanup:', error);
87
- }
88
-
89
- try {
90
- if (cleanedLocal && cleanedLocal.length > 0) {
91
- await cleanupLocal(cleanedLocal);
36
+ if (item.gcs) {
37
+ urls.push(item.gcs);
92
38
  }
93
- } catch (err) {
94
- console.log('Error occurred during local cleanup:', err);
95
39
  }
96
40
 
97
- try {
98
- if (cleanedGCS && cleanedGCS.length > 0) {
99
- await cleanupGCS(cleanedGCS);
100
- }
101
- } catch (err) {
102
- console.log('Error occurred during GCS cleanup:', err);
41
+ if (urls.length > 0) {
42
+ const storageService = new StorageService();
43
+ await storageService.cleanup(urls);
103
44
  }
104
45
  } catch (error) {
105
46
  console.log('Error occurred during cleanup:', error);
@@ -120,13 +61,20 @@ async function CortexFileHandler(context, req) {
120
61
  load,
121
62
  restore,
122
63
  } = req.body?.params || req.query;
123
- const operation = save
64
+
65
+ // Normalize boolean parameters
66
+ const shouldSave = save === true || save === 'true';
67
+ const shouldCheckHash = checkHash === true || checkHash === 'true';
68
+ const shouldClearHash = clearHash === true || clearHash === 'true';
69
+ const shouldFetchRemote = fetch || load || restore;
70
+
71
+ const operation = shouldSave
124
72
  ? 'save'
125
- : checkHash
73
+ : shouldCheckHash
126
74
  ? 'checkHash'
127
- : clearHash
75
+ : shouldClearHash
128
76
  ? 'clearHash'
129
- : fetch || load || restore
77
+ : shouldFetchRemote
130
78
  ? 'remoteFile'
131
79
  : req.method.toLowerCase() === 'delete' ||
132
80
  req.query.operation === 'delete'
@@ -143,12 +91,42 @@ async function CortexFileHandler(context, req) {
143
91
 
144
92
  cleanupInactive(context); //trigger & no need to wait for it
145
93
 
146
- // Initialize conversion service
147
- const conversionService = new FileConversionService(context, useAzure);
94
+ // Initialize services
95
+ const storageService = new StorageService();
96
+ const conversionService = new FileConversionService(context, storageService.primaryProvider.constructor.name === 'AzureStorageProvider');
148
97
 
149
- // Clean up blob when request delete which means processing marked completed
98
+ // Validate URL for document processing and media chunking operations
99
+ if (operation === 'document_processing' || operation === 'media_chunking') {
100
+ try {
101
+ const urlObj = new URL(uri);
102
+ if (!['http:', 'https:', 'gs:'].includes(urlObj.protocol)) {
103
+ context.res = {
104
+ status: 400,
105
+ body: 'Invalid URL protocol - only HTTP, HTTPS, and GCS URLs are supported',
106
+ };
107
+ return;
108
+ }
109
+ // Check if the pathname is too long (e.g., > 1024 characters)
110
+ if (urlObj.pathname.length > 1024) {
111
+ context.res = {
112
+ status: 400,
113
+ body: 'URL pathname is too long',
114
+ };
115
+ return;
116
+ }
117
+ } catch (error) {
118
+ context.res = {
119
+ status: 400,
120
+ body: 'Invalid URL format',
121
+ };
122
+ return;
123
+ }
124
+ }
125
+
126
+ // Clean up files when request delete which means processing marked completed
150
127
  if (operation === 'delete') {
151
128
  const deleteRequestId = req.query.requestId || requestId;
129
+ const deleteHash = req.query.hash || hash;
152
130
  if (!deleteRequestId) {
153
131
  context.res = {
154
132
  status: 400,
@@ -157,26 +135,27 @@ async function CortexFileHandler(context, req) {
157
135
  return;
158
136
  }
159
137
 
160
- // Delete from Azure/Local storage
161
- const azureResult = useAzure
162
- ? await deleteBlob(deleteRequestId)
163
- : await deleteFolder(deleteRequestId);
164
- const gcsResult = [];
165
- if (gcs) {
166
- gcsResult.push(...(await deleteGCS(deleteRequestId)));
138
+ // First, get the hash from the map if it exists
139
+ if (deleteHash) {
140
+ const hashResult = await getFileStoreMap(deleteHash);
141
+ if (hashResult) {
142
+ context.log(`Found hash in map for deletion: ${deleteHash}`);
143
+ await removeFromFileStoreMap(deleteHash);
144
+ }
167
145
  }
168
146
 
147
+ const deleted = await storageService.deleteFiles(deleteRequestId);
169
148
  context.res = {
170
149
  status: 200,
171
- body: { body: [...azureResult, ...gcsResult] },
150
+ body: { body: deleted },
172
151
  };
173
152
  return;
174
153
  }
175
154
 
176
- const remoteUrl = fetch || restore || load;
155
+ const remoteUrl = shouldFetchRemote;
177
156
  if (req.method.toLowerCase() === 'get' && remoteUrl) {
178
157
  context.log(`Remote file: ${remoteUrl}`);
179
- let filename; // Declare filename outside try block
158
+ let filename;
180
159
  try {
181
160
  // Validate URL format and accessibility
182
161
  const urlCheck = await urlExists(remoteUrl);
@@ -227,10 +206,8 @@ async function CortexFileHandler(context, req) {
227
206
  await downloadFile(remoteUrl, filename);
228
207
 
229
208
  // Now upload the downloaded file
230
- const res = await uploadBlob(
209
+ const res = await storageService.uploadFile(
231
210
  context,
232
- null,
233
- !useAzure,
234
211
  filename,
235
212
  remoteUrl,
236
213
  );
@@ -296,147 +273,140 @@ async function CortexFileHandler(context, req) {
296
273
  // Log the URL retrieved from Redis before checking existence
297
274
  context.log(`Checking existence of URL from Redis: ${hashResult?.url}`);
298
275
 
299
- // Detect double-encoding in the blob name
300
- if (hashResult.url) {
301
- const urlPath = hashResult.url.split('?')[0];
302
- const blobName = urlPath.substring(urlPath.lastIndexOf('/') + 1);
303
- if (/%25[0-9A-Fa-f]{2}/.test(blobName)) {
304
- context.log(
305
- `Double-encoded blob detected for hash ${hash}. Invalidating cache entry.`,
306
- );
276
+ try {
277
+ // Check primary storage first
278
+ const primaryExists = hashResult?.url ? await storageService.fileExists(hashResult.url) : false;
279
+ const gcsExists = hashResult?.gcs ? await storageService.fileExists(hashResult.gcs) : false;
280
+
281
+ // If neither storage has the file, remove from map and return not found
282
+ if (!primaryExists && !gcsExists) {
283
+ context.log(`File not found in any storage. Removing from map: ${hash}`);
307
284
  await removeFromFileStoreMap(hash);
308
285
  context.res = {
309
286
  status: 404,
310
- body: `Hash ${hash} is double-encoded and has been invalidated. Please re-upload.`,
287
+ body: `Hash ${hash} not found in storage`,
311
288
  };
312
289
  return;
313
290
  }
314
- }
315
-
316
- // Check primary storage (Azure/Local) first
317
- const primaryExists = await urlExists(hashResult?.url);
318
- const gcsExists = gcs ? await gcsUrlExists(hashResult?.gcs) : false;
319
-
320
- // If neither storage has the file, remove from map and return not found
321
- if (!primaryExists.valid && !gcsExists) {
322
- context.log(
323
- `File not found in any storage. Removing from map: ${hash}`,
324
- );
325
- await removeFromFileStoreMap(hash);
326
- context.res = {
327
- status: 404,
328
- body: `Hash ${hash} not found in storage`,
329
- };
330
- return;
331
- }
332
291
 
333
- // If GCS is missing but primary exists, restore to GCS
334
- else if (primaryExists.valid && gcs && !gcsExists) {
335
- context.log(`GCS file missing, restoring from primary: ${hash}`);
336
- const { gcs: _, ...fileInfo } = hashResult;
337
- hashResult = await ensureGCSUpload(context, fileInfo);
338
- }
339
-
340
- // If primary is missing but GCS exists, restore from GCS
341
- if (!primaryExists.valid && gcsExists) {
342
- context.log(`Primary storage file missing, restoring from GCS: ${hash}`);
343
- try {
344
- // Create a temporary file to store the downloaded content
345
- const tempDir = path.join(os.tmpdir(), `${uuidv4()}`);
346
- fs.mkdirSync(tempDir);
347
- const downloadedFile = path.join(tempDir, path.basename(hashResult.gcs));
348
-
349
- // Download from GCS using the new function
350
- await downloadFromGCS(hashResult.gcs, downloadedFile);
351
-
352
- // Upload to primary storage
353
- const res = await uploadBlob(
354
- context,
355
- null,
356
- !useAzure,
357
- downloadedFile,
358
- hash
359
- );
360
-
361
- // Update the hash result with the new primary storage URL
362
- hashResult.url = res.url;
292
+ // If GCS is missing but primary exists, restore to GCS
293
+ if (primaryExists && !gcsExists && hashResult?.url) {
294
+ context.log(`GCS file missing, restoring from primary: ${hash}`);
295
+ try {
296
+ hashResult = await storageService.ensureGCSUpload(context, hashResult);
297
+ } catch (error) {
298
+ context.log(`Error restoring to GCS: ${error}`);
299
+ // If restoration fails, remove the hash from the map
300
+ await removeFromFileStoreMap(hash);
301
+ context.res = {
302
+ status: 404,
303
+ body: `Hash ${hash} not found`,
304
+ };
305
+ return;
306
+ }
307
+ }
363
308
 
364
- // Clean up temp file
309
+ // If primary is missing but GCS exists, restore from GCS
310
+ if (!primaryExists && gcsExists && hashResult?.gcs && storageService.backupProvider?.isConfigured()) {
311
+ context.log(`Primary storage file missing, restoring from GCS: ${hash}`);
365
312
  try {
366
- if (downloadedFile && fs.existsSync(downloadedFile)) {
367
- fs.unlinkSync(downloadedFile);
368
- }
369
- if (tempDir && fs.existsSync(tempDir)) {
370
- fs.rmSync(tempDir, { recursive: true });
313
+ // Create a temporary file to store the downloaded content
314
+ const tempDir = path.join(os.tmpdir(), `${uuidv4()}`);
315
+ fs.mkdirSync(tempDir);
316
+ const downloadedFile = path.join(tempDir, path.basename(hashResult.gcs));
317
+
318
+ // Download from GCS
319
+ await storageService.downloadFile(hashResult.gcs, downloadedFile);
320
+
321
+ // Upload to primary storage
322
+ const res = await storageService.uploadFile(
323
+ context,
324
+ downloadedFile,
325
+ hash
326
+ );
327
+
328
+ // Update the hash result with the new primary storage URL
329
+ hashResult.url = res.url;
330
+
331
+ // Clean up temp file
332
+ try {
333
+ if (downloadedFile && fs.existsSync(downloadedFile)) {
334
+ fs.unlinkSync(downloadedFile);
335
+ }
336
+ if (tempDir && fs.existsSync(tempDir)) {
337
+ fs.rmSync(tempDir, { recursive: true });
338
+ }
339
+ } catch (err) {
340
+ console.log('Error cleaning up temp files:', err);
371
341
  }
372
- } catch (err) {
373
- console.log('Error cleaning up temp files:', err);
342
+ } catch (error) {
343
+ console.error('Error restoring from GCS:', error);
344
+ // If restoration fails, remove the hash from the map
345
+ await removeFromFileStoreMap(hash);
346
+ context.res = {
347
+ status: 404,
348
+ body: `Hash ${hash} not found`,
349
+ };
350
+ return;
374
351
  }
375
- } catch (error) {
376
- console.error('Error restoring from GCS:', error);
377
352
  }
378
- }
379
353
 
380
- // Ensure converted version exists if needed
381
- hashResult = await conversionService.ensureConvertedVersion(hashResult, requestId);
354
+ // Final check to ensure we have at least one valid storage location
355
+ const finalPrimaryCheck = hashResult?.url ? await storageService.fileExists(hashResult.url) : false;
356
+ const finalGCSCheck = hashResult?.gcs ? await storageService.fileExists(hashResult.gcs) : false;
357
+ if (!finalPrimaryCheck && !finalGCSCheck) {
358
+ context.log(`Failed to restore file. Removing from map: ${hash}`);
359
+ await removeFromFileStoreMap(hash);
360
+ context.res = {
361
+ status: 404,
362
+ body: `Hash ${hash} not found`,
363
+ };
364
+ return;
365
+ }
382
366
 
383
- // Final check to ensure we have at least one valid storage location
384
- const finalPrimaryCheck = await urlExists(hashResult?.url);
385
- if (!finalPrimaryCheck.valid && !(await gcsUrlExists(hashResult?.gcs))) {
386
- context.log(`Failed to restore file. Removing from map: ${hash}`);
387
- await removeFromFileStoreMap(hash);
388
- context.res = {
389
- status: 404,
390
- body: `Hash ${hash} not found and restoration failed`,
367
+ // Create the response object
368
+ const response = {
369
+ message: `File '${hashResult.filename}' uploaded successfully.`,
370
+ filename: hashResult.filename,
371
+ url: hashResult.url,
372
+ gcs: hashResult.gcs,
373
+ hash: hashResult.hash,
374
+ timestamp: new Date().toISOString()
391
375
  };
392
- return;
393
- }
394
376
 
395
- // Create the response object
396
- const response = {
397
- message: `File '${hashResult.filename}' ${useAzure ? 'uploaded' : 'saved'} successfully.`,
398
- filename: hashResult.filename,
399
- url: hashResult.url,
400
- gcs: hashResult.gcs,
401
- hash: hashResult.hash,
402
- timestamp: new Date().toISOString()
403
- };
377
+ // Ensure converted version exists and is synced across storage providers
378
+ try {
379
+ hashResult = await conversionService.ensureConvertedVersion(hashResult, requestId);
380
+ } catch (error) {
381
+ context.log(`Error ensuring converted version: ${error}`);
382
+ }
404
383
 
405
- // Add converted info if it exists and has a valid URL
406
- if (hashResult.converted?.url) {
407
- context.log(`Adding converted info to final response`);
408
- response.converted = {
409
- url: hashResult.converted.url,
410
- gcs: hashResult.converted.gcs
411
- };
412
- } else if (hashResult.converted?.gcs) {
413
- // If we only have GCS URL, trigger conversion
414
- context.log(`Only GCS URL exists for converted file, triggering conversion`);
415
- const convertedResult = await conversionService.convertFile(
416
- await downloadFile(hashResult.url, path.join(os.tmpdir(), path.basename(hashResult.url))),
417
- hashResult.url
418
- );
419
- if (convertedResult.converted) {
420
- const convertedSaveResult = await conversionService._saveConvertedFile(convertedResult.convertedPath, requestId);
384
+ // Attach converted info to response if present
385
+ if (hashResult.converted) {
421
386
  response.converted = {
422
- url: convertedSaveResult.url,
387
+ url: hashResult.converted.url,
423
388
  gcs: hashResult.converted.gcs
424
389
  };
425
- // Update the hash map with the new converted info
426
- await setFileStoreMap(`${hashResult.hash}_converted`, response.converted);
427
390
  }
428
- } else {
429
- context.log(`No converted info to add to final response`);
430
- }
431
391
 
432
- //update redis timestamp with current time
433
- await setFileStoreMap(hash, hashResult);
392
+ //update redis timestamp with current time
393
+ await setFileStoreMap(hash, hashResult);
434
394
 
435
- context.res = {
436
- status: 200,
437
- body: response
438
- };
439
- return;
395
+ context.res = {
396
+ status: 200,
397
+ body: response
398
+ };
399
+ return;
400
+ } catch (error) {
401
+ context.log(`Error checking file existence: ${error}`);
402
+ // If there's an error checking file existence, remove the hash from the map
403
+ await removeFromFileStoreMap(hash);
404
+ context.res = {
405
+ status: 404,
406
+ body: `Hash ${hash} not found`,
407
+ };
408
+ return;
409
+ }
440
410
  }
441
411
 
442
412
  context.res = {
@@ -447,7 +417,10 @@ async function CortexFileHandler(context, req) {
447
417
  }
448
418
 
449
419
  if (req.method.toLowerCase() === 'post') {
450
- const result = await uploadBlob(context, req, !useAzure, null, hash);
420
+ // Determine if we should save to local storage based on primary provider
421
+ const saveToLocal = storageService.primaryProvider.constructor.name === 'LocalStorageProvider';
422
+ // Use uploadBlob to handle multipart/form-data
423
+ const result = await uploadBlob(context, req, saveToLocal, null, hash);
451
424
  if (result?.hash && context?.res?.body) {
452
425
  await setFileStoreMap(result.hash, context.res.body);
453
426
  }
@@ -484,7 +457,7 @@ async function CortexFileHandler(context, req) {
484
457
  };
485
458
 
486
459
  try {
487
- // Parse URL and get pathname without query parameters for extension check
460
+ // Parse URL and get pathname without query parameters for extension check
488
461
  const urlObj = new URL(uri);
489
462
  const pathWithoutQuery = urlObj.pathname;
490
463
 
@@ -498,22 +471,48 @@ async function CortexFileHandler(context, req) {
498
471
  await downloadFile(uri, downloadedFile);
499
472
 
500
473
  try {
501
- if (save) {
502
- const saveResults = [];
503
- const originalFileName = `${uuidv4()}_${encodeURIComponent(path.basename(downloadedFile))}`;
504
- const originalFilePath = path.join(tempDir, originalFileName);
505
- await fs.promises.copyFile(downloadedFile, originalFilePath);
506
- let fileUrl;
507
- if (useAzure) {
508
- const savedBlob = await saveFileToBlob(originalFilePath, requestId);
509
- fileUrl = savedBlob?.url;
474
+ if (shouldSave) {
475
+ // Check if file needs conversion first
476
+ if (conversionService.needsConversion(downloadedFile)) {
477
+ // Convert the file
478
+ const conversion = await conversionService.convertFile(downloadedFile, uri);
479
+ if (!conversion.converted) {
480
+ throw new Error('File conversion failed');
481
+ }
482
+
483
+ // Save the converted file
484
+ const convertedSaveResult = await conversionService._saveConvertedFile(conversion.convertedPath, requestId);
485
+
486
+ // Return the converted file URL
487
+ context.res = {
488
+ status: 200,
489
+ body: {
490
+ url: convertedSaveResult.url,
491
+ blobName: path.basename(convertedSaveResult.url)
492
+ }
493
+ };
510
494
  } else {
511
- fileUrl = await moveFileToPublicFolder(originalFilePath, requestId);
495
+ // File doesn't need conversion, save the original file
496
+ const saveResult = await conversionService._saveConvertedFile(downloadedFile, requestId);
497
+
498
+ // Return the original file URL
499
+ context.res = {
500
+ status: 200,
501
+ body: {
502
+ url: saveResult.url,
503
+ blobName: path.basename(saveResult.url)
504
+ }
505
+ };
512
506
  }
513
- saveResults.push(fileUrl);
514
- result.push(fileUrl);
507
+ return;
515
508
  } else {
516
- const text = await conversionService.convertFile(downloadedFile, uri, true);
509
+ let text;
510
+ if (conversionService.needsConversion(downloadedFile)) {
511
+ text = await conversionService.convertFile(downloadedFile, uri, true);
512
+ } else {
513
+ // For files that don't need conversion, read the file contents directly
514
+ text = await fs.promises.readFile(downloadedFile, 'utf-8');
515
+ }
517
516
  result.push(...easyChunker(text));
518
517
  }
519
518
  } catch (err) {
@@ -533,24 +532,16 @@ async function CortexFileHandler(context, req) {
533
532
  console.log(`Error cleaning temp file ${downloadedFile}:`, err);
534
533
  }
535
534
 
536
- try {
537
- //delete uploaded prev nontext file
538
- //check cleanup for uploaded files url
539
- const regex = new RegExp(
540
- `${AZURE_STORAGE_CONTAINER_NAME}/([a-z0-9-]+)`,
535
+ // Delete uploaded files only if we're NOT saving the converted version.
536
+ // When save=true we need to keep the converted file (which is stored under the same requestId prefix),
537
+ // so skip the cleanup in that case.
538
+ if (!shouldSave) {
539
+ await storageService.deleteFiles(requestId);
540
+ console.log(
541
+ `Cleaned temp files for request id ${requestId}`,
541
542
  );
542
- const match = uri.match(regex);
543
- if (match && match[1]) {
544
- const extractedValue = match[1];
545
- useAzure
546
- ? await deleteBlob(extractedValue)
547
- : await deleteFolder(extractedValue);
548
- console.log(
549
- `Cleaned temp file ${uri} with request id ${extractedValue}`,
550
- );
551
- }
552
- } catch (err) {
553
- console.log(`Error cleaning temp file ${uri}:`, err);
543
+ } else {
544
+ console.log(`Skip cleanup for request id ${requestId} because save flag is set`);
554
545
  }
555
546
  }
556
547
  } else {
@@ -571,23 +562,12 @@ async function CortexFileHandler(context, req) {
571
562
  // sequential processing of chunks
572
563
  for (let index = 0; index < chunks.length; index++) {
573
564
  const chunkPath = chunks[index];
574
- let chunkUrl;
575
- let chunkGcsUrl;
576
-
577
- if (useAzure) {
578
- const savedBlob = await saveFileToBlob(chunkPath, requestId);
579
- chunkUrl = savedBlob.url;
580
- } else {
581
- chunkUrl = await moveFileToPublicFolder(chunkPath, requestId);
582
- }
583
-
584
- // If GCS is configured, save to GCS
585
- chunkGcsUrl = await uploadChunkToGCS(chunkPath, requestId);
565
+ const chunkResult = await storageService.uploadFile(context, chunkPath, requestId);
586
566
 
587
567
  const chunkOffset = chunkOffsets[index];
588
- result.push({ uri: chunkUrl, offset: chunkOffset, gcs: chunkGcsUrl });
568
+ result.push({ uri: chunkResult.url, offset: chunkOffset, gcs: chunkResult.gcs });
589
569
  console.log(
590
- `Saved chunk as: ${chunkUrl}${chunkGcsUrl ? ` and ${chunkGcsUrl}` : ''}`,
570
+ `Saved chunk as: ${chunkResult.url}${chunkResult.gcs ? ` and ${chunkResult.gcs}` : ''}`,
591
571
  );
592
572
  await sendProgress();
593
573
  }
@@ -625,4 +605,4 @@ async function CortexFileHandler(context, req) {
625
605
  };
626
606
  }
627
607
 
628
- export default CortexFileHandler;
608
+ export default CortexFileHandler;