@aj-archipelago/cortex 1.3.50 → 1.3.52
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- package/config.js +1 -1
- package/helper-apps/cortex-browser/Dockerfile +19 -31
- package/helper-apps/cortex-browser/function_app.py +708 -181
- package/helper-apps/cortex-browser/requirements.txt +4 -4
- package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample} +2 -1
- package/helper-apps/cortex-file-handler/Dockerfile +1 -1
- package/helper-apps/cortex-file-handler/INTERFACE.md +178 -0
- package/helper-apps/cortex-file-handler/function.json +2 -6
- package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
- package/helper-apps/cortex-file-handler/package.json +11 -6
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +4 -1
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
- package/helper-apps/cortex-file-handler/src/blobHandler.js +1056 -0
- package/helper-apps/cortex-file-handler/{constants.js → src/constants.js} +64 -48
- package/helper-apps/cortex-file-handler/src/docHelper.js +37 -0
- package/helper-apps/cortex-file-handler/{fileChunker.js → src/fileChunker.js} +97 -65
- package/helper-apps/cortex-file-handler/{helper.js → src/helper.js} +34 -25
- package/helper-apps/cortex-file-handler/src/index.js +608 -0
- package/helper-apps/cortex-file-handler/src/localFileHandler.js +107 -0
- package/helper-apps/cortex-file-handler/{redis.js → src/redis.js} +23 -17
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +309 -0
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +57 -0
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +177 -0
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +258 -0
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +182 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +86 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +53 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +259 -0
- package/helper-apps/cortex-file-handler/src/start.js +88 -0
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +28 -0
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +90 -66
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +152 -0
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +105 -108
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +462 -0
- package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
- package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +307 -0
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +291 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +984 -647
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +120 -0
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +193 -0
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +148 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +100 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +113 -0
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +85 -0
- package/helper-apps/cortex-markitdown/.funcignore +1 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
- package/helper-apps/cortex-markitdown/README.md +94 -0
- package/helper-apps/cortex-markitdown/host.json +15 -0
- package/helper-apps/cortex-markitdown/requirements.txt +2 -0
- package/lib/entityConstants.js +1 -1
- package/lib/requestExecutor.js +44 -36
- package/package.json +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
- package/server/plugins/openAiWhisperPlugin.js +59 -87
- package/helper-apps/cortex-file-handler/blobHandler.js +0 -567
- package/helper-apps/cortex-file-handler/docHelper.js +0 -144
- package/helper-apps/cortex-file-handler/index.js +0 -440
- package/helper-apps/cortex-file-handler/localFileHandler.js +0 -108
- package/helper-apps/cortex-file-handler/start.js +0 -63
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
package/helper-apps/cortex-file-handler/src/blobHandler.js
@@ -0,0 +1,1056 @@
import fs from 'fs';
import path from 'path';
import { join } from 'path';
import { PassThrough } from 'stream';
import { pipeline as _pipeline } from 'stream';
import { promisify } from 'util';

import {
  generateBlobSASQueryParameters,
  StorageSharedKeyCredential,
  BlobServiceClient,
} from '@azure/storage-blob';
import { Storage } from '@google-cloud/storage';
import axios from 'axios';
import Busboy from 'busboy';
import { v4 as uuidv4 } from 'uuid';
const pipeline = promisify(_pipeline);

import { publicFolder, port, ipAddress } from './start.js';
import { CONVERTED_EXTENSIONS } from './constants.js';

// eslint-disable-next-line import/no-extraneous-dependencies
import mime from 'mime-types';

import os from 'os';
import { sanitizeFilename } from './utils/filenameUtils.js';

import { FileConversionService } from './services/FileConversionService.js';

function isBase64(str) {
  try {
    return btoa(atob(str)) == str;
  } catch (err) {
    return false;
  }
}

const { SAS_TOKEN_LIFE_DAYS = 30 } = process.env;
const GCP_SERVICE_ACCOUNT_KEY =
  process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 ||
  process.env.GCP_SERVICE_ACCOUNT_KEY ||
  '{}';
const GCP_SERVICE_ACCOUNT = isBase64(GCP_SERVICE_ACCOUNT_KEY)
  ? JSON.parse(Buffer.from(GCP_SERVICE_ACCOUNT_KEY, 'base64').toString())
  : JSON.parse(GCP_SERVICE_ACCOUNT_KEY);
const { project_id: GCP_PROJECT_ID } = GCP_SERVICE_ACCOUNT;

let gcs;
if (!GCP_PROJECT_ID || !GCP_SERVICE_ACCOUNT) {
  console.warn(
    'No Google Cloud Storage credentials provided - GCS will not be used',
  );
} else {
  try {
    gcs = new Storage({
      projectId: GCP_PROJECT_ID,
      credentials: GCP_SERVICE_ACCOUNT,
    });

    // Rest of your Google Cloud operations using gcs object
  } catch (error) {
    console.error(
      'Google Cloud Storage credentials are invalid - GCS will not be used: ',
      error,
    );
  }
}

export const AZURE_STORAGE_CONTAINER_NAME =
  process.env.AZURE_STORAGE_CONTAINER_NAME || 'whispertempfiles';
export const GCS_BUCKETNAME = process.env.GCS_BUCKETNAME || 'cortextempfiles';

function isEncoded(str) {
  // Checks for any percent-encoded sequence
  return /%[0-9A-Fa-f]{2}/.test(str);
}

// Helper function to ensure GCS URLs are never encoded
function ensureUnencodedGcsUrl(url) {
  if (!url || !url.startsWith('gs://')) {
    return url;
  }
  // Split into bucket and path parts
  const [bucket, ...pathParts] = url.replace('gs://', '').split('/');
  // Reconstruct URL with decoded path parts, handling invalid characters
  return `gs://${bucket}/${pathParts.map(part => {
    try {
      return decodeURIComponent(part);
    } catch (error) {
      // If decoding fails, sanitize the filename by removing invalid characters
      return part.replace(/[^\w\-\.]/g, '_');
    }
  }).join('/')}`;
}

async function gcsUrlExists(url, defaultReturn = false) {
  try {
    if (!url || !gcs) {
      return defaultReturn; // Cannot check return
    }

    // Ensure URL is not encoded
    const unencodedUrl = ensureUnencodedGcsUrl(url);
    const urlParts = unencodedUrl.replace('gs://', '').split('/');
    const bucketName = urlParts[0];
    const fileName = urlParts.slice(1).join('/');

    if (process.env.STORAGE_EMULATOR_HOST) {
      try {
        const response = await axios.get(
          `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucketName}/o/${encodeURIComponent(fileName)}`,
          { validateStatus: (status) => status === 200 || status === 404 },
        );
        return response.status === 200;
      } catch (error) {
        console.error('Error checking emulator file:', error);
        return false;
      }
    }

    const bucket = gcs.bucket(bucketName);
    const file = bucket.file(fileName);

    const [exists] = await file.exists();

    return exists;
  } catch (error) {
    console.error('Error checking if GCS URL exists:', error);
    return false;
  }
}

/**
 * Downloads a file from Google Cloud Storage to a local file
 * @param {string} gcsUrl - The GCS URL in format gs://bucket-name/file-path
 * @param {string} destinationPath - The local path where the file should be saved
 * @returns {Promise<void>}
 */
async function downloadFromGCS(gcsUrl, destinationPath) {
  if (!gcsUrl || !gcs) {
    throw new Error('Invalid GCS URL or GCS client not initialized');
  }

  const urlParts = gcsUrl.replace('gs://', '').split('/');
  const bucketName = urlParts[0];
  const fileName = urlParts.slice(1).join('/');

  if (process.env.STORAGE_EMULATOR_HOST) {
    // Use axios to download from emulator
    const response = await axios({
      method: 'GET',
      url: `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucketName}/o/${encodeURIComponent(fileName)}?alt=media`,
      responseType: 'stream'
    });

    // Write the response to file
    const writer = fs.createWriteStream(destinationPath);
    await new Promise((resolve, reject) => {
      response.data.pipe(writer);
      writer.on('finish', resolve);
      writer.on('error', reject);
    });
  } else {
    // Use GCS client for real GCS
    const bucket = gcs.bucket(bucketName);
    const file = bucket.file(fileName);
    await file.download({ destination: destinationPath });
  }
}

export const getBlobClient = async () => {
  const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
  const containerName = AZURE_STORAGE_CONTAINER_NAME;
  if (!connectionString || !containerName) {
    throw new Error(
      'Missing Azure Storage connection string or container name environment variable',
    );
  }

  const blobServiceClient =
    BlobServiceClient.fromConnectionString(connectionString);

  const serviceProperties = await blobServiceClient.getProperties();
  if (!serviceProperties.defaultServiceVersion) {
    serviceProperties.defaultServiceVersion = '2020-02-10';
    await blobServiceClient.setProperties(serviceProperties);
  }

  const containerClient = blobServiceClient.getContainerClient(containerName);

  return { blobServiceClient, containerClient };
};

async function saveFileToBlob(chunkPath, requestId) {
  const { containerClient } = await getBlobClient();
  // Use the filename with a UUID as the blob name
  let baseName = path.basename(chunkPath);
  // Remove any query parameters from the filename
  baseName = baseName.split('?')[0];
  // Only encode if not already encoded
  if (!isEncoded(baseName)) {
    baseName = encodeURIComponent(baseName);
  }
  const blobName = `${requestId}/${uuidv4()}_${baseName}`;

  // Create a read stream for the chunk file
  const fileStream = fs.createReadStream(chunkPath);

  // Upload the chunk to Azure Blob Storage using the stream
  const blockBlobClient = containerClient.getBlockBlobClient(blobName);
  await blockBlobClient.uploadStream(fileStream);

  // Generate SAS token after successful upload
  const sasToken = generateSASToken(containerClient, blobName);

  // Return an object with the URL property
  return {
    url: `${blockBlobClient.url}?${sasToken}`,
    blobName: blobName
  };
}

const generateSASToken = (
  containerClient,
  blobName,
  expiryTimeSeconds = parseInt(SAS_TOKEN_LIFE_DAYS) * 24 * 60 * 60,
) => {
  const { accountName, accountKey } = containerClient.credential;
  const sharedKeyCredential = new StorageSharedKeyCredential(
    accountName,
    accountKey,
  );

  const sasOptions = {
    containerName: containerClient.containerName,
    blobName: blobName,
    permissions: 'r', // Read permission
    startsOn: new Date(),
    expiresOn: new Date(new Date().valueOf() + expiryTimeSeconds * 1000),
  };

  const sasToken = generateBlobSASQueryParameters(
    sasOptions,
    sharedKeyCredential,
  ).toString();
  return sasToken;
};

//deletes blob that has the requestId
async function deleteBlob(requestId) {
  if (!requestId) throw new Error('Missing requestId parameter');
  const { containerClient } = await getBlobClient();
  // List all blobs in the container
  const blobs = containerClient.listBlobsFlat();

  const result = [];
  // Iterate through the blobs
  for await (const blob of blobs) {
    // Check if the blob name starts with requestId_ (flat structure)
    // or is inside a folder named requestId/ (folder structure)
    if (
      blob.name.startsWith(`${requestId}_`) ||
      blob.name.startsWith(`${requestId}/`)
    ) {
      // Delete the matching blob
      const blockBlobClient = containerClient.getBlockBlobClient(blob.name);
      await blockBlobClient.delete();
      console.log(`Cleaned blob: ${blob.name}`);
      result.push(blob.name);
    }
  }

  return result;
}

function uploadBlob(
  context,
  req,
  saveToLocal = false,
  filePath = null,
  hash = null,
) {
  return new Promise((resolve, reject) => {
    (async () => {
      try {
        let requestId = uuidv4();
        const body = {};

        // If filePath is given, we are dealing with local file and not form-data
        if (filePath) {
          const file = fs.createReadStream(filePath);
          const filename = path.basename(filePath);
          try {
            const result = await uploadFile(
              context,
              requestId,
              body,
              saveToLocal,
              file,
              filename,
              resolve,
              hash,
            );
            resolve(result);
          } catch (error) {
            const err = new Error('Error processing file upload.');
            err.status = 500;
            throw err;
          }
        } else {
          // Otherwise, continue working with form-data
          const busboy = Busboy({ headers: req.headers });
          let hasFile = false;
          let errorOccurred = false;

          busboy.on('field', (fieldname, value) => {
            if (fieldname === 'requestId') {
              requestId = value;
            } else if (fieldname === 'hash') {
              hash = value;
            }
          });

          busboy.on('file', async (fieldname, file, info) => {
            if (errorOccurred) return;
            hasFile = true;

            // Validate file
            if (!info.filename || info.filename.trim() === '') {
              errorOccurred = true;
              const err = new Error('Invalid file: missing filename');
              err.status = 400;
              reject(err);
              return;
            }

            // Prepare for streaming to cloud destinations
            const filename = info.filename;
            const safeFilename = path.basename(filename); // Sanitize filename
            const uploadName = `${requestId || uuidv4()}_${safeFilename}`;
            const azureStream = !saveToLocal ? new PassThrough() : null;
            const gcsStream = gcs ? new PassThrough() : null;
            let diskWriteStream, tempDir, tempFilePath;
            let diskWritePromise;
            let diskWriteError = null;
            let cloudUploadError = null;

            // Start local disk write in parallel (non-blocking for response)
            if (saveToLocal) {
              try {
                tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'upload-'));
              } catch (err) {
                console.error('Error creating tempDir:', err);
                errorOccurred = true;
                reject(err);
                return;
              }
              tempFilePath = path.join(tempDir, safeFilename);
              try {
                diskWriteStream = fs.createWriteStream(tempFilePath, {
                  highWaterMark: 1024 * 1024,
                  autoClose: true,
                });
              } catch (err) {
                console.error('Error creating write stream:', err, 'Temp dir exists:', fs.existsSync(tempDir));
                errorOccurred = true;
                reject(err);
                return;
              }
              diskWriteStream.on('error', (err) => {
                console.error('Disk write stream error:', err);
              });
              diskWriteStream.on('close', () => {
                console.log('Disk write stream closed for:', tempFilePath);
              });
              diskWritePromise = new Promise((res, rej) => {
                diskWriteStream.on('finish', res);
                diskWriteStream.on('error', (err) => {
                  diskWriteError = err;
                  rej(err);
                });
              });
            }

            // Pipe incoming file to all destinations
            let receivedAnyData = false;
            file.on('data', () => { receivedAnyData = true; });
            if (azureStream) file.pipe(azureStream);
            if (gcsStream) file.pipe(gcsStream);
            if (diskWriteStream) file.pipe(diskWriteStream);

            // Listen for end event to check for empty file
            file.on('end', async () => {
              if (!receivedAnyData) {
                errorOccurred = true;
                // Abort all streams
                if (azureStream) azureStream.destroy();
                if (gcsStream) gcsStream.destroy();
                if (diskWriteStream) diskWriteStream.destroy();
                const err = new Error('Invalid file: file is empty');
                err.status = 400;
                reject(err);
              }
            });

            // Start cloud uploads immediately
            let azurePromise;
            if (!saveToLocal) {
              azurePromise = saveToAzureStorage(context, uploadName, azureStream)
                .catch(async (err) => {
                  cloudUploadError = err;
                  // Fallback: try from disk if available
                  if (diskWritePromise) {
                    await diskWritePromise;
                    const diskStream = fs.createReadStream(tempFilePath, {
                      highWaterMark: 1024 * 1024,
                      autoClose: true,
                    });
                    return saveToAzureStorage(context, uploadName, diskStream);
                  }
                  throw err;
                });
            }
            let gcsPromise;
            if (gcsStream) {
              gcsPromise = saveToGoogleStorage(context, uploadName, gcsStream)
                .catch(async (err) => {
                  cloudUploadError = err;
                  if (diskWritePromise) {
                    await diskWritePromise;
                    const diskStream = fs.createReadStream(tempFilePath, {
                      highWaterMark: 1024 * 1024,
                      autoClose: true,
                    });
                    return saveToGoogleStorage(context, uploadName, diskStream);
                  }
                  throw err;
                });
            }

            // Wait for cloud uploads to finish
            try {
              const results = await Promise.all([
                azurePromise ? azurePromise.then((url) => ({ url, type: 'primary' })) : null,
                (!azurePromise && saveToLocal)
                  ? Promise.resolve({ url: null, type: 'primary-local' }) // placeholder for local, url handled later
                  : null,
                gcsPromise ? gcsPromise.then((gcs) => ({ gcs, type: 'gcs' })) : null,
              ].filter(Boolean));

              const result = {
                message: `File '${uploadName}' uploaded successfully.`,
                filename: uploadName,
                ...results.reduce((acc, result) => {
                  if (result.type === 'primary') acc.url = result.url;
                  if (result.type === 'gcs') acc.gcs = ensureUnencodedGcsUrl(result.gcs);
                  return acc;
                }, {}),
              };
              if (hash) result.hash = hash;

              // If saving locally, wait for disk write to finish and then move to public folder
              if (saveToLocal) {
                try {
                  if (diskWritePromise) {
                    await diskWritePromise; // ensure file fully written
                  }
                  const localUrl = await saveToLocalStorage(
                    context,
                    requestId,
                    uploadName,
                    fs.createReadStream(tempFilePath, {
                      highWaterMark: 1024 * 1024,
                      autoClose: true,
                    }),
                  );
                  result.url = localUrl;
                } catch (err) {
                  console.error('Error saving to local storage:', err);
                  throw err;
                }
              }

              // After original uploads, handle optional conversion
              const conversionService = new FileConversionService(context, !saveToLocal);

              if (conversionService.needsConversion(safeFilename)) {
                try {
                  context.log('Starting file conversion (busboy)...');

                  // Ensure we have a local copy of the file for conversion
                  let localPathForConversion = tempFilePath;

                  if (!localPathForConversion) {
                    // No temp file was written (saveToLocal === false). Download from primary URL.
                    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'convert-'));
                    localPathForConversion = path.join(tmpDir, safeFilename);
                    await conversionService._downloadFile(result.url, localPathForConversion);
                  } else {
                    // Wait until disk write completes to guarantee full file is present
                    if (diskWritePromise) {
                      await diskWritePromise;
                    }
                  }

                  // Perform the conversion
                  const conversion = await conversionService.convertFile(localPathForConversion, result.url);
                  context.log('File conversion completed (busboy):', conversion);

                  if (conversion.converted) {
                    context.log('Saving converted file (busboy)...');
                    // Save converted file to primary storage
                    const convertedSaveResult = await conversionService._saveConvertedFile(conversion.convertedPath, requestId);

                    // Optionally save to GCS
                    let convertedGcsUrl;
                    if (conversionService._isGCSConfigured()) {
                      convertedGcsUrl = await conversionService._uploadChunkToGCS(conversion.convertedPath, requestId);
                    }

                    // Attach to response body
                    result.converted = {
                      url: convertedSaveResult.url,
                      gcs: convertedGcsUrl,
                    };
                    context.log('Conversion process (busboy) completed successfully');
                  }
                } catch (convErr) {
                  console.error('Error converting file (busboy):', convErr);
                  context.log('Error during conversion (busboy):', convErr.message);
                  // Continue without failing the upload
                }
              }

              // Respond after conversion (if any)
              context.res = { status: 200, body: result };
              resolve(result);
            } catch (err) {
              errorOccurred = true;
              reject(err);
            } finally {
              // Clean up temp file if written
              if (tempDir) {
                fs.rmSync(tempDir, { recursive: true, force: true });
              }
            }
          });

          busboy.on('error', (error) => {
            if (errorOccurred) return;
            errorOccurred = true;
            const err = new Error('No file provided in request');
            err.status = 400;
            reject(err);
          });

          busboy.on('finish', () => {
            if (errorOccurred) return;
            if (!hasFile) {
              errorOccurred = true;
              const err = new Error('No file provided in request');
              err.status = 400;
              reject(err);
            }
          });

          // Handle errors from piping the request
          req.on('error', (error) => {
            if (errorOccurred) return;
            errorOccurred = true;
            // Only log unexpected errors
            if (error.message !== 'No file provided in request') {
              context.log('Error in request stream:', error);
            }
            const err = new Error('No file provided in request');
            err.status = 400;
            reject(err);
          });

          try {
            req.pipe(busboy);
          } catch (error) {
            if (errorOccurred) return;
            errorOccurred = true;
            // Only log unexpected errors
            if (error.message !== 'No file provided in request') {
              context.log('Error piping request to busboy:', error);
            }
            const err = new Error('No file provided in request');
            err.status = 400;
            reject(err);
          }
        }
      } catch (error) {
        // Only log unexpected errors
        if (error.message !== 'No file provided in request') {
          context.log('Error processing file upload:', error);
        }
        const err = new Error(error.message || 'Error processing file upload.');
        err.status = error.status || 500;
        reject(err);
      }
    })();
  });
}

// Helper function to handle local file storage
async function saveToLocalStorage(context, requestId, encodedFilename, file) {
  const localPath = join(publicFolder, requestId);
  fs.mkdirSync(localPath, { recursive: true });

  // Sanitize filename by removing invalid characters
  const sanitizedFilename = sanitizeFilename(encodedFilename);
  const destinationPath = `${localPath}/${sanitizedFilename}`;

  await pipeline(file, fs.createWriteStream(destinationPath));
  return `http://${ipAddress}:${port}/files/${requestId}/${sanitizedFilename}`;
}

// Helper function to handle Azure blob storage
async function saveToAzureStorage(context, encodedFilename, file) {
  const { containerClient } = await getBlobClient();
  const contentType = mime.lookup(encodedFilename);

  // Create a safe blob name that is URI-encoded once (no double encoding)
  let blobName = sanitizeFilename(encodedFilename);
  blobName = encodeURIComponent(blobName);

  const options = {
    blobHTTPHeaders: contentType ? { blobContentType: contentType } : {},
    maxConcurrency: 50,
    blockSize: 8 * 1024 * 1024,
  };

  const blockBlobClient = containerClient.getBlockBlobClient(blobName);
  context.log(`Uploading to Azure... ${blobName}`);
  await blockBlobClient.uploadStream(file, undefined, undefined, options);
  const sasToken = generateSASToken(containerClient, blobName);
  return `${blockBlobClient.url}?${sasToken}`;
}

// Helper function to upload a file to Google Cloud Storage
async function uploadToGCS(context, file, filename) {
  const objectName = sanitizeFilename(filename);
  const gcsFile = gcs.bucket(GCS_BUCKETNAME).file(objectName);
  const writeStream = gcsFile.createWriteStream({
    resumable: true,
    validation: false,
    metadata: {
      contentType: mime.lookup(objectName) || 'application/octet-stream',
    },
    chunkSize: 8 * 1024 * 1024,
    numRetries: 3,
    retryDelay: 1000,
  });
  context.log(`Uploading to GCS... ${objectName}`);
  await pipeline(file, writeStream);
  return `gs://${GCS_BUCKETNAME}/${objectName}`;
}

// Wrapper that checks if GCS is configured
async function saveToGoogleStorage(context, encodedFilename, file) {
  if (!gcs) {
    throw new Error('Google Cloud Storage is not initialized');
  }
  return uploadToGCS(context, file, encodedFilename);
}

async function uploadFile(
  context,
  requestId,
  body,
  saveToLocal,
  file,
  filename,
  resolve,
  hash = null,
) {
  try {
    if (!file) {
      context.res = {
        status: 400,
        body: 'No file provided in request',
      };
      resolve(context.res);
      return;
    }

    const ext = path.extname(filename).toLowerCase();
    context.log(`Processing file with extension: ${ext}`);
    let uploadPath = null;
    let uploadName = null;
    let tempDir = null;

    // Create temp directory for file operations
    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'upload-'));
    const tempOriginal = path.join(tempDir, filename);
    context.log(`Created temp directory: ${tempDir}`);

    // Optimize initial write with larger buffer
    const writeStream = fs.createWriteStream(tempOriginal, {
      highWaterMark: 1024 * 1024, // 1MB chunks for initial write
      autoClose: true,
    });

    // Use pipeline with error handling
    context.log('Writing file to temp location...');
    await pipeline(file, writeStream);
    context.log('File written to temp location successfully');

    uploadPath = tempOriginal;
    uploadName = `${requestId || uuidv4()}_${filename}`;
    context.log(`Prepared upload name: ${uploadName}`);

    // Create optimized read streams with larger buffers for storage uploads
    const createOptimizedReadStream = (path) => fs.createReadStream(path, {
      highWaterMark: 1024 * 1024, // 1MB chunks for storage uploads
      autoClose: true,
    });

    // Upload original in parallel with optimized streams
    const storagePromises = [];
    context.log('Starting primary storage upload...');
    const primaryPromise = saveToLocal
      ? saveToLocalStorage(
          context,
          requestId,
          uploadName,
          createOptimizedReadStream(uploadPath),
        )
      : saveToAzureStorage(
          context,
          uploadName,
          createOptimizedReadStream(uploadPath),
        );
    storagePromises.push(
      primaryPromise.then((url) => {
        context.log('Primary storage upload completed');
        return { url, type: 'primary' };
      }),
    );

    if (gcs) {
      context.log('Starting GCS upload...');
      storagePromises.push(
        saveToGoogleStorage(
          context,
          uploadName,
          createOptimizedReadStream(uploadPath),
        ).then((gcsUrl) => {
          context.log('GCS upload completed');
          return {
            gcs: gcsUrl,
            type: 'gcs',
          };
        }),
      );
    }

    // Wait for original uploads to complete
    context.log('Waiting for all storage uploads to complete...');
    const results = await Promise.all(storagePromises);
    const result = {
      message: `File '${uploadName}' ${saveToLocal ? 'saved to folder' : 'uploaded'} successfully.`,
      filename: uploadName,
      ...results.reduce((acc, result) => {
        if (result.type === 'primary') acc.url = result.url;
        if (result.type === 'gcs') acc.gcs = ensureUnencodedGcsUrl(result.gcs);
        return acc;
      }, {}),
    };

    if (hash) {
      result.hash = hash;
    }

    // Initialize conversion service
    const conversionService = new FileConversionService(context, !saveToLocal);

    // Check if file needs conversion and handle it
    if (conversionService.needsConversion(filename)) {
      try {
        context.log('Starting file conversion...');
        // Convert the file
        const conversion = await conversionService.convertFile(uploadPath, result.url);
        context.log('File conversion completed:', conversion);

        if (conversion.converted) {
          context.log('Saving converted file...');
          // Save converted file
          const convertedSaveResult = await conversionService._saveConvertedFile(conversion.convertedPath, requestId);
          context.log('Converted file saved to primary storage');

          // If GCS is configured, also save to GCS
          let convertedGcsUrl;
          if (conversionService._isGCSConfigured()) {
            context.log('Saving converted file to GCS...');
            convertedGcsUrl = await conversionService._uploadChunkToGCS(conversion.convertedPath, requestId);
            context.log('Converted file saved to GCS');
          }

          // Add converted file info to result
          result.converted = {
            url: convertedSaveResult.url,
            gcs: convertedGcsUrl
          };
          context.log('Conversion process completed successfully');
        }
      } catch (error) {
        console.error('Error converting file:', error);
        context.log('Error during conversion:', error.message);
        // Don't fail the upload if conversion fails
      }
    }

    context.res = {
      status: 200,
      body: result,
    };

    // Clean up temp files
    context.log('Cleaning up temporary files...');
    if (tempDir) {
      fs.rmSync(tempDir, { recursive: true, force: true });
      context.log('Temporary files cleaned up');
    }

    context.log('Upload process completed successfully');
    resolve(result);
  } catch (error) {
    context.log('Error in upload process:', error);
    if (body.url) {
      try {
        await cleanup(context, [body.url]);
      } catch (cleanupError) {
        context.log('Error during cleanup after failure:', cleanupError);
      }
    }
    throw error;
  }
}

// Helper to convert a stream to a buffer
async function streamToBuffer(stream) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    stream.on('data', (chunk) => chunks.push(chunk));
    stream.on('end', () => resolve(Buffer.concat(chunks)));
    stream.on('error', reject);
  });
}

// Function to delete files that haven't been used in more than a month
async function cleanup(context, urls = null) {
  const { containerClient } = await getBlobClient();
  const cleanedURLs = [];

  if (!urls) {
    const xMonthAgo = new Date();
    xMonthAgo.setMonth(xMonthAgo.getMonth() - 1);

    const blobs = containerClient.listBlobsFlat();

    for await (const blob of blobs) {
      const lastModified = blob.properties.lastModified;
      if (lastModified < xMonthAgo) {
        try {
          const blockBlobClient = containerClient.getBlockBlobClient(blob.name);
          await blockBlobClient.delete();
          context.log(`Cleaned blob: ${blob.name}`);
          cleanedURLs.push(blob.name);
        } catch (error) {
          if (error.statusCode !== 404) {
            context.log(`Error cleaning blob ${blob.name}:`, error);
          }
        }
      }
    }
  } else {
    for (const url of urls) {
      try {
        const blobName = url.replace(containerClient.url, '');
        const blockBlobClient = containerClient.getBlockBlobClient(blobName);
        await blockBlobClient.delete();
        context.log(`Cleaned blob: ${blobName}`);
        cleanedURLs.push(blobName);
      } catch (error) {
        if (error.statusCode !== 404) {
          context.log(`Error cleaning blob ${url}:`, error);
        }
      }
    }
  }
  return cleanedURLs;
}

async function cleanupGCS(urls = null) {
  if (!gcs) return [];
  const bucket = gcs.bucket(GCS_BUCKETNAME);
  const directories = new Set();
  const cleanedURLs = [];

  if (!urls) {
    const daysN = 30;
    const threshold = Date.now() - daysN * 24 * 60 * 60 * 1000;
    const [files] = await bucket.getFiles();

    for (const file of files) {
      const [metadata] = await file.getMetadata();
      const directoryPath = path.dirname(file.name);
      directories.add(directoryPath);
      if (metadata.updated) {
        const updatedTime = new Date(metadata.updated).getTime();
        if (updatedTime < threshold) {
          await file.delete();
          cleanedURLs.push(file.name);
        }
      }
    }
  } else {
    for (const url of urls) {
      const filePath = url.split('/').slice(3).join('/');
      const file = bucket.file(filePath);
      const directoryPath = path.dirname(file.name);
      directories.add(directoryPath);
      await file.delete();
      cleanedURLs.push(url);
    }
  }

  for (const directory of directories) {
    const [files] = await bucket.getFiles({ prefix: directory });
    if (files.length === 0) {
      await bucket.deleteFiles({ prefix: directory });
    }
  }

  return cleanedURLs;
}

async function deleteGCS(blobName) {
  if (!blobName) {
    console.log('[deleteGCS] No blobName provided, skipping GCS deletion');
    return;
  }

  if (!gcs) {
    console.log('[deleteGCS] GCS not initialized, skipping deletion');
    return;
  }

  try {
    if (process.env.STORAGE_EMULATOR_HOST) {
      console.log(`[deleteGCS] Using emulator at ${process.env.STORAGE_EMULATOR_HOST}`);
      console.log(`[deleteGCS] Attempting to delete files with prefix: ${blobName}`);

      // List files first
      const listUrl = `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${GCS_BUCKETNAME}/o?prefix=${blobName}`;
      console.log(`[deleteGCS] Listing files with URL: ${listUrl}`);

      const listResponse = await axios.get(listUrl, {
        validateStatus: (status) => true,
      });
      console.log(`[deleteGCS] List response status: ${listResponse.status}`);
      console.log(`[deleteGCS] List response data: ${JSON.stringify(listResponse.data)}`);

      if (listResponse.status === 200 && listResponse.data.items) {
        console.log(`[deleteGCS] Found ${listResponse.data.items.length} items to delete`);

        // Delete each file
        for (const item of listResponse.data.items) {
          const deleteUrl = `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${GCS_BUCKETNAME}/o/${encodeURIComponent(item.name)}`;
          console.log(`[deleteGCS] Deleting file: ${item.name}`);
          console.log(`[deleteGCS] Delete URL: ${deleteUrl}`);

          const deleteResponse = await axios.delete(deleteUrl, {
            validateStatus: (status) => true,
            headers: {
              'Content-Type': 'application/json',
            },
          });
          console.log(`[deleteGCS] Delete response status: ${deleteResponse.status}`);
          console.log(`[deleteGCS] Delete response data: ${JSON.stringify(deleteResponse.data)}`);
        }
        console.log('[deleteGCS] All files deleted successfully');
      } else {
        console.log('[deleteGCS] No files found to delete');
      }
    } else {
      console.log('[deleteGCS] Using real GCS');
      const bucket = gcs.bucket(GCS_BUCKETNAME);
      const [files] = await bucket.getFiles({ prefix: blobName });
      console.log(`[deleteGCS] Found ${files.length} files to delete`);

      if (files.length > 0) {
        await Promise.all(files.map((file) => file.delete()));
        console.log('[deleteGCS] All files deleted successfully');
      } else {
        console.log('[deleteGCS] No files found to delete');
      }
    }
  } catch (error) {
    // If we get a 404 error, it means the file is already gone, which is fine
    if (error.response?.status === 404 || error.code === 404) {
      console.log('[deleteGCS] File not found in GCS (404) - this is expected if file was already deleted');
      return;
    }
    console.error('[deleteGCS] Error during deletion:', error);
    console.error('[deleteGCS] Error details:', {
      message: error.message,
      code: error.code,
      errors: error.errors,
      response: error.response ? {
        status: error.response.status,
        statusText: error.response.statusText,
        data: error.response.data,
        headers: error.response.headers,
      } : null,
    });
    // Don't throw the error - we want to continue with cleanup even if GCS deletion fails
  }
}

// Helper function to ensure GCS upload for existing files
async function ensureGCSUpload(context, existingFile) {
  if (!existingFile.gcs && gcs) {
    context.log('GCS file was missing - uploading.');
    const fileName = sanitizeFilename(path.basename(existingFile.url.split('?')[0]));
    const response = await axios({ method: 'get', url: existingFile.url, responseType: 'stream' });
    existingFile.gcs = await uploadToGCS(context, response.data, fileName);
  }
  return existingFile;
}

async function uploadChunkToGCS(chunkPath, requestId) {
  if (!gcs) return null;
  const dirName = requestId || uuidv4();
  const baseName = sanitizeFilename(path.basename(chunkPath));
  const gcsFileName = `${dirName}/${baseName}`;
  await gcs.bucket(GCS_BUCKETNAME).upload(chunkPath, { destination: gcsFileName });
  return `gs://${GCS_BUCKETNAME}/${gcsFileName}`;
}

export {
  saveFileToBlob,
  deleteBlob,
  deleteGCS,
  uploadBlob,
  cleanup,
  cleanupGCS,
  gcsUrlExists,
  ensureGCSUpload,
  gcs,
  uploadChunkToGCS,
  downloadFromGCS,
};
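
For orientation, here is a minimal, hypothetical usage sketch (not part of the published diff) of how the helpers exported from src/blobHandler.js might be wired into an HTTP-triggered handler. The handler name, routing, and import path below are assumptions based only on the export signatures shown above.

// Hypothetical sketch; handler name and routing are assumptions, not package source.
import { uploadBlob, deleteBlob, cleanup, cleanupGCS } from './blobHandler.js';

export async function handleFileRequest(context, req) {
  if (req.method === 'POST') {
    // Streams multipart form-data to Azure (and to GCS when configured),
    // optionally converts the file, and resolves with { url, gcs, filename, ... }.
    return uploadBlob(context, req, /* saveToLocal */ false);
  }

  if (req.method === 'DELETE' && req.query.requestId) {
    // Deletes every blob stored under the given requestId prefix.
    return deleteBlob(req.query.requestId);
  }

  // Maintenance path: remove Azure blobs and GCS objects older than roughly 30 days.
  const cleanedAzure = await cleanup(context);
  const cleanedGCS = await cleanupGCS();
  return { cleanedAzure, cleanedGCS };
}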