@aj-archipelago/cortex 1.3.49 → 1.3.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-browser/Dockerfile +19 -31
- package/helper-apps/cortex-browser/function_app.py +708 -181
- package/helper-apps/cortex-browser/requirements.txt +4 -4
- package/helper-apps/cortex-file-handler/blobHandler.js +850 -429
- package/helper-apps/cortex-file-handler/constants.js +64 -48
- package/helper-apps/cortex-file-handler/docHelper.js +7 -114
- package/helper-apps/cortex-file-handler/fileChunker.js +96 -51
- package/helper-apps/cortex-file-handler/function.json +2 -6
- package/helper-apps/cortex-file-handler/helper.js +34 -25
- package/helper-apps/cortex-file-handler/index.js +324 -136
- package/helper-apps/cortex-file-handler/localFileHandler.js +56 -57
- package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
- package/helper-apps/cortex-file-handler/package.json +8 -4
- package/helper-apps/cortex-file-handler/redis.js +23 -17
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +1 -1
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
- package/helper-apps/cortex-file-handler/services/ConversionService.js +288 -0
- package/helper-apps/cortex-file-handler/services/FileConversionService.js +53 -0
- package/helper-apps/cortex-file-handler/start.js +63 -38
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +88 -64
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +114 -91
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +351 -0
- package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
- package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +943 -642
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +31 -0
- package/helper-apps/cortex-markitdown/.funcignore +1 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
- package/helper-apps/cortex-markitdown/README.md +94 -0
- package/helper-apps/cortex-markitdown/host.json +15 -0
- package/helper-apps/cortex-markitdown/requirements.txt +2 -0
- package/lib/requestExecutor.js +44 -36
- package/package.json +1 -1
- package/pathways/system/entity/tools/sys_tool_cognitive_search.js +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
- package/server/plugins/openAiWhisperPlugin.js +59 -87
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
|
@@ -1,68 +1,99 @@
|
|
|
1
|
-
import fs from
|
|
2
|
-
import path from
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import
|
|
6
|
-
import {
|
|
7
|
-
|
|
8
|
-
import {
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
import { PassThrough } from 'stream';
|
|
5
|
+
import { pipeline as _pipeline } from 'stream';
|
|
6
|
+
import { promisify } from 'util';
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
generateBlobSASQueryParameters,
|
|
10
|
+
StorageSharedKeyCredential,
|
|
11
|
+
BlobServiceClient,
|
|
12
|
+
} from '@azure/storage-blob';
|
|
13
|
+
import { Storage } from '@google-cloud/storage';
|
|
14
|
+
import axios from 'axios';
|
|
15
|
+
import Busboy from 'busboy';
|
|
16
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
9
17
|
const pipeline = promisify(_pipeline);
|
|
10
|
-
|
|
11
|
-
import {
|
|
12
|
-
import
|
|
13
|
-
|
|
18
|
+
|
|
19
|
+
import { publicFolder, port, ipAddress } from './start.js';
|
|
20
|
+
import { CONVERTED_EXTENSIONS } from './constants.js';
|
|
21
|
+
|
|
14
22
|
// eslint-disable-next-line import/no-extraneous-dependencies
|
|
15
|
-
import mime from
|
|
23
|
+
import mime from 'mime-types';
|
|
24
|
+
|
|
25
|
+
import os from 'os';
|
|
26
|
+
|
|
27
|
+
import { FileConversionService } from './services/FileConversionService.js';
|
|
16
28
|
|
|
17
29
|
function isBase64(str) {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
30
|
+
try {
|
|
31
|
+
return btoa(atob(str)) == str;
|
|
32
|
+
} catch (err) {
|
|
33
|
+
return false;
|
|
34
|
+
}
|
|
23
35
|
}
|
|
24
36
|
|
|
25
37
|
const { SAS_TOKEN_LIFE_DAYS = 30 } = process.env;
|
|
26
38
|
const GCP_SERVICE_ACCOUNT_KEY =
|
|
27
39
|
process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 ||
|
|
28
40
|
process.env.GCP_SERVICE_ACCOUNT_KEY ||
|
|
29
|
-
|
|
41
|
+
'{}';
|
|
30
42
|
const GCP_SERVICE_ACCOUNT = isBase64(GCP_SERVICE_ACCOUNT_KEY)
|
|
31
|
-
|
|
32
|
-
|
|
43
|
+
? JSON.parse(Buffer.from(GCP_SERVICE_ACCOUNT_KEY, 'base64').toString())
|
|
44
|
+
: JSON.parse(GCP_SERVICE_ACCOUNT_KEY);
|
|
33
45
|
const { project_id: GCP_PROJECT_ID } = GCP_SERVICE_ACCOUNT;
|
|
34
46
|
|
|
35
47
|
let gcs;
|
|
36
48
|
if (!GCP_PROJECT_ID || !GCP_SERVICE_ACCOUNT) {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
49
|
+
console.warn(
|
|
50
|
+
'No Google Cloud Storage credentials provided - GCS will not be used',
|
|
51
|
+
);
|
|
40
52
|
} else {
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
53
|
+
try {
|
|
54
|
+
gcs = new Storage({
|
|
55
|
+
projectId: GCP_PROJECT_ID,
|
|
56
|
+
credentials: GCP_SERVICE_ACCOUNT,
|
|
57
|
+
});
|
|
46
58
|
|
|
47
59
|
// Rest of your Google Cloud operations using gcs object
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
60
|
+
} catch (error) {
|
|
61
|
+
console.error(
|
|
62
|
+
'Google Cloud Storage credentials are invalid - GCS will not be used: ',
|
|
63
|
+
error,
|
|
64
|
+
);
|
|
65
|
+
}
|
|
54
66
|
}
|
|
55
67
|
|
|
56
|
-
export const AZURE_STORAGE_CONTAINER_NAME =
|
|
57
|
-
|
|
68
|
+
export const AZURE_STORAGE_CONTAINER_NAME =
|
|
69
|
+
process.env.AZURE_STORAGE_CONTAINER_NAME || 'whispertempfiles';
|
|
70
|
+
export const GCS_BUCKETNAME = process.env.GCS_BUCKETNAME || 'cortextempfiles';
|
|
71
|
+
|
|
72
|
+
function isEncoded(str) {
|
|
73
|
+
// Checks for any percent-encoded sequence
|
|
74
|
+
return /%[0-9A-Fa-f]{2}/.test(str);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Helper function to ensure GCS URLs are never encoded
|
|
78
|
+
function ensureUnencodedGcsUrl(url) {
|
|
79
|
+
if (!url || !url.startsWith('gs://')) {
|
|
80
|
+
return url;
|
|
81
|
+
}
|
|
82
|
+
// Split into bucket and path parts
|
|
83
|
+
const [bucket, ...pathParts] = url.replace('gs://', '').split('/');
|
|
84
|
+
// Reconstruct URL with decoded path parts
|
|
85
|
+
return `gs://${bucket}/${pathParts.map(part => decodeURIComponent(part)).join('/')}`;
|
|
86
|
+
}
|
|
58
87
|
|
|
59
88
|
async function gcsUrlExists(url, defaultReturn = false) {
|
|
60
89
|
try {
|
|
61
|
-
if(!url || !gcs) {
|
|
90
|
+
if (!url || !gcs) {
|
|
62
91
|
return defaultReturn; // Cannot check return
|
|
63
92
|
}
|
|
64
93
|
|
|
65
|
-
|
|
94
|
+
// Ensure URL is not encoded
|
|
95
|
+
const unencodedUrl = ensureUnencodedGcsUrl(url);
|
|
96
|
+
const urlParts = unencodedUrl.replace('gs://', '').split('/');
|
|
66
97
|
const bucketName = urlParts[0];
|
|
67
98
|
const fileName = urlParts.slice(1).join('/');
|
|
68
99
|
|
|
@@ -70,7 +101,7 @@ async function gcsUrlExists(url, defaultReturn = false) {
|
|
|
70
101
|
try {
|
|
71
102
|
const response = await axios.get(
|
|
72
103
|
`${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucketName}/o/${encodeURIComponent(fileName)}`,
|
|
73
|
-
{ validateStatus: status => status === 200 || status === 404 }
|
|
104
|
+
{ validateStatus: (status) => status === 200 || status === 404 },
|
|
74
105
|
);
|
|
75
106
|
return response.status === 200;
|
|
76
107
|
} catch (error) {
|
|
@@ -83,7 +114,7 @@ async function gcsUrlExists(url, defaultReturn = false) {
|
|
|
83
114
|
const file = bucket.file(fileName);
|
|
84
115
|
|
|
85
116
|
const [exists] = await file.exists();
|
|
86
|
-
|
|
117
|
+
|
|
87
118
|
return exists;
|
|
88
119
|
} catch (error) {
|
|
89
120
|
console.error('Error checking if GCS URL exists:', error);
|
|
@@ -91,477 +122,867 @@ async function gcsUrlExists(url, defaultReturn = false) {
|
|
|
91
122
|
}
|
|
92
123
|
}
|
|
93
124
|
|
|
125
|
+
/**
|
|
126
|
+
* Downloads a file from Google Cloud Storage to a local file
|
|
127
|
+
* @param {string} gcsUrl - The GCS URL in format gs://bucket-name/file-path
|
|
128
|
+
* @param {string} destinationPath - The local path where the file should be saved
|
|
129
|
+
* @returns {Promise<void>}
|
|
130
|
+
*/
|
|
131
|
+
async function downloadFromGCS(gcsUrl, destinationPath) {
|
|
132
|
+
if (!gcsUrl || !gcs) {
|
|
133
|
+
throw new Error('Invalid GCS URL or GCS client not initialized');
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
const urlParts = gcsUrl.replace('gs://', '').split('/');
|
|
137
|
+
const bucketName = urlParts[0];
|
|
138
|
+
const fileName = urlParts.slice(1).join('/');
|
|
139
|
+
|
|
140
|
+
if (process.env.STORAGE_EMULATOR_HOST) {
|
|
141
|
+
// Use axios to download from emulator
|
|
142
|
+
const response = await axios({
|
|
143
|
+
method: 'GET',
|
|
144
|
+
url: `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucketName}/o/${encodeURIComponent(fileName)}?alt=media`,
|
|
145
|
+
responseType: 'stream'
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
// Write the response to file
|
|
149
|
+
const writer = fs.createWriteStream(destinationPath);
|
|
150
|
+
await new Promise((resolve, reject) => {
|
|
151
|
+
response.data.pipe(writer);
|
|
152
|
+
writer.on('finish', resolve);
|
|
153
|
+
writer.on('error', reject);
|
|
154
|
+
});
|
|
155
|
+
} else {
|
|
156
|
+
// Use GCS client for real GCS
|
|
157
|
+
const bucket = gcs.bucket(bucketName);
|
|
158
|
+
const file = bucket.file(fileName);
|
|
159
|
+
await file.download({ destination: destinationPath });
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
94
163
|
export const getBlobClient = async () => {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
164
|
+
const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
|
|
165
|
+
const containerName = AZURE_STORAGE_CONTAINER_NAME;
|
|
166
|
+
if (!connectionString || !containerName) {
|
|
167
|
+
throw new Error(
|
|
168
|
+
'Missing Azure Storage connection string or container name environment variable',
|
|
169
|
+
);
|
|
170
|
+
}
|
|
102
171
|
|
|
103
|
-
|
|
172
|
+
const blobServiceClient =
|
|
173
|
+
BlobServiceClient.fromConnectionString(connectionString);
|
|
104
174
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
175
|
+
const serviceProperties = await blobServiceClient.getProperties();
|
|
176
|
+
if (!serviceProperties.defaultServiceVersion) {
|
|
177
|
+
serviceProperties.defaultServiceVersion = '2020-02-10';
|
|
178
|
+
await blobServiceClient.setProperties(serviceProperties);
|
|
179
|
+
}
|
|
110
180
|
|
|
111
|
-
|
|
181
|
+
const containerClient = blobServiceClient.getContainerClient(containerName);
|
|
112
182
|
|
|
113
|
-
|
|
183
|
+
return { blobServiceClient, containerClient };
|
|
114
184
|
};
|
|
115
185
|
|
|
116
186
|
async function saveFileToBlob(chunkPath, requestId) {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
187
|
+
const { containerClient } = await getBlobClient();
|
|
188
|
+
// Use the filename with a UUID as the blob name
|
|
189
|
+
let baseName = path.basename(chunkPath);
|
|
190
|
+
// Remove any query parameters from the filename
|
|
191
|
+
baseName = baseName.split('?')[0];
|
|
192
|
+
// Only encode if not already encoded
|
|
193
|
+
if (!isEncoded(baseName)) {
|
|
194
|
+
baseName = encodeURIComponent(baseName);
|
|
195
|
+
}
|
|
196
|
+
const blobName = `${requestId}/${uuidv4()}_${baseName}`;
|
|
197
|
+
|
|
198
|
+
// Create a read stream for the chunk file
|
|
199
|
+
const fileStream = fs.createReadStream(chunkPath);
|
|
124
200
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
201
|
+
// Upload the chunk to Azure Blob Storage using the stream
|
|
202
|
+
const blockBlobClient = containerClient.getBlockBlobClient(blobName);
|
|
203
|
+
await blockBlobClient.uploadStream(fileStream);
|
|
128
204
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
205
|
+
// Generate SAS token after successful upload
|
|
206
|
+
const sasToken = generateSASToken(containerClient, blobName);
|
|
207
|
+
|
|
208
|
+
// Return an object with the URL property
|
|
209
|
+
return {
|
|
210
|
+
url: `${blockBlobClient.url}?${sasToken}`,
|
|
211
|
+
blobName: blobName
|
|
212
|
+
};
|
|
132
213
|
}
|
|
133
214
|
|
|
134
|
-
const generateSASToken = (
|
|
135
|
-
|
|
215
|
+
const generateSASToken = (
|
|
216
|
+
containerClient,
|
|
217
|
+
blobName,
|
|
218
|
+
expiryTimeSeconds = parseInt(SAS_TOKEN_LIFE_DAYS) * 24 * 60 * 60,
|
|
136
219
|
) => {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
220
|
+
const { accountName, accountKey } = containerClient.credential;
|
|
221
|
+
const sharedKeyCredential = new StorageSharedKeyCredential(
|
|
222
|
+
accountName,
|
|
223
|
+
accountKey,
|
|
224
|
+
);
|
|
225
|
+
|
|
226
|
+
const sasOptions = {
|
|
227
|
+
containerName: containerClient.containerName,
|
|
228
|
+
blobName: blobName,
|
|
229
|
+
permissions: 'r', // Read permission
|
|
230
|
+
startsOn: new Date(),
|
|
231
|
+
expiresOn: new Date(new Date().valueOf() + expiryTimeSeconds * 1000),
|
|
232
|
+
};
|
|
233
|
+
|
|
234
|
+
const sasToken = generateBlobSASQueryParameters(
|
|
235
|
+
sasOptions,
|
|
236
|
+
sharedKeyCredential,
|
|
237
|
+
).toString();
|
|
238
|
+
return sasToken;
|
|
150
239
|
};
|
|
151
240
|
|
|
152
241
|
//deletes blob that has the requestId
|
|
153
242
|
async function deleteBlob(requestId) {
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
243
|
+
if (!requestId) throw new Error('Missing requestId parameter');
|
|
244
|
+
const { containerClient } = await getBlobClient();
|
|
245
|
+
// List all blobs in the container
|
|
246
|
+
const blobs = containerClient.listBlobsFlat();
|
|
247
|
+
|
|
248
|
+
const result = [];
|
|
249
|
+
// Iterate through the blobs
|
|
250
|
+
for await (const blob of blobs) {
|
|
162
251
|
// Check if the blob name starts with requestId_ (flat structure)
|
|
163
252
|
// or is inside a folder named requestId/ (folder structure)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
253
|
+
if (
|
|
254
|
+
blob.name.startsWith(`${requestId}_`) ||
|
|
255
|
+
blob.name.startsWith(`${requestId}/`)
|
|
256
|
+
) {
|
|
257
|
+
// Delete the matching blob
|
|
258
|
+
const blockBlobClient = containerClient.getBlockBlobClient(blob.name);
|
|
259
|
+
await blockBlobClient.delete();
|
|
260
|
+
console.log(`Cleaned blob: ${blob.name}`);
|
|
261
|
+
result.push(blob.name);
|
|
262
|
+
}
|
|
170
263
|
}
|
|
171
|
-
}
|
|
172
264
|
|
|
173
|
-
|
|
265
|
+
return result;
|
|
174
266
|
}
|
|
175
267
|
|
|
176
|
-
function uploadBlob(
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
268
|
+
function uploadBlob(
|
|
269
|
+
context,
|
|
270
|
+
req,
|
|
271
|
+
saveToLocal = false,
|
|
272
|
+
filePath = null,
|
|
273
|
+
hash = null,
|
|
274
|
+
) {
|
|
275
|
+
return new Promise((resolve, reject) => {
|
|
276
|
+
(async () => {
|
|
277
|
+
try {
|
|
278
|
+
let requestId = uuidv4();
|
|
279
|
+
const body = {};
|
|
280
|
+
|
|
281
|
+
// If filePath is given, we are dealing with local file and not form-data
|
|
282
|
+
if (filePath) {
|
|
283
|
+
const file = fs.createReadStream(filePath);
|
|
284
|
+
const filename = path.basename(filePath);
|
|
285
|
+
try {
|
|
286
|
+
const result = await uploadFile(
|
|
287
|
+
context,
|
|
288
|
+
requestId,
|
|
289
|
+
body,
|
|
290
|
+
saveToLocal,
|
|
291
|
+
file,
|
|
292
|
+
filename,
|
|
293
|
+
resolve,
|
|
294
|
+
hash,
|
|
295
|
+
);
|
|
296
|
+
resolve(result);
|
|
297
|
+
} catch (error) {
|
|
298
|
+
const err = new Error('Error processing file upload.');
|
|
299
|
+
err.status = 500;
|
|
300
|
+
throw err;
|
|
301
|
+
}
|
|
302
|
+
} else {
|
|
303
|
+
// Otherwise, continue working with form-data
|
|
304
|
+
const busboy = Busboy({ headers: req.headers });
|
|
305
|
+
let hasFile = false;
|
|
306
|
+
let errorOccurred = false;
|
|
307
|
+
|
|
308
|
+
busboy.on('field', (fieldname, value) => {
|
|
309
|
+
if (fieldname === 'requestId') {
|
|
310
|
+
requestId = value;
|
|
311
|
+
} else if (fieldname === 'hash') {
|
|
312
|
+
hash = value;
|
|
313
|
+
}
|
|
314
|
+
});
|
|
315
|
+
|
|
316
|
+
busboy.on('file', async (fieldname, file, info) => {
|
|
317
|
+
if (errorOccurred) return;
|
|
318
|
+
hasFile = true;
|
|
319
|
+
|
|
320
|
+
// Validate file
|
|
321
|
+
if (!info.filename || info.filename.trim() === '') {
|
|
322
|
+
errorOccurred = true;
|
|
323
|
+
const err = new Error('Invalid file: missing filename');
|
|
324
|
+
err.status = 400;
|
|
325
|
+
reject(err);
|
|
326
|
+
return;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
// Prepare for streaming to cloud destinations
|
|
330
|
+
const filename = info.filename;
|
|
331
|
+
const safeFilename = path.basename(filename); // Sanitize filename
|
|
332
|
+
const uploadName = `${requestId || uuidv4()}_${safeFilename}`;
|
|
333
|
+
const azureStream = !saveToLocal ? new PassThrough() : null;
|
|
334
|
+
const gcsStream = gcs ? new PassThrough() : null;
|
|
335
|
+
let diskWriteStream, tempDir, tempFilePath;
|
|
336
|
+
let diskWritePromise;
|
|
337
|
+
let diskWriteError = null;
|
|
338
|
+
let cloudUploadError = null;
|
|
339
|
+
|
|
340
|
+
// Start local disk write in parallel (non-blocking for response)
|
|
341
|
+
if (saveToLocal) {
|
|
342
|
+
try {
|
|
343
|
+
tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'upload-'));
|
|
344
|
+
} catch (err) {
|
|
345
|
+
console.error('Error creating tempDir:', err);
|
|
346
|
+
errorOccurred = true;
|
|
347
|
+
reject(err);
|
|
348
|
+
return;
|
|
349
|
+
}
|
|
350
|
+
tempFilePath = path.join(tempDir, safeFilename);
|
|
351
|
+
console.log('Temp dir:', tempDir, 'Original filename:', filename, 'Safe filename:', safeFilename, 'Temp file path:', tempFilePath);
|
|
352
|
+
console.log('About to create write stream for:', tempFilePath);
|
|
353
|
+
try {
|
|
354
|
+
diskWriteStream = fs.createWriteStream(tempFilePath, {
|
|
355
|
+
highWaterMark: 1024 * 1024,
|
|
356
|
+
autoClose: true,
|
|
357
|
+
});
|
|
358
|
+
console.log('Write stream created successfully for:', tempFilePath);
|
|
359
|
+
} catch (err) {
|
|
360
|
+
console.error('Error creating write stream:', err, 'Temp dir exists:', fs.existsSync(tempDir));
|
|
361
|
+
errorOccurred = true;
|
|
362
|
+
reject(err);
|
|
363
|
+
return;
|
|
364
|
+
}
|
|
365
|
+
diskWriteStream.on('error', (err) => {
|
|
366
|
+
console.error('Disk write stream error:', err);
|
|
367
|
+
});
|
|
368
|
+
diskWriteStream.on('close', () => {
|
|
369
|
+
console.log('Disk write stream closed for:', tempFilePath);
|
|
370
|
+
});
|
|
371
|
+
diskWritePromise = new Promise((res, rej) => {
|
|
372
|
+
diskWriteStream.on('finish', res);
|
|
373
|
+
diskWriteStream.on('error', (err) => {
|
|
374
|
+
diskWriteError = err;
|
|
375
|
+
rej(err);
|
|
376
|
+
});
|
|
377
|
+
});
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
// Pipe incoming file to all destinations
|
|
381
|
+
let receivedAnyData = false;
|
|
382
|
+
file.on('data', () => { receivedAnyData = true; });
|
|
383
|
+
if (azureStream) file.pipe(azureStream);
|
|
384
|
+
if (gcsStream) file.pipe(gcsStream);
|
|
385
|
+
if (diskWriteStream) file.pipe(diskWriteStream);
|
|
386
|
+
|
|
387
|
+
// Listen for end event to check for empty file
|
|
388
|
+
file.on('end', async () => {
|
|
389
|
+
if (!receivedAnyData) {
|
|
390
|
+
errorOccurred = true;
|
|
391
|
+
// Abort all streams
|
|
392
|
+
if (azureStream) azureStream.destroy();
|
|
393
|
+
if (gcsStream) gcsStream.destroy();
|
|
394
|
+
if (diskWriteStream) diskWriteStream.destroy();
|
|
395
|
+
const err = new Error('Invalid file: file is empty');
|
|
396
|
+
err.status = 400;
|
|
397
|
+
reject(err);
|
|
398
|
+
}
|
|
399
|
+
});
|
|
400
|
+
|
|
401
|
+
// Start cloud uploads immediately
|
|
402
|
+
let azurePromise;
|
|
403
|
+
if (!saveToLocal) {
|
|
404
|
+
azurePromise = saveToAzureStorage(context, uploadName, azureStream)
|
|
405
|
+
.catch(async (err) => {
|
|
406
|
+
cloudUploadError = err;
|
|
407
|
+
// Fallback: try from disk if available
|
|
408
|
+
if (diskWritePromise) {
|
|
409
|
+
await diskWritePromise;
|
|
410
|
+
const diskStream = fs.createReadStream(tempFilePath, {
|
|
411
|
+
highWaterMark: 1024 * 1024,
|
|
412
|
+
autoClose: true,
|
|
413
|
+
});
|
|
414
|
+
return saveToAzureStorage(context, uploadName, diskStream);
|
|
415
|
+
}
|
|
416
|
+
throw err;
|
|
417
|
+
});
|
|
418
|
+
}
|
|
419
|
+
let gcsPromise;
|
|
420
|
+
if (gcsStream) {
|
|
421
|
+
gcsPromise = saveToGoogleStorage(context, uploadName, gcsStream)
|
|
422
|
+
.catch(async (err) => {
|
|
423
|
+
cloudUploadError = err;
|
|
424
|
+
if (diskWritePromise) {
|
|
425
|
+
await diskWritePromise;
|
|
426
|
+
const diskStream = fs.createReadStream(tempFilePath, {
|
|
427
|
+
highWaterMark: 1024 * 1024,
|
|
428
|
+
autoClose: true,
|
|
429
|
+
});
|
|
430
|
+
return saveToGoogleStorage(context, uploadName, diskStream);
|
|
431
|
+
}
|
|
432
|
+
throw err;
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
// Wait for cloud uploads to finish
|
|
437
|
+
try {
|
|
438
|
+
const results = await Promise.all([
|
|
439
|
+
azurePromise ? azurePromise.then((url) => ({ url, type: 'primary' })) : null,
|
|
440
|
+
(!azurePromise && saveToLocal)
|
|
441
|
+
? Promise.resolve({ url: null, type: 'primary-local' }) // placeholder for local, url handled later
|
|
442
|
+
: null,
|
|
443
|
+
gcsPromise ? gcsPromise.then((gcs) => ({ gcs, type: 'gcs' })) : null,
|
|
444
|
+
].filter(Boolean));
|
|
445
|
+
|
|
446
|
+
const result = {
|
|
447
|
+
message: `File '${uploadName}' uploaded successfully.`,
|
|
448
|
+
filename: uploadName,
|
|
449
|
+
...results.reduce((acc, result) => {
|
|
450
|
+
if (result.type === 'primary') acc.url = result.url;
|
|
451
|
+
if (result.type === 'gcs') acc.gcs = ensureUnencodedGcsUrl(result.gcs);
|
|
452
|
+
return acc;
|
|
453
|
+
}, {}),
|
|
454
|
+
};
|
|
455
|
+
if (hash) result.hash = hash;
|
|
456
|
+
|
|
457
|
+
// If saving locally, wait for disk write to finish and then move to public folder
|
|
458
|
+
if (saveToLocal) {
|
|
459
|
+
try {
|
|
460
|
+
if (diskWritePromise) {
|
|
461
|
+
await diskWritePromise; // ensure file fully written
|
|
462
|
+
}
|
|
463
|
+
const localUrl = await saveToLocalStorage(
|
|
464
|
+
context,
|
|
465
|
+
requestId,
|
|
466
|
+
uploadName,
|
|
467
|
+
fs.createReadStream(tempFilePath, {
|
|
468
|
+
highWaterMark: 1024 * 1024,
|
|
469
|
+
autoClose: true,
|
|
470
|
+
}),
|
|
471
|
+
);
|
|
472
|
+
result.url = localUrl;
|
|
473
|
+
} catch (err) {
|
|
474
|
+
console.error('Error saving to local storage:', err);
|
|
475
|
+
throw err;
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
// Respond as soon as cloud uploads are done
|
|
480
|
+
context.res = { status: 200, body: result };
|
|
481
|
+
resolve(result);
|
|
482
|
+
} catch (err) {
|
|
483
|
+
errorOccurred = true;
|
|
484
|
+
reject(err);
|
|
485
|
+
} finally {
|
|
486
|
+
// Clean up temp file if written
|
|
487
|
+
if (tempDir) {
|
|
488
|
+
fs.rmSync(tempDir, { recursive: true, force: true });
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
});
|
|
492
|
+
|
|
493
|
+
busboy.on('error', (error) => {
|
|
494
|
+
if (errorOccurred) return;
|
|
495
|
+
errorOccurred = true;
|
|
496
|
+
const err = new Error('No file provided in request');
|
|
497
|
+
err.status = 400;
|
|
498
|
+
reject(err);
|
|
499
|
+
});
|
|
500
|
+
|
|
501
|
+
busboy.on('finish', () => {
|
|
502
|
+
if (errorOccurred) return;
|
|
503
|
+
if (!hasFile) {
|
|
504
|
+
errorOccurred = true;
|
|
505
|
+
const err = new Error('No file provided in request');
|
|
506
|
+
err.status = 400;
|
|
507
|
+
reject(err);
|
|
508
|
+
}
|
|
509
|
+
});
|
|
510
|
+
|
|
511
|
+
// Handle errors from piping the request
|
|
512
|
+
req.on('error', (error) => {
|
|
513
|
+
if (errorOccurred) return;
|
|
514
|
+
errorOccurred = true;
|
|
515
|
+
// Only log unexpected errors
|
|
516
|
+
if (error.message !== 'No file provided in request') {
|
|
517
|
+
context.log('Error in request stream:', error);
|
|
518
|
+
}
|
|
519
|
+
const err = new Error('No file provided in request');
|
|
520
|
+
err.status = 400;
|
|
521
|
+
reject(err);
|
|
522
|
+
});
|
|
523
|
+
|
|
524
|
+
try {
|
|
525
|
+
req.pipe(busboy);
|
|
526
|
+
} catch (error) {
|
|
527
|
+
if (errorOccurred) return;
|
|
528
|
+
errorOccurred = true;
|
|
529
|
+
// Only log unexpected errors
|
|
530
|
+
if (error.message !== 'No file provided in request') {
|
|
531
|
+
context.log('Error piping request to busboy:', error);
|
|
532
|
+
}
|
|
533
|
+
const err = new Error('No file provided in request');
|
|
534
|
+
err.status = 400;
|
|
535
|
+
reject(err);
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
} catch (error) {
|
|
539
|
+
// Only log unexpected errors
|
|
540
|
+
if (error.message !== 'No file provided in request') {
|
|
541
|
+
context.log('Error processing file upload:', error);
|
|
542
|
+
}
|
|
543
|
+
const err = new Error(error.message || 'Error processing file upload.');
|
|
544
|
+
err.status = error.status || 500;
|
|
545
|
+
reject(err);
|
|
258
546
|
}
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
reject(err);
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
} catch (error) {
|
|
265
|
-
// Only log unexpected errors
|
|
266
|
-
if (error.message !== "No file provided in request") {
|
|
267
|
-
context.log("Error processing file upload:", error);
|
|
268
|
-
}
|
|
269
|
-
const err = new Error(error.message || "Error processing file upload.");
|
|
270
|
-
err.status = error.status || 500;
|
|
271
|
-
reject(err);
|
|
272
|
-
}
|
|
273
|
-
})();
|
|
274
|
-
});
|
|
547
|
+
})();
|
|
548
|
+
});
|
|
275
549
|
}
|
|
276
550
|
|
|
277
551
|
// Helper function to handle local file storage
|
|
278
552
|
async function saveToLocalStorage(context, requestId, encodedFilename, file) {
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
553
|
+
const localPath = join(publicFolder, requestId);
|
|
554
|
+
fs.mkdirSync(localPath, { recursive: true });
|
|
555
|
+
const destinationPath = `${localPath}/${encodedFilename}`;
|
|
556
|
+
context.log(`Saving to local storage... ${destinationPath}`);
|
|
557
|
+
await pipeline(file, fs.createWriteStream(destinationPath));
|
|
558
|
+
return `http://${ipAddress}:${port}/files/${requestId}/${encodedFilename}`;
|
|
285
559
|
}
|
|
286
560
|
|
|
287
561
|
// Helper function to handle Azure blob storage
|
|
288
562
|
async function saveToAzureStorage(context, encodedFilename, file) {
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
563
|
+
const { containerClient } = await getBlobClient();
|
|
564
|
+
const contentType = mime.lookup(encodedFilename);
|
|
565
|
+
|
|
566
|
+
// Decode the filename if it's already encoded to prevent double-encoding
|
|
567
|
+
let blobName = encodedFilename;
|
|
568
|
+
if (isEncoded(blobName)) {
|
|
569
|
+
blobName = decodeURIComponent(blobName);
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
const options = {
|
|
573
|
+
blobHTTPHeaders: contentType ? { blobContentType: contentType } : {},
|
|
574
|
+
maxConcurrency: 50,
|
|
575
|
+
blockSize: 8 * 1024 * 1024,
|
|
576
|
+
};
|
|
577
|
+
|
|
578
|
+
const blockBlobClient = containerClient.getBlockBlobClient(blobName);
|
|
579
|
+
context.log(`Uploading to Azure... ${blobName}`);
|
|
580
|
+
await blockBlobClient.uploadStream(file, undefined, undefined, options);
|
|
581
|
+
const sasToken = generateSASToken(containerClient, blobName);
|
|
582
|
+
return `${blockBlobClient.url}?${sasToken}`;
|
|
299
583
|
}
|
|
300
584
|
|
|
301
585
|
// Helper function to upload a file to Google Cloud Storage
|
|
302
586
|
async function uploadToGCS(context, file, encodedFilename) {
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
587
|
+
const gcsFile = gcs.bucket(GCS_BUCKETNAME).file(encodedFilename);
|
|
588
|
+
const writeStream = gcsFile.createWriteStream({
|
|
589
|
+
resumable: true,
|
|
590
|
+
validation: false,
|
|
591
|
+
metadata: {
|
|
592
|
+
contentType: mime.lookup(encodedFilename) || 'application/octet-stream',
|
|
593
|
+
},
|
|
594
|
+
chunkSize: 8 * 1024 * 1024,
|
|
595
|
+
numRetries: 3,
|
|
596
|
+
retryDelay: 1000,
|
|
597
|
+
});
|
|
598
|
+
context.log(`Uploading to GCS... ${encodedFilename}`);
|
|
599
|
+
await pipeline(file, writeStream);
|
|
600
|
+
// Never encode GCS URLs
|
|
601
|
+
const gcsUrl = `gs://${GCS_BUCKETNAME}/${encodedFilename}`;
|
|
602
|
+
return gcsUrl;
|
|
310
603
|
}
|
|
311
604
|
|
|
312
605
|
// Helper function to handle Google Cloud Storage
|
|
313
606
|
async function saveToGoogleStorage(context, encodedFilename, file) {
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
607
|
+
if (!gcs) {
|
|
608
|
+
throw new Error('Google Cloud Storage is not initialized');
|
|
609
|
+
}
|
|
317
610
|
|
|
318
|
-
|
|
611
|
+
return uploadToGCS(context, file, encodedFilename);
|
|
319
612
|
}
|
|
320
613
|
|
|
321
|
-
/**
 * Store an uploaded file in primary storage (local folder or Azure) and, when
 * the module-level `gcs` client is set, also in Google Cloud Storage.
 *
 * The incoming stream is first spooled to a private temp directory so that
 * each storage backend can read it through its own stream. If
 * `FileConversionService` reports that the file needs conversion, the
 * converted file is stored as well and described under `result.converted`;
 * a conversion failure is logged and does not fail the upload.
 *
 * The temp directory is always removed in `finally`, on success and on
 * failure. Removal is best-effort: a failure there is logged, not thrown.
 *
 * @param {object} context - Invocation context; `context.log` is used for
 *   logging and `context.res` receives the HTTP response.
 * @param {string} requestId - Prefix for the stored file name (a UUID is
 *   generated when falsy); also forwarded to the storage/conversion helpers.
 * @param {object} body - Request body. Only `body.url` is read, and only on
 *   failure, to clean up that URL via `cleanup`.
 * @param {boolean} saveToLocal - `true` to use local storage as the primary
 *   backend, otherwise Azure storage.
 * @param {import('stream').Readable} file - Uploaded file content.
 * @param {string} filename - Client-supplied file name.
 * @param {Function} resolve - Called with the result object on success, or
 *   with the 400 response when no file was provided.
 * @param {?string} [hash=null] - Optional hash copied onto the result.
 * @returns {Promise<void>}
 * @throws Rethrows any error from spooling or from the storage uploads.
 */
async function uploadFile(
  context,
  requestId,
  body,
  saveToLocal,
  file,
  filename,
  resolve,
  hash = null,
) {
  // Declared outside the try block so the finally clause can always see it.
  let tempDir = null;

  try {
    if (!file) {
      context.res = {
        status: 400,
        body: 'No file provided in request',
      };
      resolve(context.res);
      return;
    }

    const ext = path.extname(filename).toLowerCase();
    context.log(`Processing file with extension: ${ext}`);

    // Create temp directory for file operations
    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'upload-'));
    // Only the base name is used on disk so a client-supplied name such as
    // "../../x" cannot place the spooled file outside tempDir.
    const tempOriginal = path.join(tempDir, path.basename(filename));
    context.log(`Created temp directory: ${tempDir}`);

    // Optimize initial write with larger buffer
    const writeStream = fs.createWriteStream(tempOriginal, {
      highWaterMark: 1024 * 1024, // 1MB chunks for initial write
      autoClose: true,
    });

    // Use pipeline with error handling
    context.log('Writing file to temp location...');
    await pipeline(file, writeStream);
    context.log('File written to temp location successfully');

    const uploadPath = tempOriginal;
    const uploadName = `${requestId || uuidv4()}_${filename}`;
    context.log(`Prepared upload name: ${uploadName}`);

    // Create optimized read streams with larger buffers for storage uploads
    const createOptimizedReadStream = (sourcePath) =>
      fs.createReadStream(sourcePath, {
        highWaterMark: 1024 * 1024, // 1MB chunks for storage uploads
        autoClose: true,
      });

    // Upload original in parallel with optimized streams
    const storagePromises = [];
    context.log('Starting primary storage upload...');
    const primaryPromise = saveToLocal
      ? saveToLocalStorage(
          context,
          requestId,
          uploadName,
          createOptimizedReadStream(uploadPath),
        )
      : saveToAzureStorage(
          context,
          uploadName,
          createOptimizedReadStream(uploadPath),
        );
    storagePromises.push(
      primaryPromise.then((url) => {
        context.log('Primary storage upload completed');
        return { url, type: 'primary' };
      }),
    );

    if (gcs) {
      context.log('Starting GCS upload...');
      storagePromises.push(
        saveToGoogleStorage(
          context,
          uploadName,
          createOptimizedReadStream(uploadPath),
        ).then((gcsUrl) => {
          context.log('GCS upload completed');
          return {
            gcs: gcsUrl,
            type: 'gcs',
          };
        }),
      );
    }

    // Wait for every upload to settle before reporting a failure, so the temp
    // file is never removed while another backend is still reading it.
    context.log('Waiting for all storage uploads to complete...');
    const settled = await Promise.allSettled(storagePromises);
    const failure = settled.find(({ status }) => status === 'rejected');
    if (failure) {
      throw failure.reason;
    }
    const results = settled.map(({ value }) => value);

    const result = {
      message: `File '${uploadName}' ${saveToLocal ? 'saved to folder' : 'uploaded'} successfully.`,
      filename: uploadName,
      ...results.reduce((acc, entry) => {
        if (entry.type === 'primary') acc.url = entry.url;
        if (entry.type === 'gcs') acc.gcs = ensureUnencodedGcsUrl(entry.gcs);
        return acc;
      }, {}),
    };

    if (hash) {
      result.hash = hash;
    }

    // Initialize conversion service
    const conversionService = new FileConversionService(context, !saveToLocal);

    // Check if file needs conversion and handle it
    if (conversionService.needsConversion(filename)) {
      try {
        context.log('Starting file conversion...');
        // Convert the file
        const conversion = await conversionService.convertFile(uploadPath, result.url);
        context.log('File conversion completed:', conversion);

        if (conversion.converted) {
          context.log('Saving converted file...');
          // Save converted file
          const convertedSaveResult = await conversionService._saveConvertedFile(
            conversion.convertedPath,
            requestId,
          );
          context.log('Converted file saved to primary storage');

          // If GCS is configured, also save to GCS
          let convertedGcsUrl;
          if (conversionService._isGCSConfigured()) {
            context.log('Saving converted file to GCS...');
            convertedGcsUrl = await conversionService._uploadChunkToGCS(
              conversion.convertedPath,
              requestId,
            );
            context.log('Converted file saved to GCS');
          }

          // Add converted file info to result
          result.converted = {
            url: convertedSaveResult.url,
            gcs: convertedGcsUrl,
          };
          context.log('Conversion process completed successfully');
        }
      } catch (error) {
        console.error('Error converting file:', error);
        context.log('Error during conversion:', error.message);
        // Don't fail the upload if conversion fails
      }
    }

    context.res = {
      status: 200,
      body: result,
    };

    context.log('Upload process completed successfully');
    resolve(result);
  } catch (error) {
    context.log('Error in upload process:', error);
    // Optional chaining: a missing body must not mask the original error.
    if (body?.url) {
      try {
        await cleanup(context, [body.url]);
      } catch (cleanupError) {
        context.log('Error during cleanup after failure:', cleanupError);
      }
    }
    throw error;
  } finally {
    // Clean up temp files on every exit path, not just on success.
    if (tempDir) {
      context.log('Cleaning up temporary files...');
      try {
        fs.rmSync(tempDir, { recursive: true, force: true });
        context.log('Temporary files cleaned up');
      } catch (cleanupError) {
        context.log('Error removing temporary files:', cleanupError);
      }
    }
  }
}
|
|
398
787
|
|
|
399
|
-
//
|
|
400
|
-
async function
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
788
|
+
/**
 * Read a stream to completion and return its content as a single Buffer.
 *
 * @param {import('stream').Readable} stream - Stream to drain. Chunks may be
 *   Buffers/Uint8Arrays or strings (as emitted when an encoding is set or by
 *   object-mode streams of strings).
 * @returns {Promise<Buffer>} All chunks concatenated in arrival order.
 *   Rejects if the stream emits `error` or if the chunks cannot be combined.
 */
async function streamToBuffer(stream) {
  return new Promise((resolve, reject) => {
    const chunks = [];

    stream.on('data', (chunk) => {
      // Buffer.concat only accepts Buffers/Uint8Arrays; string chunks are
      // converted here (Buffer.from defaults to UTF-8).
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
    });

    stream.on('end', () => {
      // A throw inside an event handler would escape the promise entirely,
      // so route concat failures to reject.
      try {
        resolve(Buffer.concat(chunks));
      } catch (error) {
        reject(error);
      }
    });

    stream.on('error', reject);
  });
}
|
|
407
797
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
798
|
+
/**
 * Delete blobs from the container returned by `getBlobClient`.
 *
 * Two modes:
 *  - `urls` falsy: every blob whose `lastModified` is more than one calendar
 *    month in the past is deleted.
 *  - `urls` given: each URL is reduced to a blob name by stripping the
 *    container URL from it, and that blob is deleted.
 *
 * Failures are handled per blob: errors with `statusCode` 404 are ignored,
 * anything else is logged through `context.log`, and processing continues.
 *
 * @param {object} context - Invocation context providing `log`.
 * @param {?string[]} [urls=null] - Blob URLs to delete, or null for the
 *   age-based sweep.
 * @returns {Promise<string[]>} Names of the blobs that were deleted.
 */
async function cleanup(context, urls = null) {
  const { containerClient } = await getBlobClient();
  const removed = [];

  // Deletes one blob and records it; `resolveName` runs inside the try so a
  // failure while deriving the name is reported like any other failure.
  const deleteAndRecord = async (resolveName, label) => {
    try {
      const name = resolveName();
      await containerClient.getBlockBlobClient(name).delete();
      context.log(`Cleaned blob: ${name}`);
      removed.push(name);
    } catch (err) {
      // Ignore "not found" errors
      if (err.statusCode === 404) {
        return;
      }
      context.log(`Error cleaning blob ${label}:`, err);
    }
  };

  if (urls) {
    for (const url of urls) {
      await deleteAndRecord(() => url.replace(containerClient.url, ''), url);
    }
    return removed;
  }

  const cutoff = new Date();
  cutoff.setMonth(cutoff.getMonth() - 1);

  for await (const blob of containerClient.listBlobsFlat()) {
    const isStale = blob.properties.lastModified < cutoff;
    if (!isStale) {
      continue;
    }
    await deleteAndRecord(() => blob.name, blob.name);
  }

  return removed;
}
|
|
442
843
|
|
|
443
|
-
/**
 * Delete objects from the configured Google Cloud Storage bucket.
 *
 * Two modes:
 *  - `urls` falsy: every object whose `updated` metadata is older than 30
 *    days is deleted.
 *  - `urls` given: each URL is mapped to an object name by dropping its first
 *    three `/`-separated segments (scheme, empty segment, bucket) and that
 *    object is deleted. A failure on one URL is logged and the remaining URLs
 *    are still processed.
 *
 * Afterwards, for every directory prefix that was touched, the bucket is
 * listed under that prefix and `deleteFiles` is issued when nothing remains.
 *
 * @param {?string[]} [urls=null] - Object URLs to delete, or null for the
 *   age-based sweep.
 * @returns {Promise<string[]>} Object names (age-based mode) or URLs (URL
 *   mode) that were deleted.
 * @throws {Error} When Google Cloud Storage is not initialized, or when a
 *   bucket operation outside the per-URL delete fails.
 */
async function cleanupGCS(urls = null) {
  // Same guard as the other GCS helpers: fail with a clear message instead of
  // a TypeError from dereferencing a missing client.
  if (!gcs) {
    throw new Error('Google Cloud Storage is not initialized');
  }

  const bucket = gcs.bucket(GCS_BUCKETNAME);
  const directories = new Set();
  const cleanedURLs = [];

  if (!urls) {
    const daysN = 30;
    const thirtyDaysAgo = new Date(Date.now() - daysN * 24 * 60 * 60 * 1000);
    const [files] = await bucket.getFiles();

    for (const file of files) {
      const [metadata] = await file.getMetadata();
      const directoryPath = path.dirname(file.name);
      directories.add(directoryPath);
      if (metadata.updated) {
        const updatedTime = new Date(metadata.updated);
        if (updatedTime.getTime() < thirtyDaysAgo.getTime()) {
          console.log(`Cleaning file: ${file.name}`);
          await file.delete();
          cleanedURLs.push(file.name);
        }
      }
    }
  } else {
    for (const url of urls) {
      // The try is per URL so one failed delete (e.g. an object that is
      // already gone) does not abandon the rest of the list.
      try {
        const filename = path.join(url.split('/').slice(3).join('/'));
        const file = bucket.file(filename);
        const directoryPath = path.dirname(file.name);
        directories.add(directoryPath);
        await file.delete();
        cleanedURLs.push(url);
      } catch (error) {
        console.error(`Error cleaning up files: ${error}`);
      }
    }
  }

  for (const directory of directories) {
    const [files] = await bucket.getFiles({ prefix: directory });
    if (files.length === 0) {
      console.log(`Deleting empty directory: ${directory}`);
      await bucket.deleteFiles({ prefix: directory });
    }
  }

  return cleanedURLs;
}
|
|
491
892
|
|
|
492
893
|
/**
 * Delete every GCS object whose name starts with `blobName`.
 *
 * When `STORAGE_EMULATOR_HOST` is set, the objects are listed and deleted
 * through the emulator's HTTP API on `localhost:4443`; otherwise the GCS SDK
 * is used. An error whose `code` is 404 yields an empty list; any other
 * error is logged and rethrown.
 *
 * @param {string} blobName - Object name prefix to delete.
 * @returns {Promise<string[]>} Names of the deleted objects.
 * @throws {Error} When `blobName` is missing or GCS is not initialized.
 */
async function deleteGCS(blobName) {
  if (!blobName) {
    throw new Error('Missing blobName parameter');
  }
  if (!gcs) {
    throw new Error('Google Cloud Storage is not initialized');
  }

  try {
    const bucket = gcs.bucket(GCS_BUCKETNAME);
    const removed = [];

    if (!process.env.STORAGE_EMULATOR_HOST) {
      // For real GCS, use the SDK
      const [matches] = await bucket.getFiles({ prefix: blobName });
      for (const match of matches) {
        await match.delete();
        removed.push(match.name);
      }
    } else {
      // For fake GCS server, use HTTP API directly
      const objectsEndpoint = `http://localhost:4443/storage/v1/b/${GCS_BUCKETNAME}/o`;
      const listing = await axios.get(objectsEndpoint, {
        params: { prefix: blobName },
      });

      for (const item of listing.data.items || []) {
        await axios.delete(
          `${objectsEndpoint}/${encodeURIComponent(item.name)}`,
          { validateStatus: (status) => status === 200 || status === 404 },
        );
        removed.push(item.name);
      }
    }

    if (removed.length > 0) {
      console.log(`Cleaned GCS files: ${removed.join(', ')}`);
    }
    return removed;
  } catch (err) {
    if (err.code === 404) {
      return [];
    }
    console.error(`Error in deleteGCS: ${err}`);
    throw err;
  }
}
|
|
536
937
|
|
|
537
938
|
/**
 * Make sure an already-stored file also has a copy in Google Cloud Storage.
 *
 * If the record has no `gcs` entry and the GCS client is available, the file
 * is downloaded as a stream from `existingFile.url` and passed to
 * `uploadToGCS`; the result is written to `existingFile.gcs`. The object name
 * is the base name of the URL without its query string, URI-encoded unless
 * `isEncoded` says it already is.
 *
 * @param {object} context - Invocation context providing `log`.
 * @param {{url: string, gcs?: string}} existingFile - File record; mutated in
 *   place when an upload happens.
 * @returns {Promise<object>} The same `existingFile` object.
 */
async function ensureGCSUpload(context, existingFile) {
  // Nothing to do when a GCS copy is already recorded or GCS is unavailable.
  if (existingFile.gcs || !gcs) {
    return existingFile;
  }

  context.log('GCS file was missing - uploading.');

  const [urlWithoutQuery] = existingFile.url.split('?');
  const rawName = path.basename(urlWithoutQuery);
  const encodedFilename = isEncoded(rawName)
    ? rawName
    : encodeURIComponent(rawName);

  // Download the file from Azure/local storage
  const download = await axios({
    method: 'get',
    url: existingFile.url,
    responseType: 'stream',
  });

  // Upload the file stream to GCS
  const gcsLocation = await uploadToGCS(context, download.data, encodedFilename);
  existingFile.gcs = gcsLocation;

  return existingFile;
}
|
|
555
961
|
|
|
556
962
|
/**
 * Upload a local file to GCS under `<requestId>/<file name>`.
 *
 * The file name is the base name of `chunkPath`, URI-encoded unless
 * `isEncoded` reports it is already encoded.
 *
 * @param {string} chunkPath - Path of the local file to upload.
 * @param {string} requestId - Folder-like prefix for the destination object.
 * @returns {Promise<?string>} The `gs://` URL of the uploaded object, or
 *   `null` when the GCS client is not available.
 */
async function uploadChunkToGCS(chunkPath, requestId) {
  if (gcs) {
    const fileName = path.basename(chunkPath);
    const safeName = isEncoded(fileName)
      ? fileName
      : encodeURIComponent(fileName);
    const destination = `${requestId}/${safeName}`;

    const bucket = gcs.bucket(GCS_BUCKETNAME);
    await bucket.upload(chunkPath, { destination });

    return `gs://${GCS_BUCKETNAME}/${destination}`;
  }

  return null;
}
|
|
566
975
|
|
|
567
|
-
export {
|
|
976
|
+
export {
|
|
977
|
+
saveFileToBlob,
|
|
978
|
+
deleteBlob,
|
|
979
|
+
deleteGCS,
|
|
980
|
+
uploadBlob,
|
|
981
|
+
cleanup,
|
|
982
|
+
cleanupGCS,
|
|
983
|
+
gcsUrlExists,
|
|
984
|
+
ensureGCSUpload,
|
|
985
|
+
gcs,
|
|
986
|
+
uploadChunkToGCS,
|
|
987
|
+
downloadFromGCS,
|
|
988
|
+
};
|