@aj-archipelago/cortex 1.3.57 → 1.3.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/config.js +22 -0
- package/helper-apps/cortex-file-handler/INTERFACE.md +20 -9
- package/helper-apps/cortex-file-handler/package-lock.json +2 -2
- package/helper-apps/cortex-file-handler/package.json +1 -1
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +17 -17
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +35 -35
- package/helper-apps/cortex-file-handler/src/blobHandler.js +1010 -909
- package/helper-apps/cortex-file-handler/src/constants.js +98 -98
- package/helper-apps/cortex-file-handler/src/docHelper.js +27 -27
- package/helper-apps/cortex-file-handler/src/fileChunker.js +224 -214
- package/helper-apps/cortex-file-handler/src/helper.js +93 -93
- package/helper-apps/cortex-file-handler/src/index.js +584 -550
- package/helper-apps/cortex-file-handler/src/localFileHandler.js +86 -86
- package/helper-apps/cortex-file-handler/src/redis.js +186 -90
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +301 -273
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +55 -55
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +174 -154
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +239 -223
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +161 -159
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +73 -71
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +46 -45
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +256 -213
- package/helper-apps/cortex-file-handler/src/start.js +4 -1
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +59 -25
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +119 -116
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +257 -257
- package/helper-apps/cortex-file-handler/tests/cleanup.test.js +676 -0
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +124 -124
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +249 -208
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +439 -380
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +299 -263
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +265 -239
- package/helper-apps/cortex-file-handler/tests/start.test.js +1230 -1201
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +110 -105
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +201 -175
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +128 -125
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +78 -73
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +99 -99
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -70
- package/package.json +1 -1
- package/pathways/translate_apptek.js +33 -0
- package/pathways/translate_subtitle.js +15 -8
- package/server/plugins/apptekTranslatePlugin.js +46 -91
- package/tests/apptekTranslatePlugin.test.js +0 -2
- package/tests/integration/apptekTranslatePlugin.integration.test.js +159 -93
- package/tests/translate_apptek.test.js +16 -0
|
@@ -1,133 +1,134 @@
|
|
|
1
|
-
import fs from
|
|
2
|
-
import
|
|
3
|
-
import
|
|
4
|
-
import {
|
|
5
|
-
import { pipeline as _pipeline } from
|
|
6
|
-
import {
|
|
7
|
-
|
|
1
|
+
import fs from "fs";
|
|
2
|
+
import os from "os";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import { promisify } from "util";
|
|
5
|
+
import { pipeline as _pipeline } from "stream";
|
|
6
|
+
import { v4 as uuidv4 } from "uuid";
|
|
7
|
+
import Busboy from "busboy";
|
|
8
|
+
import { PassThrough } from "stream";
|
|
9
|
+
import mime from "mime-types";
|
|
10
|
+
import { Storage } from "@google-cloud/storage";
|
|
8
11
|
import {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
} from
|
|
13
|
-
import
|
|
14
|
-
import axios from 'axios';
|
|
15
|
-
import Busboy from 'busboy';
|
|
16
|
-
import { v4 as uuidv4 } from 'uuid';
|
|
17
|
-
const pipeline = promisify(_pipeline);
|
|
18
|
-
|
|
19
|
-
import { publicFolder, port, ipAddress } from './start.js';
|
|
20
|
-
import { CONVERTED_EXTENSIONS } from './constants.js';
|
|
12
|
+
generateBlobSASQueryParameters,
|
|
13
|
+
StorageSharedKeyCredential,
|
|
14
|
+
BlobServiceClient,
|
|
15
|
+
} from "@azure/storage-blob";
|
|
16
|
+
import axios from "axios";
|
|
21
17
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
18
|
+
import {
|
|
19
|
+
sanitizeFilename,
|
|
20
|
+
generateShortId,
|
|
21
|
+
generateBlobName,
|
|
22
|
+
} from "./utils/filenameUtils.js";
|
|
23
|
+
import { publicFolder, port, ipAddress } from "./start.js";
|
|
24
|
+
import { CONVERTED_EXTENSIONS } from "./constants.js";
|
|
25
|
+
import { FileConversionService } from "./services/FileConversionService.js";
|
|
27
26
|
|
|
28
|
-
|
|
27
|
+
const pipeline = promisify(_pipeline);
|
|
29
28
|
|
|
30
29
|
/**
 * Reports whether `str` is a canonical base64 string, i.e. decoding it and
 * re-encoding the result reproduces the input exactly.
 *
 * @param {unknown} str - Candidate value; anything that is not a string yields `false`.
 * @returns {boolean} `true` only when `str` is a string that round-trips through atob/btoa.
 */
function isBase64(str) {
  // atob() coerces its argument to a string, so without this guard a number
  // such as 1234 decodes fine and a loose comparison reports it as base64.
  if (typeof str !== "string") {
    return false;
  }
  try {
    return btoa(atob(str)) === str;
  } catch (err) {
    // atob throws on characters outside the base64 alphabet or on bad length.
    return false;
  }
}
|
|
37
36
|
|
|
38
37
|
// Lifetime of generated SAS tokens, in days (string when read from the environment).
const { SAS_TOKEN_LIFE_DAYS = 30 } = process.env;

// The base64 variable takes precedence over the raw JSON one; "{}" stands in
// when neither is set so parsing yields an empty credentials object.
const GCP_SERVICE_ACCOUNT_KEY =
  process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 ||
  process.env.GCP_SERVICE_ACCOUNT_KEY ||
  "{}";

/**
 * Parses a GCP service-account key given either as raw JSON or as
 * base64-encoded JSON.
 *
 * A malformed key must not crash the module at import time, so any parse
 * failure (or a JSON value that is not an object) yields an empty object,
 * which downstream code treats as "no credentials".
 *
 * @param {string} rawKey - Raw JSON or base64-encoded JSON.
 * @returns {object} The parsed credentials, or `{}` when they cannot be used.
 */
function parseGcpServiceAccount(rawKey) {
  try {
    const json = isBase64(rawKey)
      ? Buffer.from(rawKey, "base64").toString()
      : rawKey;
    const parsed = JSON.parse(json);
    // JSON such as `null` or `42` parses fine but is not a credentials object.
    return parsed !== null && typeof parsed === "object" ? parsed : {};
  } catch (error) {
    // Deliberately not logging `error`: JSON.parse messages can quote part of
    // the input, which here is secret key material.
    console.error(
      "Google Cloud Storage credentials could not be parsed - GCS will not be used",
    );
    return {};
  }
}

const GCP_SERVICE_ACCOUNT = parseGcpServiceAccount(GCP_SERVICE_ACCOUNT_KEY);
const { project_id: GCP_PROJECT_ID } = GCP_SERVICE_ACCOUNT;

// Shared GCS client; stays undefined when credentials are missing or rejected,
// and callers in this file check it before use.
let gcs;
if (!GCP_PROJECT_ID) {
  console.warn(
    "No Google Cloud Storage credentials provided - GCS will not be used",
  );
} else {
  try {
    gcs = new Storage({
      projectId: GCP_PROJECT_ID,
      credentials: GCP_SERVICE_ACCOUNT,
    });
  } catch (error) {
    console.error(
      "Google Cloud Storage credentials are invalid - GCS will not be used: ",
      error,
    );
  }
}

// Storage locations, overridable through the environment.
export const AZURE_STORAGE_CONTAINER_NAME =
  process.env.AZURE_STORAGE_CONTAINER_NAME || "whispertempfiles";
export const GCS_BUCKETNAME = process.env.GCS_BUCKETNAME || "cortextempfiles";
|
|
72
71
|
|
|
73
72
|
/**
 * Tells whether a string contains at least one percent-encoded byte,
 * i.e. a `%` immediately followed by two hexadecimal digits.
 *
 * @param {string} str - Text to inspect.
 * @returns {boolean} `true` when a percent-escape is present anywhere in `str`.
 */
function isEncoded(str) {
  const percentEscape = /%[0-9A-Fa-f]{2}/;
  return percentEscape.test(str);
}
|
|
77
76
|
|
|
78
77
|
/**
 * Returns a `gs://` URL whose path segments are percent-decoded, so GCS URLs
 * are never handed around in encoded form.
 *
 * Values that are not `gs://` strings (including null/undefined and
 * non-strings) are returned untouched. A segment that cannot be decoded is
 * sanitized instead: every character other than word characters, `-` and `.`
 * becomes `_`.
 *
 * @param {string} url - Candidate URL.
 * @returns {string} The decoded `gs://` URL, or the input unchanged when it is not one.
 */
function ensureUnencodedGcsUrl(url) {
  const scheme = "gs://";
  if (typeof url !== "string" || !url.startsWith(scheme)) {
    return url;
  }

  // Split into bucket and path parts
  const [bucket, ...pathParts] = url.slice(scheme.length).split("/");
  if (pathParts.length === 0) {
    // Bucket-only URL: nothing to decode, and re-joining would append a stray "/".
    return url;
  }

  const decodedPath = pathParts
    .map((part) => {
      try {
        return decodeURIComponent(part);
      } catch (error) {
        // Malformed escape sequence: fall back to a sanitized segment.
        return part.replace(/[^\w\-\.]/g, "_");
      }
    })
    .join("/");

  return `${scheme}${bucket}/${decodedPath}`;
}
|
|
95
96
|
|
|
96
97
|
/**
 * Checks whether the object behind a GCS URL exists.
 *
 * When STORAGE_EMULATOR_HOST is set the emulator's JSON API is queried over
 * HTTP; otherwise the shared GCS client is used.
 *
 * @param {string} url - GCS URL of the object.
 * @param {boolean} [defaultReturn=false] - Value returned when the check cannot
 *   be performed at all (no URL, or no GCS client configured).
 * @returns {Promise<boolean>} Whether the object exists; `false` on any error.
 */
async function gcsUrlExists(url, defaultReturn = false) {
  // Without a URL or a client there is nothing to check against.
  if (!url || !gcs) {
    return defaultReturn;
  }

  try {
    // Work on the decoded form so the object name matches what is stored.
    const [bucketName, ...objectSegments] = ensureUnencodedGcsUrl(url)
      .replace("gs://", "")
      .split("/");
    const fileName = objectSegments.join("/");

    if (!process.env.STORAGE_EMULATOR_HOST) {
      const [exists] = await gcs.bucket(bucketName).file(fileName).exists();
      return exists;
    }

    try {
      // 404 is an expected answer here, so it must not be raised as an error.
      const acceptedStatuses = [200, 404];
      const response = await axios.get(
        `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucketName}/o/${encodeURIComponent(fileName)}`,
        { validateStatus: (status) => acceptedStatuses.includes(status) },
      );
      return response.status === 200;
    } catch (error) {
      console.error("Error checking emulator file:", error);
      return false;
    }
  } catch (error) {
    console.error("Error checking if GCS URL exists:", error);
    return false;
  }
}
|
|
132
133
|
|
|
133
134
|
/**
|
|
@@ -137,920 +138,1020 @@ async function gcsUrlExists(url, defaultReturn = false) {
|
|
|
137
138
|
* @returns {Promise<void>}
|
|
138
139
|
*/
|
|
139
140
|
async function downloadFromGCS(gcsUrl, destinationPath) {
  // Resolves once the object has been fully written to destinationPath;
  // rejects if the URL/client is missing or the transfer fails.
  if (!gcsUrl || !gcs) {
    throw new Error("Invalid GCS URL or GCS client not initialized");
  }

  const [bucketName, ...objectSegments] = gcsUrl
    .replace("gs://", "")
    .split("/");
  const fileName = objectSegments.join("/");

  if (process.env.STORAGE_EMULATOR_HOST) {
    // Use axios to download from emulator
    const response = await axios({
      method: "GET",
      url: `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucketName}/o/${encodeURIComponent(fileName)}?alt=media`,
      responseType: "stream",
    });

    // pipeline() rejects when EITHER stream errors and destroys both, whereas a
    // bare .pipe() with a listener only on the writer would leave this promise
    // pending forever if the HTTP stream failed mid-transfer.
    await pipeline(response.data, fs.createWriteStream(destinationPath));
    return;
  }

  // Use GCS client for real GCS
  await gcs
    .bucket(bucketName)
    .file(fileName)
    .download({ destination: destinationPath });
}
|
|
170
171
|
|
|
171
172
|
/**
 * Builds the Azure Blob service client and the container client for the
 * configured container.
 *
 * If the storage account reports no default service version, one is set
 * ("2020-02-10") before the clients are returned.
 *
 * @returns {Promise<{blobServiceClient: object, containerClient: object}>}
 * @throws {Error} When the connection string or container name is not configured.
 */
export const getBlobClient = async () => {
  const { AZURE_STORAGE_CONNECTION_STRING: connectionString } = process.env;
  const containerName = AZURE_STORAGE_CONTAINER_NAME;

  const isConfigured = Boolean(connectionString && containerName);
  if (!isConfigured) {
    throw new Error(
      "Missing Azure Storage connection string or container name environment variable",
    );
  }

  const blobServiceClient =
    BlobServiceClient.fromConnectionString(connectionString);

  const accountProperties = await blobServiceClient.getProperties();
  if (!accountProperties.defaultServiceVersion) {
    accountProperties.defaultServiceVersion = "2020-02-10";
    await blobServiceClient.setProperties(accountProperties);
  }

  return {
    blobServiceClient,
    containerClient: blobServiceClient.getContainerClient(containerName),
  };
};
|
|
193
194
|
|
|
194
|
-
async function saveFileToBlob(chunkPath, requestId) {
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
195
|
+
/**
 * Uploads a local file to the Azure container and returns a read-only SAS URL.
 *
 * @param {string} chunkPath - Path of the local file to upload.
 * @param {string} requestId - Request identifier passed to generateBlobName.
 * @param {string|null} [filename=null] - Name to store the blob under; when
 *   omitted, a generated short id plus the extension of `chunkPath` is used.
 * @returns {Promise<{url: string, blobName: string}>} SAS-signed URL and blob name.
 */
async function saveFileToBlob(chunkPath, requestId, filename = null) {
  const { containerClient } = await getBlobClient();

  // Caller-supplied name wins; otherwise fall back to "<shortId><ext>".
  const targetName = filename
    ? filename
    : `${generateShortId()}${path.extname(chunkPath)}`;
  const blobName = generateBlobName(requestId, targetName);

  // Stream the file straight from disk into the blob.
  const source = fs.createReadStream(chunkPath);
  const blockBlobClient = containerClient.getBlockBlobClient(blobName);
  await blockBlobClient.uploadStream(source);

  // The token is only generated once the upload has succeeded.
  const sasToken = generateSASToken(containerClient, blobName);

  return { url: `${blockBlobClient.url}?${sasToken}`, blobName };
}
|
|
222
223
|
|
|
223
224
|
/**
 * Creates a read-only SAS query string for a single blob.
 *
 * @param {object} containerClient - Azure container client; its `credential`
 *   must expose `accountName` and `accountKey`.
 * @param {string} blobName - Name of the blob the token grants access to.
 * @param {number} [expiryTimeSeconds] - Token lifetime in seconds; defaults to
 *   SAS_TOKEN_LIFE_DAYS converted to seconds.
 * @returns {string} SAS query parameters (without a leading "?").
 * @throws {RangeError} When the lifetime is not a finite number, e.g. because
 *   SAS_TOKEN_LIFE_DAYS is not numeric.
 */
const generateSASToken = (
  containerClient,
  blobName,
  expiryTimeSeconds = Number.parseInt(SAS_TOKEN_LIFE_DAYS, 10) * 24 * 60 * 60,
) => {
  // A NaN lifetime would otherwise become an Invalid Date and surface as an
  // obscure failure deep inside SAS generation.
  if (!Number.isFinite(expiryTimeSeconds)) {
    throw new RangeError(
      `Invalid SAS token lifetime: ${expiryTimeSeconds} (check SAS_TOKEN_LIFE_DAYS)`,
    );
  }

  const { accountName, accountKey } = containerClient.credential;
  const sharedKeyCredential = new StorageSharedKeyCredential(
    accountName,
    accountKey,
  );

  // Single clock read so the validity window is exactly expiryTimeSeconds long.
  const now = Date.now();
  const sasOptions = {
    containerName: containerClient.containerName,
    blobName,
    permissions: "r", // Read permission
    startsOn: new Date(now),
    expiresOn: new Date(now + expiryTimeSeconds * 1000),
  };

  return generateBlobSASQueryParameters(
    sasOptions,
    sharedKeyCredential,
  ).toString();
};
|
|
248
249
|
|
|
249
250
|
/**
 * Deletes every blob that belongs to a request.
 *
 * A blob belongs to the request when its name starts with `<requestId>_`
 * (flat structure) or `<requestId>/` (folder structure).
 *
 * @param {string} requestId - Identifier whose blobs should be removed.
 * @returns {Promise<string[]>} Names of the blobs that were deleted.
 * @throws {Error} When `requestId` is missing.
 */
async function deleteBlob(requestId) {
  if (!requestId) throw new Error("Missing requestId parameter");
  const { containerClient } = await getBlobClient();

  const flatPrefix = `${requestId}_`;
  const folderPrefix = `${requestId}/`;

  // Both patterns start with requestId, so let the service narrow the listing
  // instead of paging through every blob in the container.
  const candidates = containerClient.listBlobsFlat({ prefix: `${requestId}` });

  const result = [];
  for await (const blob of candidates) {
    // The prefix alone would also match e.g. "<requestId>abc", so the exact
    // separator still has to be checked.
    const belongsToRequest =
      blob.name.startsWith(flatPrefix) || blob.name.startsWith(folderPrefix);
    if (!belongsToRequest) {
      continue;
    }

    await containerClient.getBlockBlobClient(blob.name).delete();
    console.log(`Cleaned blob: ${blob.name}`);
    result.push(blob.name);
  }

  return result;
}
|
|
275
276
|
|
|
276
277
|
function uploadBlob(
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
278
|
+
context,
|
|
279
|
+
req,
|
|
280
|
+
saveToLocal = false,
|
|
281
|
+
filePath = null,
|
|
282
|
+
hash = null,
|
|
282
283
|
) {
|
|
283
|
-
|
|
284
|
-
|
|
284
|
+
return new Promise((resolve, reject) => {
|
|
285
|
+
(async () => {
|
|
286
|
+
try {
|
|
287
|
+
let requestId = uuidv4();
|
|
288
|
+
const body = {};
|
|
289
|
+
|
|
290
|
+
// If filePath is given, we are dealing with local file and not form-data
|
|
291
|
+
if (filePath) {
|
|
292
|
+
const file = fs.createReadStream(filePath);
|
|
293
|
+
const filename = path.basename(filePath);
|
|
294
|
+
|
|
295
|
+
// Generate LLM-friendly ID for requestId to match the filename pattern
|
|
296
|
+
const fileExtension = path.extname(filename);
|
|
297
|
+
const shortId = generateShortId();
|
|
298
|
+
const uploadName = `${shortId}${fileExtension}`;
|
|
299
|
+
requestId = shortId; // Use the short ID as requestId
|
|
300
|
+
|
|
301
|
+
try {
|
|
302
|
+
const result = await uploadFile(
|
|
303
|
+
context,
|
|
304
|
+
requestId,
|
|
305
|
+
body,
|
|
306
|
+
saveToLocal,
|
|
307
|
+
file,
|
|
308
|
+
uploadName, // Use the LLM-friendly filename
|
|
309
|
+
resolve,
|
|
310
|
+
hash,
|
|
311
|
+
);
|
|
312
|
+
resolve(result);
|
|
313
|
+
} catch (error) {
|
|
314
|
+
const err = new Error("Error processing file upload.");
|
|
315
|
+
err.status = 500;
|
|
316
|
+
throw err;
|
|
317
|
+
}
|
|
318
|
+
} else {
|
|
319
|
+
// Otherwise, continue working with form-data
|
|
320
|
+
const busboy = Busboy({ headers: req.headers });
|
|
321
|
+
let hasFile = false;
|
|
322
|
+
let errorOccurred = false;
|
|
323
|
+
|
|
324
|
+
busboy.on("field", (fieldname, value) => {
|
|
325
|
+
if (fieldname === "requestId") {
|
|
326
|
+
requestId = value;
|
|
327
|
+
} else if (fieldname === "hash") {
|
|
328
|
+
hash = value;
|
|
329
|
+
}
|
|
330
|
+
});
|
|
331
|
+
|
|
332
|
+
busboy.on("file", async (fieldname, file, info) => {
|
|
333
|
+
if (errorOccurred) return;
|
|
334
|
+
hasFile = true;
|
|
335
|
+
|
|
336
|
+
// Validate file
|
|
337
|
+
if (!info.filename || info.filename.trim() === "") {
|
|
338
|
+
errorOccurred = true;
|
|
339
|
+
const err = new Error("Invalid file: missing filename");
|
|
340
|
+
err.status = 400;
|
|
341
|
+
reject(err);
|
|
342
|
+
return;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
// Prepare for streaming to cloud destinations
|
|
346
|
+
const filename = info.filename;
|
|
347
|
+
const fileExtension = path.extname(filename);
|
|
348
|
+
const shortId = generateShortId();
|
|
349
|
+
const uploadName = `${shortId}${fileExtension}`;
|
|
350
|
+
const azureStream = !saveToLocal ? new PassThrough() : null;
|
|
351
|
+
const gcsStream = gcs ? new PassThrough() : null;
|
|
352
|
+
let diskWriteStream, tempDir, tempFilePath;
|
|
353
|
+
let diskWritePromise;
|
|
354
|
+
let diskWriteError = null;
|
|
355
|
+
let cloudUploadError = null;
|
|
356
|
+
|
|
357
|
+
// Start local disk write in parallel (non-blocking for response)
|
|
358
|
+
if (saveToLocal) {
|
|
359
|
+
try {
|
|
360
|
+
tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "upload-"));
|
|
361
|
+
} catch (err) {
|
|
362
|
+
console.error("Error creating tempDir:", err);
|
|
363
|
+
errorOccurred = true;
|
|
364
|
+
reject(err);
|
|
365
|
+
return;
|
|
366
|
+
}
|
|
367
|
+
tempFilePath = path.join(tempDir, uploadName);
|
|
368
|
+
try {
|
|
369
|
+
diskWriteStream = fs.createWriteStream(tempFilePath, {
|
|
370
|
+
highWaterMark: 1024 * 1024,
|
|
371
|
+
autoClose: true,
|
|
372
|
+
});
|
|
373
|
+
} catch (err) {
|
|
374
|
+
console.error(
|
|
375
|
+
"Error creating write stream:",
|
|
376
|
+
err,
|
|
377
|
+
"Temp dir exists:",
|
|
378
|
+
fs.existsSync(tempDir),
|
|
379
|
+
);
|
|
380
|
+
errorOccurred = true;
|
|
381
|
+
reject(err);
|
|
382
|
+
return;
|
|
383
|
+
}
|
|
384
|
+
diskWriteStream.on("error", (err) => {
|
|
385
|
+
console.error("Disk write stream error:", err);
|
|
386
|
+
});
|
|
387
|
+
diskWriteStream.on("close", () => {
|
|
388
|
+
console.log("Disk write stream closed for:", tempFilePath);
|
|
389
|
+
});
|
|
390
|
+
diskWritePromise = new Promise((res, rej) => {
|
|
391
|
+
diskWriteStream.on("finish", res);
|
|
392
|
+
diskWriteStream.on("error", (err) => {
|
|
393
|
+
diskWriteError = err;
|
|
394
|
+
rej(err);
|
|
395
|
+
});
|
|
396
|
+
});
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
// Pipe incoming file to all destinations
|
|
400
|
+
let receivedAnyData = false;
|
|
401
|
+
file.on("data", () => {
|
|
402
|
+
receivedAnyData = true;
|
|
403
|
+
});
|
|
404
|
+
if (azureStream) file.pipe(azureStream);
|
|
405
|
+
if (gcsStream) file.pipe(gcsStream);
|
|
406
|
+
if (diskWriteStream) file.pipe(diskWriteStream);
|
|
407
|
+
|
|
408
|
+
// Listen for end event to check for empty file
|
|
409
|
+
file.on("end", async () => {
|
|
410
|
+
if (!receivedAnyData) {
|
|
411
|
+
errorOccurred = true;
|
|
412
|
+
// Abort all streams
|
|
413
|
+
if (azureStream) azureStream.destroy();
|
|
414
|
+
if (gcsStream) gcsStream.destroy();
|
|
415
|
+
if (diskWriteStream) diskWriteStream.destroy();
|
|
416
|
+
const err = new Error("Invalid file: file is empty");
|
|
417
|
+
err.status = 400;
|
|
418
|
+
reject(err);
|
|
419
|
+
}
|
|
420
|
+
});
|
|
421
|
+
|
|
422
|
+
// Start cloud uploads immediately
|
|
423
|
+
let azurePromise;
|
|
424
|
+
if (!saveToLocal) {
|
|
425
|
+
azurePromise = saveToAzureStorage(
|
|
426
|
+
context,
|
|
427
|
+
uploadName,
|
|
428
|
+
azureStream,
|
|
429
|
+
).catch(async (err) => {
|
|
430
|
+
cloudUploadError = err;
|
|
431
|
+
// Fallback: try from disk if available
|
|
432
|
+
if (diskWritePromise) {
|
|
433
|
+
await diskWritePromise;
|
|
434
|
+
const diskStream = fs.createReadStream(tempFilePath, {
|
|
435
|
+
highWaterMark: 1024 * 1024,
|
|
436
|
+
autoClose: true,
|
|
437
|
+
});
|
|
438
|
+
return saveToAzureStorage(context, uploadName, diskStream);
|
|
439
|
+
}
|
|
440
|
+
throw err;
|
|
441
|
+
});
|
|
442
|
+
}
|
|
443
|
+
let gcsPromise;
|
|
444
|
+
if (gcsStream) {
|
|
445
|
+
gcsPromise = saveToGoogleStorage(
|
|
446
|
+
context,
|
|
447
|
+
uploadName,
|
|
448
|
+
gcsStream,
|
|
449
|
+
).catch(async (err) => {
|
|
450
|
+
cloudUploadError = err;
|
|
451
|
+
if (diskWritePromise) {
|
|
452
|
+
await diskWritePromise;
|
|
453
|
+
const diskStream = fs.createReadStream(tempFilePath, {
|
|
454
|
+
highWaterMark: 1024 * 1024,
|
|
455
|
+
autoClose: true,
|
|
456
|
+
});
|
|
457
|
+
return saveToGoogleStorage(context, uploadName, diskStream);
|
|
458
|
+
}
|
|
459
|
+
throw err;
|
|
460
|
+
});
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
// Wait for cloud uploads to finish
|
|
285
464
|
try {
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
465
|
+
const results = await Promise.all(
|
|
466
|
+
[
|
|
467
|
+
azurePromise
|
|
468
|
+
? azurePromise.then((url) => ({ url, type: "primary" }))
|
|
469
|
+
: null,
|
|
470
|
+
!azurePromise && saveToLocal
|
|
471
|
+
? Promise.resolve({ url: null, type: "primary-local" }) // placeholder for local, url handled later
|
|
472
|
+
: null,
|
|
473
|
+
gcsPromise
|
|
474
|
+
? gcsPromise.then((gcs) => ({ gcs, type: "gcs" }))
|
|
475
|
+
: null,
|
|
476
|
+
].filter(Boolean),
|
|
477
|
+
);
|
|
478
|
+
|
|
479
|
+
const result = {
|
|
480
|
+
message: `File '${uploadName}' uploaded successfully.`,
|
|
481
|
+
filename: uploadName,
|
|
482
|
+
...results.reduce((acc, result) => {
|
|
483
|
+
if (result.type === "primary") acc.url = result.url;
|
|
484
|
+
if (result.type === "gcs")
|
|
485
|
+
acc.gcs = ensureUnencodedGcsUrl(result.gcs);
|
|
486
|
+
return acc;
|
|
487
|
+
}, {}),
|
|
488
|
+
};
|
|
489
|
+
if (hash) result.hash = hash;
|
|
490
|
+
|
|
491
|
+
// If saving locally, wait for disk write to finish and then move to public folder
|
|
492
|
+
if (saveToLocal) {
|
|
493
|
+
try {
|
|
494
|
+
if (diskWritePromise) {
|
|
495
|
+
await diskWritePromise; // ensure file fully written
|
|
496
|
+
}
|
|
497
|
+
const localUrl = await saveToLocalStorage(
|
|
498
|
+
context,
|
|
499
|
+
requestId,
|
|
500
|
+
uploadName,
|
|
501
|
+
fs.createReadStream(tempFilePath, {
|
|
502
|
+
highWaterMark: 1024 * 1024,
|
|
503
|
+
autoClose: true,
|
|
504
|
+
}),
|
|
505
|
+
);
|
|
506
|
+
result.url = localUrl;
|
|
507
|
+
} catch (err) {
|
|
508
|
+
console.error("Error saving to local storage:", err);
|
|
509
|
+
throw err;
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
// After original uploads, handle optional conversion
|
|
514
|
+
const conversionService = new FileConversionService(
|
|
515
|
+
context,
|
|
516
|
+
!saveToLocal,
|
|
517
|
+
);
|
|
518
|
+
|
|
519
|
+
if (conversionService.needsConversion(fileExtension)) {
|
|
520
|
+
try {
|
|
521
|
+
context.log("Starting file conversion (busboy)...");
|
|
522
|
+
|
|
523
|
+
// Ensure we have a local copy of the file for conversion
|
|
524
|
+
let localPathForConversion = tempFilePath;
|
|
525
|
+
|
|
526
|
+
if (!localPathForConversion) {
|
|
527
|
+
// No temp file was written (saveToLocal === false). Download from primary URL.
|
|
528
|
+
const tmpDir = fs.mkdtempSync(
|
|
529
|
+
path.join(os.tmpdir(), "convert-"),
|
|
530
|
+
);
|
|
531
|
+
localPathForConversion = path.join(tmpDir, uploadName);
|
|
532
|
+
await conversionService._downloadFile(
|
|
533
|
+
result.url,
|
|
534
|
+
localPathForConversion,
|
|
535
|
+
);
|
|
536
|
+
} else {
|
|
537
|
+
// Wait until disk write completes to guarantee full file is present
|
|
538
|
+
if (diskWritePromise) {
|
|
539
|
+
await diskWritePromise;
|
|
309
540
|
}
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
const safeFilename = path.basename(filename); // Sanitize filename
|
|
340
|
-
const uploadName = `${requestId || uuidv4()}_${safeFilename}`;
|
|
341
|
-
const azureStream = !saveToLocal ? new PassThrough() : null;
|
|
342
|
-
const gcsStream = gcs ? new PassThrough() : null;
|
|
343
|
-
let diskWriteStream, tempDir, tempFilePath;
|
|
344
|
-
let diskWritePromise;
|
|
345
|
-
let diskWriteError = null;
|
|
346
|
-
let cloudUploadError = null;
|
|
347
|
-
|
|
348
|
-
// Start local disk write in parallel (non-blocking for response)
|
|
349
|
-
if (saveToLocal) {
|
|
350
|
-
try {
|
|
351
|
-
tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'upload-'));
|
|
352
|
-
} catch (err) {
|
|
353
|
-
console.error('Error creating tempDir:', err);
|
|
354
|
-
errorOccurred = true;
|
|
355
|
-
reject(err);
|
|
356
|
-
return;
|
|
357
|
-
}
|
|
358
|
-
tempFilePath = path.join(tempDir, safeFilename);
|
|
359
|
-
try {
|
|
360
|
-
diskWriteStream = fs.createWriteStream(tempFilePath, {
|
|
361
|
-
highWaterMark: 1024 * 1024,
|
|
362
|
-
autoClose: true,
|
|
363
|
-
});
|
|
364
|
-
} catch (err) {
|
|
365
|
-
console.error('Error creating write stream:', err, 'Temp dir exists:', fs.existsSync(tempDir));
|
|
366
|
-
errorOccurred = true;
|
|
367
|
-
reject(err);
|
|
368
|
-
return;
|
|
369
|
-
}
|
|
370
|
-
diskWriteStream.on('error', (err) => {
|
|
371
|
-
console.error('Disk write stream error:', err);
|
|
372
|
-
});
|
|
373
|
-
diskWriteStream.on('close', () => {
|
|
374
|
-
console.log('Disk write stream closed for:', tempFilePath);
|
|
375
|
-
});
|
|
376
|
-
diskWritePromise = new Promise((res, rej) => {
|
|
377
|
-
diskWriteStream.on('finish', res);
|
|
378
|
-
diskWriteStream.on('error', (err) => {
|
|
379
|
-
diskWriteError = err;
|
|
380
|
-
rej(err);
|
|
381
|
-
});
|
|
382
|
-
});
|
|
383
|
-
}
|
|
384
|
-
|
|
385
|
-
// Pipe incoming file to all destinations
|
|
386
|
-
let receivedAnyData = false;
|
|
387
|
-
file.on('data', () => { receivedAnyData = true; });
|
|
388
|
-
if (azureStream) file.pipe(azureStream);
|
|
389
|
-
if (gcsStream) file.pipe(gcsStream);
|
|
390
|
-
if (diskWriteStream) file.pipe(diskWriteStream);
|
|
391
|
-
|
|
392
|
-
// Listen for end event to check for empty file
|
|
393
|
-
file.on('end', async () => {
|
|
394
|
-
if (!receivedAnyData) {
|
|
395
|
-
errorOccurred = true;
|
|
396
|
-
// Abort all streams
|
|
397
|
-
if (azureStream) azureStream.destroy();
|
|
398
|
-
if (gcsStream) gcsStream.destroy();
|
|
399
|
-
if (diskWriteStream) diskWriteStream.destroy();
|
|
400
|
-
const err = new Error('Invalid file: file is empty');
|
|
401
|
-
err.status = 400;
|
|
402
|
-
reject(err);
|
|
403
|
-
}
|
|
404
|
-
});
|
|
405
|
-
|
|
406
|
-
// Start cloud uploads immediately
|
|
407
|
-
let azurePromise;
|
|
408
|
-
if (!saveToLocal) {
|
|
409
|
-
azurePromise = saveToAzureStorage(context, uploadName, azureStream)
|
|
410
|
-
.catch(async (err) => {
|
|
411
|
-
cloudUploadError = err;
|
|
412
|
-
// Fallback: try from disk if available
|
|
413
|
-
if (diskWritePromise) {
|
|
414
|
-
await diskWritePromise;
|
|
415
|
-
const diskStream = fs.createReadStream(tempFilePath, {
|
|
416
|
-
highWaterMark: 1024 * 1024,
|
|
417
|
-
autoClose: true,
|
|
418
|
-
});
|
|
419
|
-
return saveToAzureStorage(context, uploadName, diskStream);
|
|
420
|
-
}
|
|
421
|
-
throw err;
|
|
422
|
-
});
|
|
423
|
-
}
|
|
424
|
-
let gcsPromise;
|
|
425
|
-
if (gcsStream) {
|
|
426
|
-
gcsPromise = saveToGoogleStorage(context, uploadName, gcsStream)
|
|
427
|
-
.catch(async (err) => {
|
|
428
|
-
cloudUploadError = err;
|
|
429
|
-
if (diskWritePromise) {
|
|
430
|
-
await diskWritePromise;
|
|
431
|
-
const diskStream = fs.createReadStream(tempFilePath, {
|
|
432
|
-
highWaterMark: 1024 * 1024,
|
|
433
|
-
autoClose: true,
|
|
434
|
-
});
|
|
435
|
-
return saveToGoogleStorage(context, uploadName, diskStream);
|
|
436
|
-
}
|
|
437
|
-
throw err;
|
|
438
|
-
});
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
// Wait for cloud uploads to finish
|
|
442
|
-
try {
|
|
443
|
-
const results = await Promise.all([
|
|
444
|
-
azurePromise ? azurePromise.then((url) => ({ url, type: 'primary' })) : null,
|
|
445
|
-
(!azurePromise && saveToLocal)
|
|
446
|
-
? Promise.resolve({ url: null, type: 'primary-local' }) // placeholder for local, url handled later
|
|
447
|
-
: null,
|
|
448
|
-
gcsPromise ? gcsPromise.then((gcs) => ({ gcs, type: 'gcs' })) : null,
|
|
449
|
-
].filter(Boolean));
|
|
450
|
-
|
|
451
|
-
const result = {
|
|
452
|
-
message: `File '${uploadName}' uploaded successfully.`,
|
|
453
|
-
filename: uploadName,
|
|
454
|
-
...results.reduce((acc, result) => {
|
|
455
|
-
if (result.type === 'primary') acc.url = result.url;
|
|
456
|
-
if (result.type === 'gcs') acc.gcs = ensureUnencodedGcsUrl(result.gcs);
|
|
457
|
-
return acc;
|
|
458
|
-
}, {}),
|
|
459
|
-
};
|
|
460
|
-
if (hash) result.hash = hash;
|
|
461
|
-
|
|
462
|
-
// If saving locally, wait for disk write to finish and then move to public folder
|
|
463
|
-
if (saveToLocal) {
|
|
464
|
-
try {
|
|
465
|
-
if (diskWritePromise) {
|
|
466
|
-
await diskWritePromise; // ensure file fully written
|
|
467
|
-
}
|
|
468
|
-
const localUrl = await saveToLocalStorage(
|
|
469
|
-
context,
|
|
470
|
-
requestId,
|
|
471
|
-
uploadName,
|
|
472
|
-
fs.createReadStream(tempFilePath, {
|
|
473
|
-
highWaterMark: 1024 * 1024,
|
|
474
|
-
autoClose: true,
|
|
475
|
-
}),
|
|
476
|
-
);
|
|
477
|
-
result.url = localUrl;
|
|
478
|
-
} catch (err) {
|
|
479
|
-
console.error('Error saving to local storage:', err);
|
|
480
|
-
throw err;
|
|
481
|
-
}
|
|
482
|
-
}
|
|
483
|
-
|
|
484
|
-
// After original uploads, handle optional conversion
|
|
485
|
-
const conversionService = new FileConversionService(context, !saveToLocal);
|
|
486
|
-
|
|
487
|
-
if (conversionService.needsConversion(safeFilename)) {
|
|
488
|
-
try {
|
|
489
|
-
context.log('Starting file conversion (busboy)...');
|
|
490
|
-
|
|
491
|
-
// Ensure we have a local copy of the file for conversion
|
|
492
|
-
let localPathForConversion = tempFilePath;
|
|
493
|
-
|
|
494
|
-
if (!localPathForConversion) {
|
|
495
|
-
// No temp file was written (saveToLocal === false). Download from primary URL.
|
|
496
|
-
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'convert-'));
|
|
497
|
-
localPathForConversion = path.join(tmpDir, safeFilename);
|
|
498
|
-
await conversionService._downloadFile(result.url, localPathForConversion);
|
|
499
|
-
} else {
|
|
500
|
-
// Wait until disk write completes to guarantee full file is present
|
|
501
|
-
if (diskWritePromise) {
|
|
502
|
-
await diskWritePromise;
|
|
503
|
-
}
|
|
504
|
-
}
|
|
505
|
-
|
|
506
|
-
// Perform the conversion
|
|
507
|
-
const conversion = await conversionService.convertFile(localPathForConversion, result.url);
|
|
508
|
-
context.log('File conversion completed (busboy):', conversion);
|
|
509
|
-
|
|
510
|
-
if (conversion.converted) {
|
|
511
|
-
context.log('Saving converted file (busboy)...');
|
|
512
|
-
// Save converted file to primary storage
|
|
513
|
-
const convertedSaveResult = await conversionService._saveConvertedFile(conversion.convertedPath, requestId);
|
|
514
|
-
|
|
515
|
-
// Optionally save to GCS
|
|
516
|
-
let convertedGcsUrl;
|
|
517
|
-
if (conversionService._isGCSConfigured()) {
|
|
518
|
-
convertedGcsUrl = await conversionService._uploadChunkToGCS(conversion.convertedPath, requestId);
|
|
519
|
-
}
|
|
520
|
-
|
|
521
|
-
// Attach to response body
|
|
522
|
-
result.converted = {
|
|
523
|
-
url: convertedSaveResult.url,
|
|
524
|
-
gcs: convertedGcsUrl,
|
|
525
|
-
};
|
|
526
|
-
context.log('Conversion process (busboy) completed successfully');
|
|
527
|
-
}
|
|
528
|
-
} catch (convErr) {
|
|
529
|
-
console.error('Error converting file (busboy):', convErr);
|
|
530
|
-
context.log('Error during conversion (busboy):', convErr.message);
|
|
531
|
-
// Continue without failing the upload
|
|
532
|
-
}
|
|
533
|
-
}
|
|
534
|
-
|
|
535
|
-
// Respond after conversion (if any)
|
|
536
|
-
context.res = { status: 200, body: result };
|
|
537
|
-
resolve(result);
|
|
538
|
-
} catch (err) {
|
|
539
|
-
errorOccurred = true;
|
|
540
|
-
reject(err);
|
|
541
|
-
} finally {
|
|
542
|
-
// Clean up temp file if written
|
|
543
|
-
if (tempDir) {
|
|
544
|
-
fs.rmSync(tempDir, { recursive: true, force: true });
|
|
545
|
-
}
|
|
546
|
-
}
|
|
547
|
-
});
|
|
548
|
-
|
|
549
|
-
busboy.on('error', (error) => {
|
|
550
|
-
if (errorOccurred) return;
|
|
551
|
-
errorOccurred = true;
|
|
552
|
-
const err = new Error('No file provided in request');
|
|
553
|
-
err.status = 400;
|
|
554
|
-
reject(err);
|
|
555
|
-
});
|
|
556
|
-
|
|
557
|
-
busboy.on('finish', () => {
|
|
558
|
-
if (errorOccurred) return;
|
|
559
|
-
if (!hasFile) {
|
|
560
|
-
errorOccurred = true;
|
|
561
|
-
const err = new Error('No file provided in request');
|
|
562
|
-
err.status = 400;
|
|
563
|
-
reject(err);
|
|
564
|
-
}
|
|
565
|
-
});
|
|
566
|
-
|
|
567
|
-
// Handle errors from piping the request
|
|
568
|
-
req.on('error', (error) => {
|
|
569
|
-
if (errorOccurred) return;
|
|
570
|
-
errorOccurred = true;
|
|
571
|
-
// Only log unexpected errors
|
|
572
|
-
if (error.message !== 'No file provided in request') {
|
|
573
|
-
context.log('Error in request stream:', error);
|
|
574
|
-
}
|
|
575
|
-
const err = new Error('No file provided in request');
|
|
576
|
-
err.status = 400;
|
|
577
|
-
reject(err);
|
|
578
|
-
});
|
|
579
|
-
|
|
580
|
-
try {
|
|
581
|
-
req.pipe(busboy);
|
|
582
|
-
} catch (error) {
|
|
583
|
-
if (errorOccurred) return;
|
|
584
|
-
errorOccurred = true;
|
|
585
|
-
// Only log unexpected errors
|
|
586
|
-
if (error.message !== 'No file provided in request') {
|
|
587
|
-
context.log('Error piping request to busboy:', error);
|
|
588
|
-
}
|
|
589
|
-
const err = new Error('No file provided in request');
|
|
590
|
-
err.status = 400;
|
|
591
|
-
reject(err);
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
// Perform the conversion
|
|
544
|
+
const conversion = await conversionService.convertFile(
|
|
545
|
+
localPathForConversion,
|
|
546
|
+
result.url,
|
|
547
|
+
);
|
|
548
|
+
context.log(
|
|
549
|
+
"File conversion completed (busboy):",
|
|
550
|
+
conversion,
|
|
551
|
+
);
|
|
552
|
+
|
|
553
|
+
if (conversion.converted) {
|
|
554
|
+
context.log("Saving converted file (busboy)...");
|
|
555
|
+
// Save converted file to primary storage
|
|
556
|
+
const convertedSaveResult =
|
|
557
|
+
await conversionService._saveConvertedFile(
|
|
558
|
+
conversion.convertedPath,
|
|
559
|
+
requestId,
|
|
560
|
+
);
|
|
561
|
+
|
|
562
|
+
// Optionally save to GCS
|
|
563
|
+
let convertedGcsUrl;
|
|
564
|
+
if (conversionService._isGCSConfigured()) {
|
|
565
|
+
convertedGcsUrl =
|
|
566
|
+
await conversionService._uploadChunkToGCS(
|
|
567
|
+
conversion.convertedPath,
|
|
568
|
+
requestId,
|
|
569
|
+
);
|
|
592
570
|
}
|
|
571
|
+
|
|
572
|
+
// Attach to response body
|
|
573
|
+
result.converted = {
|
|
574
|
+
url: convertedSaveResult.url,
|
|
575
|
+
gcs: convertedGcsUrl,
|
|
576
|
+
};
|
|
577
|
+
context.log(
|
|
578
|
+
"Conversion process (busboy) completed successfully",
|
|
579
|
+
);
|
|
580
|
+
}
|
|
581
|
+
} catch (convErr) {
|
|
582
|
+
console.error("Error converting file (busboy):", convErr);
|
|
583
|
+
context.log(
|
|
584
|
+
"Error during conversion (busboy):",
|
|
585
|
+
convErr.message,
|
|
586
|
+
);
|
|
587
|
+
// Continue without failing the upload
|
|
593
588
|
}
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
// Respond after conversion (if any)
|
|
592
|
+
context.res = { status: 200, body: result };
|
|
593
|
+
resolve(result);
|
|
594
|
+
} catch (err) {
|
|
595
|
+
errorOccurred = true;
|
|
596
|
+
reject(err);
|
|
597
|
+
} finally {
|
|
598
|
+
// Clean up temp file if written
|
|
599
|
+
if (tempDir) {
|
|
600
|
+
fs.rmSync(tempDir, { recursive: true, force: true });
|
|
601
|
+
}
|
|
602
602
|
}
|
|
603
|
-
|
|
604
|
-
|
|
603
|
+
});
|
|
604
|
+
|
|
605
|
+
busboy.on("error", (error) => {
|
|
606
|
+
if (errorOccurred) return;
|
|
607
|
+
errorOccurred = true;
|
|
608
|
+
const err = new Error("No file provided in request");
|
|
609
|
+
err.status = 400;
|
|
610
|
+
reject(err);
|
|
611
|
+
});
|
|
612
|
+
|
|
613
|
+
busboy.on("finish", () => {
|
|
614
|
+
if (errorOccurred) return;
|
|
615
|
+
if (!hasFile) {
|
|
616
|
+
errorOccurred = true;
|
|
617
|
+
const err = new Error("No file provided in request");
|
|
618
|
+
err.status = 400;
|
|
619
|
+
reject(err);
|
|
620
|
+
}
|
|
621
|
+
});
|
|
622
|
+
|
|
623
|
+
// Handle errors from piping the request
|
|
624
|
+
req.on("error", (error) => {
|
|
625
|
+
if (errorOccurred) return;
|
|
626
|
+
errorOccurred = true;
|
|
627
|
+
// Only log unexpected errors
|
|
628
|
+
if (error.message !== "No file provided in request") {
|
|
629
|
+
context.log("Error in request stream:", error);
|
|
630
|
+
}
|
|
631
|
+
const err = new Error("No file provided in request");
|
|
632
|
+
err.status = 400;
|
|
633
|
+
reject(err);
|
|
634
|
+
});
|
|
635
|
+
|
|
636
|
+
try {
|
|
637
|
+
req.pipe(busboy);
|
|
638
|
+
} catch (error) {
|
|
639
|
+
if (errorOccurred) return;
|
|
640
|
+
errorOccurred = true;
|
|
641
|
+
// Only log unexpected errors
|
|
642
|
+
if (error.message !== "No file provided in request") {
|
|
643
|
+
context.log("Error piping request to busboy:", error);
|
|
644
|
+
}
|
|
645
|
+
const err = new Error("No file provided in request");
|
|
646
|
+
err.status = 400;
|
|
647
|
+
reject(err);
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
} catch (error) {
|
|
651
|
+
// Only log unexpected errors
|
|
652
|
+
if (error.message !== "No file provided in request") {
|
|
653
|
+
context.log("Error processing file upload:", error);
|
|
654
|
+
}
|
|
655
|
+
const err = new Error(error.message || "Error processing file upload.");
|
|
656
|
+
err.status = error.status || 500;
|
|
657
|
+
reject(err);
|
|
658
|
+
}
|
|
659
|
+
})();
|
|
660
|
+
});
|
|
605
661
|
}
|
|
606
662
|
|
|
607
663
|
// Helper function to handle local file storage
|
|
608
664
|
/**
 * Write an uploaded stream into the per-request folder under `publicFolder`
 * and return the HTTP URL it is served from.
 *
 * @param {object} context - Invocation context (not used by this helper).
 * @param {string} requestId - Name of the sub-folder created under `publicFolder`.
 * @param {string} encodedFilename - Requested file name; passed through `sanitizeFilename`.
 * @param {import('stream').Readable} file - Source stream to persist.
 * @returns {Promise<string>} URL of the form `http://<ip>:<port>/files/<requestId>/<name>`.
 */
async function saveToLocalStorage(context, requestId, encodedFilename, file) {
  // Make sure the request folder exists before anything is written into it.
  const requestFolder = path.join(publicFolder, requestId);
  fs.mkdirSync(requestFolder, { recursive: true });

  // The sanitized name is used both on disk and in the returned URL.
  const safeName = sanitizeFilename(encodedFilename);
  const target = fs.createWriteStream(`${requestFolder}/${safeName}`);

  await pipeline(file, target);

  const publicUrl = `http://${ipAddress}:${port}/files/${requestId}/${safeName}`;
  return publicUrl;
}
|
|
619
675
|
|
|
620
676
|
// Helper function to handle Azure blob storage
|
|
621
677
|
/**
 * Stream a file into the Azure blob container and return a SAS-signed URL.
 *
 * @param {object} context - Invocation context; `context.log` receives a progress line.
 * @param {string} encodedFilename - Requested name; sanitized and URI-encoded to form the blob name.
 * @param {import('stream').Readable} file - Stream handed to `uploadStream`.
 * @returns {Promise<string>} `<blob url>?<sas token>`.
 */
async function saveToAzureStorage(context, encodedFilename, file) {
  const { containerClient } = await getBlobClient();

  // mime.lookup yields a falsy value for unknown types; in that case no content-type header is sent.
  const detectedType = mime.lookup(encodedFilename);
  const blobHTTPHeaders = detectedType ? { blobContentType: detectedType } : {};

  // Sanitize first, then encode exactly once.
  const blobName = encodeURIComponent(sanitizeFilename(encodedFilename));

  // NOTE(review): maxConcurrency/blockSize travel inside the options object while the
  // positional bufferSize/maxConcurrency arguments are undefined — confirm the SDK honours them here.
  const uploadOptions = {
    blobHTTPHeaders,
    maxConcurrency: 50,
    blockSize: 8 * 1024 * 1024,
  };

  const blobClient = containerClient.getBlockBlobClient(blobName);
  context.log(`Uploading to Azure... ${blobName}`);
  await blobClient.uploadStream(file, undefined, undefined, uploadOptions);

  return `${blobClient.url}?${generateSASToken(containerClient, blobName)}`;
}
|
|
641
697
|
|
|
642
698
|
// Helper function to upload a file to Google Cloud Storage
|
|
643
699
|
/**
 * Pipe a stream into an object of the configured GCS bucket.
 *
 * @param {object} context - Invocation context; `context.log` receives a progress line.
 * @param {import('stream').Readable} file - Source stream.
 * @param {string} filename - Requested object name; passed through `sanitizeFilename`.
 * @returns {Promise<string>} `gs://<bucket>/<objectName>` of the written object.
 */
async function uploadToGCS(context, file, filename) {
  const objectName = sanitizeFilename(filename);
  const target = gcs.bucket(GCS_BUCKETNAME).file(objectName);

  // Resumable upload with validation disabled; falls back to a generic
  // content type when the extension is not recognised.
  const writeOptions = {
    resumable: true,
    validation: false,
    metadata: {
      contentType: mime.lookup(objectName) || "application/octet-stream",
    },
    chunkSize: 8 * 1024 * 1024,
    numRetries: 3,
    retryDelay: 1000,
  };
  const sink = target.createWriteStream(writeOptions);

  context.log(`Uploading to GCS... ${objectName}`);
  await pipeline(file, sink);

  return `gs://${GCS_BUCKETNAME}/${objectName}`;
}
|
|
660
716
|
|
|
661
717
|
// Wrapper that checks if GCS is configured
|
|
662
718
|
/**
 * Guarded entry point for GCS uploads: delegates to `uploadToGCS` when the
 * module-level `gcs` client exists.
 *
 * @param {object} context - Invocation context, forwarded unchanged.
 * @param {string} encodedFilename - Object name, forwarded as the upload's filename.
 * @param {import('stream').Readable} file - Stream to upload.
 * @returns {Promise<string>} Whatever `uploadToGCS` resolves to.
 * @throws {Error} When the `gcs` client is not initialized.
 */
async function saveToGoogleStorage(context, encodedFilename, file) {
  const gcsReady = Boolean(gcs);
  if (gcsReady) {
    // Note the argument order: uploadToGCS takes the stream before the name.
    return uploadToGCS(context, file, encodedFilename);
  }
  throw new Error("Google Cloud Storage is not initialized");
}
|
|
668
724
|
|
|
669
725
|
/**
 * Buffer an incoming file to a temp directory, upload it to primary storage
 * (local folder or Azure) and, when a GCS client exists, to GCS in parallel,
 * then optionally convert it and upload the converted copy.
 *
 * On success `context.res` is set to `{ status: 200, body: result }` and
 * `resolve(result)` is called. When `file` is missing, a 400 response is set
 * and resolved instead. Conversion failures are logged and do not fail the upload.
 *
 * @param {object} context - Invocation context; provides `log` and receives `res`.
 * @param {string} requestId - Request identifier, forwarded to storage/conversion helpers.
 * @param {object} body - Request body; `body.url` (if set) is cleaned up on failure.
 * @param {boolean} saveToLocal - Use local storage instead of Azure as primary storage.
 * @param {import('stream').Readable} file - Stream with the file contents.
 * @param {string} filename - Name used for the upload (and for the temp copy).
 * @param {Function} resolve - Callback receiving the result (or the 400 response).
 * @param {string|null} [hash=null] - Optional hash echoed back in the result.
 * @returns {Promise<void>}
 * @throws Re-throws any error raised by the write/upload steps after cleanup.
 */
async function uploadFile(
  context,
  requestId,
  body,
  saveToLocal,
  file,
  filename,
  resolve,
  hash = null,
) {
  // Declared outside the try block so the failure path can remove it as well.
  let tempDir = null;

  try {
    if (!file) {
      context.res = {
        status: 400,
        body: "No file provided in request",
      };
      resolve(context.res);
      return;
    }

    const ext = path.extname(filename).toLowerCase();
    context.log(`Processing file with extension: ${ext}`);

    // Create temp directory for file operations
    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "upload-"));
    // basename() keeps the temp copy inside tempDir even when filename
    // carries directory components.
    const tempOriginal = path.join(tempDir, path.basename(filename));
    context.log(`Created temp directory: ${tempDir}`);

    // Optimize initial write with larger buffer
    const writeStream = fs.createWriteStream(tempOriginal, {
      highWaterMark: 1024 * 1024, // 1MB chunks for initial write
      autoClose: true,
    });

    // Use pipeline with error handling
    context.log("Writing file to temp location...");
    await pipeline(file, writeStream);
    context.log("File written to temp location successfully");

    const uploadPath = tempOriginal;
    // Use the filename that was passed in (which should already be the LLM-friendly name)
    const uploadName = filename;
    const fileExtension = path.extname(filename);
    context.log(`Prepared upload name: ${uploadName}`);

    // Create optimized read streams with larger buffers for storage uploads
    const createOptimizedReadStream = (sourcePath) =>
      fs.createReadStream(sourcePath, {
        highWaterMark: 1024 * 1024, // 1MB chunks for storage uploads
        autoClose: true,
      });

    // Upload original in parallel with optimized streams
    const storagePromises = [];
    context.log("Starting primary storage upload...");
    const primaryPromise = saveToLocal
      ? saveToLocalStorage(
          context,
          requestId,
          uploadName,
          createOptimizedReadStream(uploadPath),
        )
      : saveToAzureStorage(
          context,
          uploadName,
          createOptimizedReadStream(uploadPath),
        );
    storagePromises.push(
      primaryPromise.then((url) => {
        context.log("Primary storage upload completed");
        return { url, type: "primary" };
      }),
    );

    if (gcs) {
      context.log("Starting GCS upload...");
      storagePromises.push(
        saveToGoogleStorage(
          context,
          uploadName,
          createOptimizedReadStream(uploadPath),
        ).then((gcsUrl) => {
          context.log("GCS upload completed");
          return {
            gcs: gcsUrl,
            type: "gcs",
          };
        }),
      );
    }

    // Wait for original uploads to complete
    context.log("Waiting for all storage uploads to complete...");
    const results = await Promise.all(storagePromises);
    const result = {
      message: `File '${uploadName}' ${saveToLocal ? "saved to folder" : "uploaded"} successfully.`,
      filename: uploadName,
      ...results.reduce((acc, entry) => {
        if (entry.type === "primary") acc.url = entry.url;
        if (entry.type === "gcs") acc.gcs = ensureUnencodedGcsUrl(entry.gcs);
        return acc;
      }, {}),
    };

    if (hash) {
      result.hash = hash;
    }

    // Initialize conversion service
    const conversionService = new FileConversionService(context, !saveToLocal);

    // Check if file needs conversion and handle it
    if (conversionService.needsConversion(fileExtension)) {
      try {
        context.log("Starting file conversion...");
        // Convert the file
        const conversion = await conversionService.convertFile(
          uploadPath,
          result.url,
        );
        context.log("File conversion completed:", conversion);

        if (conversion.converted) {
          context.log("Saving converted file...");
          // Save converted file
          const convertedSaveResult =
            await conversionService._saveConvertedFile(
              conversion.convertedPath,
              requestId,
            );
          context.log("Converted file saved to primary storage");

          // If GCS is configured, also save to GCS
          let convertedGcsUrl;
          if (conversionService._isGCSConfigured()) {
            context.log("Saving converted file to GCS...");
            convertedGcsUrl = await conversionService._uploadChunkToGCS(
              conversion.convertedPath,
              requestId,
            );
            context.log("Converted file saved to GCS");
          }

          // Add converted file info to result
          result.converted = {
            url: convertedSaveResult.url,
            gcs: convertedGcsUrl,
          };
          context.log("Conversion process completed successfully");
        }
      } catch (error) {
        console.error("Error converting file:", error);
        context.log("Error during conversion:", error.message);
        // Don't fail the upload if conversion fails
      }
    }

    context.res = {
      status: 200,
      body: result,
    };

    // Clean up temp files
    context.log("Cleaning up temporary files...");
    if (tempDir) {
      fs.rmSync(tempDir, { recursive: true, force: true });
      context.log("Temporary files cleaned up");
    }

    context.log("Upload process completed successfully");
    resolve(result);
  } catch (error) {
    context.log("Error in upload process:", error);

    // Remove the temp directory on failure too; this is best-effort so a
    // cleanup problem never hides the original error.
    if (tempDir) {
      try {
        fs.rmSync(tempDir, { recursive: true, force: true });
      } catch (tempCleanupError) {
        context.log(
          "Error removing temporary files after failure:",
          tempCleanupError,
        );
      }
    }

    // body may be absent; optional chaining keeps the original error intact.
    if (body?.url) {
      try {
        await cleanup(context, [body.url]);
      } catch (cleanupError) {
        context.log("Error during cleanup after failure:", cleanupError);
      }
    }
    throw error;
  }
}
|
|
842
911
|
|
|
843
912
|
// Helper to convert a stream to a buffer
|
|
844
913
|
/**
 * Collect everything a readable stream emits into a single Buffer.
 *
 * String chunks (emitted by streams with an encoding set or in object mode)
 * are converted to Buffers as they arrive; without that, `Buffer.concat`
 * would throw inside the `end` handler and the promise would never settle.
 *
 * @param {import('stream').Readable} stream - Stream to drain.
 * @returns {Promise<Buffer>} Concatenated contents (empty Buffer for an empty stream).
 *   Rejects with the stream's error if it emits `error`.
 */
async function streamToBuffer(stream) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    stream.on("data", (chunk) => {
      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
    });
    stream.on("end", () => resolve(Buffer.concat(chunks)));
    stream.on("error", reject);
  });
}
|
|
852
921
|
|
|
853
922
|
// Function to delete files that haven't been used in more than a month
|
|
854
923
|
/**
 * Derive a blob name from a blob URL that lives in the given container.
 * Strips the query string/fragment (e.g. a SAS token), the container URL
 * prefix and leading slashes, then percent-decodes the remaining path once.
 */
function extractBlobNameFromUrl(url, containerUrl) {
  const stripQuery = (value) => value.split(/[?#]/, 1)[0];
  const base = stripQuery(containerUrl).replace(/\/+$/, "");
  const target = stripQuery(url);
  const relative = target.startsWith(base) ? target.slice(base.length) : target;
  const encodedName = relative.replace(/^\/+/, "");
  try {
    return decodeURIComponent(encodedName);
  } catch {
    // Malformed escape sequence: fall back to the raw path.
    return encodedName;
  }
}

/**
 * Delete blobs from the Azure container.
 *
 * Without `urls`, every blob whose `lastModified` is more than one month old
 * is deleted. With `urls`, each URL is mapped to its blob name and that blob
 * is deleted. Delete errors are logged (404s silently ignored) and never thrown.
 *
 * @param {object} context - Invocation context; `context.log` receives progress/errors.
 * @param {string[]|null} [urls=null] - Blob URLs to delete, or null for age-based cleanup.
 * @returns {Promise<string[]>} Names of the blobs that were deleted.
 */
async function cleanup(context, urls = null) {
  const { containerClient } = await getBlobClient();
  const cleanedURLs = [];

  if (!urls) {
    const xMonthAgo = new Date();
    xMonthAgo.setMonth(xMonthAgo.getMonth() - 1);

    const blobs = containerClient.listBlobsFlat();

    for await (const blob of blobs) {
      const lastModified = blob.properties.lastModified;
      if (lastModified < xMonthAgo) {
        try {
          const blockBlobClient = containerClient.getBlockBlobClient(blob.name);
          await blockBlobClient.delete();
          context.log(`Cleaned blob: ${blob.name}`);
          cleanedURLs.push(blob.name);
        } catch (error) {
          if (error.statusCode !== 404) {
            context.log(`Error cleaning blob ${blob.name}:`, error);
          }
        }
      }
    }
  } else {
    for (const url of urls) {
      try {
        // A plain string replace of the container URL would leave a leading
        // "/", the SAS query and percent-encoding in the name, so the delete
        // would target a blob that does not exist.
        const blobName = extractBlobNameFromUrl(url, containerClient.url);
        const blockBlobClient = containerClient.getBlockBlobClient(blobName);
        await blockBlobClient.delete();
        context.log(`Cleaned blob: ${blobName}`);
        cleanedURLs.push(blobName);
      } catch (error) {
        if (error.statusCode !== 404) {
          context.log(`Error cleaning blob ${url}:`, error);
        }
      }
    }
  }
  return cleanedURLs;
}
|
|
896
965
|
|
|
897
966
|
/**
 * Delete objects from the configured GCS bucket.
 *
 * With `urls`, each URL's object path (everything after the third `/`) is
 * deleted. Without `urls`, every object whose `updated` metadata is older
 * than 30 days is deleted. Afterwards each touched directory prefix is listed
 * and, if the listing is empty, `deleteFiles` is called for that prefix.
 *
 * @param {string[]|null} [urls=null] - Object URLs to delete, or null for age-based cleanup.
 * @returns {Promise<string[]>} The URLs (URL mode) or object names (age mode) deleted;
 *   an empty array when the `gcs` client is not available.
 */
async function cleanupGCS(urls = null) {
  if (!gcs) {
    return [];
  }

  const bucket = gcs.bucket(GCS_BUCKETNAME);
  const touchedDirs = new Set();
  const removed = [];

  if (urls) {
    for (const url of urls) {
      // Drop the scheme and bucket segments to obtain the object path.
      const objectPath = url.split("/").slice(3).join("/");
      const target = bucket.file(objectPath);
      touchedDirs.add(path.dirname(target.name));
      await target.delete();
      removed.push(url);
    }
  } else {
    const maxAgeDays = 30;
    const cutoff = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000;
    const [allFiles] = await bucket.getFiles();

    for (const candidate of allFiles) {
      const [meta] = await candidate.getMetadata();
      touchedDirs.add(path.dirname(candidate.name));

      // Objects without an `updated` timestamp are left alone.
      if (!meta.updated) continue;
      if (new Date(meta.updated).getTime() >= cutoff) continue;

      await candidate.delete();
      removed.push(candidate.name);
    }
  }

  for (const dir of touchedDirs) {
    const [remaining] = await bucket.getFiles({ prefix: dir });
    if (remaining.length === 0) {
      await bucket.deleteFiles({ prefix: dir });
    }
  }

  return removed;
}
|
|
940
1009
|
|
|
941
1010
|
async function deleteGCS(blobName) {
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
1011
|
+
if (!blobName) {
|
|
1012
|
+
console.log("[deleteGCS] No blobName provided, skipping GCS deletion");
|
|
1013
|
+
return;
|
|
1014
|
+
}
|
|
946
1015
|
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
1016
|
+
if (!gcs) {
|
|
1017
|
+
console.log("[deleteGCS] GCS not initialized, skipping deletion");
|
|
1018
|
+
return;
|
|
1019
|
+
}
|
|
951
1020
|
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
}
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
await Promise.all(files.map((file) => file.delete()));
|
|
997
|
-
console.log('[deleteGCS] All files deleted successfully');
|
|
998
|
-
} else {
|
|
999
|
-
console.log('[deleteGCS] No files found to delete');
|
|
1000
|
-
}
|
|
1001
|
-
}
|
|
1002
|
-
} catch (error) {
|
|
1003
|
-
// If we get a 404 error, it means the file is already gone, which is fine
|
|
1004
|
-
if (error.response?.status === 404 || error.code === 404) {
|
|
1005
|
-
console.log('[deleteGCS] File not found in GCS (404) - this is expected if file was already deleted');
|
|
1006
|
-
return;
|
|
1021
|
+
try {
|
|
1022
|
+
if (process.env.STORAGE_EMULATOR_HOST) {
|
|
1023
|
+
console.log(
|
|
1024
|
+
`[deleteGCS] Using emulator at ${process.env.STORAGE_EMULATOR_HOST}`,
|
|
1025
|
+
);
|
|
1026
|
+
console.log(
|
|
1027
|
+
`[deleteGCS] Attempting to delete files with prefix: ${blobName}`,
|
|
1028
|
+
);
|
|
1029
|
+
|
|
1030
|
+
// List files first
|
|
1031
|
+
const listUrl = `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${GCS_BUCKETNAME}/o?prefix=${blobName}`;
|
|
1032
|
+
console.log(`[deleteGCS] Listing files with URL: ${listUrl}`);
|
|
1033
|
+
|
|
1034
|
+
const listResponse = await axios.get(listUrl, {
|
|
1035
|
+
validateStatus: (status) => true,
|
|
1036
|
+
});
|
|
1037
|
+
console.log(`[deleteGCS] List response status: ${listResponse.status}`);
|
|
1038
|
+
console.log(
|
|
1039
|
+
`[deleteGCS] List response data: ${JSON.stringify(listResponse.data)}`,
|
|
1040
|
+
);
|
|
1041
|
+
|
|
1042
|
+
if (listResponse.status === 200 && listResponse.data.items) {
|
|
1043
|
+
console.log(
|
|
1044
|
+
`[deleteGCS] Found ${listResponse.data.items.length} items to delete`,
|
|
1045
|
+
);
|
|
1046
|
+
|
|
1047
|
+
// Delete each file
|
|
1048
|
+
for (const item of listResponse.data.items) {
|
|
1049
|
+
const deleteUrl = `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${GCS_BUCKETNAME}/o/${encodeURIComponent(item.name)}`;
|
|
1050
|
+
console.log(`[deleteGCS] Deleting file: ${item.name}`);
|
|
1051
|
+
console.log(`[deleteGCS] Delete URL: ${deleteUrl}`);
|
|
1052
|
+
|
|
1053
|
+
const deleteResponse = await axios.delete(deleteUrl, {
|
|
1054
|
+
validateStatus: (status) => true,
|
|
1055
|
+
headers: {
|
|
1056
|
+
"Content-Type": "application/json",
|
|
1057
|
+
},
|
|
1058
|
+
});
|
|
1059
|
+
console.log(
|
|
1060
|
+
`[deleteGCS] Delete response status: ${deleteResponse.status}`,
|
|
1061
|
+
);
|
|
1062
|
+
console.log(
|
|
1063
|
+
`[deleteGCS] Delete response data: ${JSON.stringify(deleteResponse.data)}`,
|
|
1064
|
+
);
|
|
1007
1065
|
}
|
|
1008
|
-
console.
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1066
|
+
console.log("[deleteGCS] All files deleted successfully");
|
|
1067
|
+
} else {
|
|
1068
|
+
console.log("[deleteGCS] No files found to delete");
|
|
1069
|
+
}
|
|
1070
|
+
} else {
|
|
1071
|
+
console.log("[deleteGCS] Using real GCS");
|
|
1072
|
+
const bucket = gcs.bucket(GCS_BUCKETNAME);
|
|
1073
|
+
const [files] = await bucket.getFiles({ prefix: blobName });
|
|
1074
|
+
console.log(`[deleteGCS] Found ${files.length} files to delete`);
|
|
1075
|
+
|
|
1076
|
+
if (files.length > 0) {
|
|
1077
|
+
await Promise.all(files.map((file) => file.delete()));
|
|
1078
|
+
console.log("[deleteGCS] All files deleted successfully");
|
|
1079
|
+
} else {
|
|
1080
|
+
console.log("[deleteGCS] No files found to delete");
|
|
1081
|
+
}
|
|
1021
1082
|
}
|
|
1083
|
+
} catch (error) {
|
|
1084
|
+
// If we get a 404 error, it means the file is already gone, which is fine
|
|
1085
|
+
if (error.response?.status === 404 || error.code === 404) {
|
|
1086
|
+
console.log(
|
|
1087
|
+
"[deleteGCS] File not found in GCS (404) - this is expected if file was already deleted",
|
|
1088
|
+
);
|
|
1089
|
+
return;
|
|
1090
|
+
}
|
|
1091
|
+
console.error("[deleteGCS] Error during deletion:", error);
|
|
1092
|
+
console.error("[deleteGCS] Error details:", {
|
|
1093
|
+
message: error.message,
|
|
1094
|
+
code: error.code,
|
|
1095
|
+
errors: error.errors,
|
|
1096
|
+
response: error.response
|
|
1097
|
+
? {
|
|
1098
|
+
status: error.response.status,
|
|
1099
|
+
statusText: error.response.statusText,
|
|
1100
|
+
data: error.response.data,
|
|
1101
|
+
headers: error.response.headers,
|
|
1102
|
+
}
|
|
1103
|
+
: null,
|
|
1104
|
+
});
|
|
1105
|
+
// Don't throw the error - we want to continue with cleanup even if GCS deletion fails
|
|
1106
|
+
}
|
|
1022
1107
|
}
|
|
1023
1108
|
|
|
1024
1109
|
// Helper function to ensure GCS upload for existing files
|
|
1025
1110
|
/**
 * Backfills the GCS copy of a file record that is missing one.
 *
 * When `existingFile.gcs` is falsy and the module-level `gcs` client is set,
 * the content at `existingFile.url` is fetched as a stream and handed to
 * `uploadToGCS` under a freshly generated short-id name that keeps the URL's
 * file extension. The value returned by `uploadToGCS` is stored on
 * `existingFile.gcs`. In every other case the record is returned untouched.
 *
 * @param {object} context - Invocation context; its `log` method is called and
 *   the object is forwarded to `uploadToGCS`.
 * @param {object} existingFile - File record with a `url` and an optional
 *   `gcs` property; it is updated in place.
 * @returns {Promise<object>} The same `existingFile` object that was passed in.
 */
async function ensureGCSUpload(context, existingFile) {
  // Nothing to do when a GCS copy is already recorded or no GCS client exists.
  if (existingFile.gcs || !gcs) {
    return existingFile;
  }

  context.log("GCS file was missing - uploading.");

  // Use LLM-friendly naming (short id + extension) instead of the original
  // filename; the query string is dropped before reading the extension.
  const [urlWithoutQuery] = existingFile.url.split("?");
  const extension = path.extname(urlWithoutQuery);
  const gcsName = `${generateShortId()}${extension}`;

  const { data: sourceStream } = await axios({
    method: "get",
    url: existingFile.url,
    responseType: "stream",
  });

  existingFile.gcs = await uploadToGCS(context, sourceStream, gcsName);
  return existingFile;
}
|
|
1034
1126
|
|
|
1035
|
-
/**
 * Uploads a local chunk file to the configured GCS bucket.
 *
 * The object is stored under `<requestId>/<name>`. A generated UUID replaces
 * `requestId` when it is falsy. `<name>` is `filename` when provided;
 * otherwise it is a generated short id followed by the extension of
 * `chunkPath`.
 *
 * @param {string} chunkPath - Local path of the file to upload.
 * @param {string} [requestId] - Folder name inside the bucket.
 * @param {string|null} [filename=null] - Explicit object name to use inside the folder.
 * @returns {Promise<string|null>} The `gs://` URL of the uploaded object, or
 *   `null` when the module-level `gcs` client is not set.
 */
async function uploadChunkToGCS(chunkPath, requestId, filename = null) {
  if (!gcs) {
    return null;
  }

  const folder = requestId || uuidv4();

  // Prefer the caller-supplied name; otherwise fall back to LLM-friendly
  // naming built from a short id and the chunk's own extension.
  let objectName = filename;
  if (!objectName) {
    const extension = path.extname(chunkPath);
    objectName = `${generateShortId()}${extension}`;
  }

  const destination = `${folder}/${objectName}`;
  const bucket = gcs.bucket(GCS_BUCKETNAME);
  await bucket.upload(chunkPath, { destination });

  return `gs://${GCS_BUCKETNAME}/${destination}`;
}
|
|
1043
1144
|
|
|
1044
1145
|
export {
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
};
|
|
1146
|
+
saveFileToBlob,
|
|
1147
|
+
deleteBlob,
|
|
1148
|
+
deleteGCS,
|
|
1149
|
+
uploadBlob,
|
|
1150
|
+
cleanup,
|
|
1151
|
+
cleanupGCS,
|
|
1152
|
+
gcsUrlExists,
|
|
1153
|
+
ensureGCSUpload,
|
|
1154
|
+
gcs,
|
|
1155
|
+
uploadChunkToGCS,
|
|
1156
|
+
downloadFromGCS,
|
|
1157
|
+
};
|