@aj-archipelago/cortex 1.1.18 → 1.1.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/default.example.json +1 -14
- package/helper-apps/cortex-file-handler/blobHandler.js +317 -142
- package/helper-apps/cortex-file-handler/index.js +98 -10
- package/helper-apps/cortex-file-handler/localFileHandler.js +45 -27
- package/helper-apps/cortex-file-handler/package-lock.json +785 -20
- package/helper-apps/cortex-file-handler/package.json +2 -0
- package/helper-apps/cortex-file-handler/redis.js +102 -2
- package/lib/pathwayTools.js +5 -1
- package/package.json +1 -1
- package/pathways/summary.js +1 -1
- package/server/modelExecutor.js +8 -0
- package/server/pathwayResolver.js +1 -0
- package/server/plugins/azureCognitivePlugin.js +26 -2
- package/server/plugins/claude3VertexPlugin.js +273 -140
- package/server/plugins/gemini15ChatPlugin.js +215 -0
- package/server/plugins/gemini15VisionPlugin.js +101 -0
- package/server/plugins/geminiChatPlugin.js +19 -7
- package/server/plugins/openAiVisionPlugin.js +9 -3
- package/server/resolver.js +10 -1
- package/server/typeDef.js +8 -8
- package/tests/vision.test.js +1 -1
package/config/default.example.json

@@ -1,5 +1,5 @@
 {
-    "defaultModelName": "oai-td3",
+    "defaultModelName": "oai-gpturbo",
     "models": {
         "azure-translate": {
             "type": "AZURE-TRANSLATE",
@@ -12,19 +12,6 @@
             "requestsPerSecond": 10,
             "maxTokenLength": 2000
         },
-        "oai-td3": {
-            "type": "OPENAI-COMPLETION",
-            "url": "https://api.openai.com/v1/completions",
-            "headers": {
-                "Authorization": "Bearer {{OPENAI_API_KEY}}",
-                "Content-Type": "application/json"
-            },
-            "params": {
-                "model": "text-davinci-003"
-            },
-            "requestsPerSecond": 10,
-            "maxTokenLength": 4096
-        },
         "oai-gpturbo": {
             "type": "OPENAI-CHAT",
             "url": "https://api.openai.com/v1/chat/completions",
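The release drops the deprecated `oai-td3` completion model and makes `oai-gpturbo` the default. For reference, a complete chat-model entry in this config plausibly looks like the following; the `headers` block mirrors the removed `oai-td3` entry's shape, and the `params.model` and `maxTokenLength` values here are illustrative assumptions, not taken from the diff:

```json
"oai-gpturbo": {
    "type": "OPENAI-CHAT",
    "url": "https://api.openai.com/v1/chat/completions",
    "headers": {
        "Authorization": "Bearer {{OPENAI_API_KEY}}",
        "Content-Type": "application/json"
    },
    "params": {
        "model": "gpt-3.5-turbo"
    },
    "requestsPerSecond": 10,
    "maxTokenLength": 4096
}
```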
package/helper-apps/cortex-file-handler/blobHandler.js

@@ -1,176 +1,351 @@
-import fs from 'fs';
-import path from 'path';
-import { BlobServiceClient } from '@azure/storage-blob';
-import { v4 as uuidv4 } from 'uuid';
-import Busboy from 'busboy';
-import { PassThrough } from 'stream';
-import { pipeline as _pipeline } from 'stream';
-import { promisify } from 'util';
+import fs from "fs";
+import path from "path";
+import { BlobServiceClient } from "@azure/storage-blob";
+import { v4 as uuidv4 } from "uuid";
+import Busboy from "busboy";
+import { PassThrough } from "stream";
+import { pipeline as _pipeline } from "stream";
+import { promisify } from "util";
 const pipeline = promisify(_pipeline);
-import { join } from 'path';
+import { join } from "path";
+import { Storage } from "@google-cloud/storage";
+import axios from "axios";
+import { publicFolder, port, ipAddress } from "./start.js";
+import mime from "mime-types";
+
+const IMAGE_EXTENSIONS = [
+  ".jpg",
+  ".jpeg",
+  ".png",
+  ".gif",
+  ".bmp",
+  ".webp",
+  ".tiff",
+  ".svg",
+  ".pdf"
+];
+
+const VIDEO_EXTENSIONS = [
+  ".mp4",
+  ".webm",
+  ".ogg",
+  ".mov",
+  ".avi",
+  ".flv",
+  ".wmv",
+  ".mkv",
+];
+
+function isBase64(str) {
+  try {
+    return btoa(atob(str)) == str;
+  } catch (err) {
+    return false;
+  }
+}
 
+const GCP_SERVICE_ACCOUNT_KEY =
+  process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 ||
+  process.env.GCP_SERVICE_ACCOUNT_KEY ||
+  "{}";
+const GCP_SERVICE_ACCOUNT = isBase64(GCP_SERVICE_ACCOUNT_KEY)
+  ? JSON.parse(Buffer.from(GCP_SERVICE_ACCOUNT_KEY, "base64").toString())
+  : JSON.parse(GCP_SERVICE_ACCOUNT_KEY);
+const { project_id: GCP_PROJECT_ID } = GCP_SERVICE_ACCOUNT;
+
+let gcs;
+if (!GCP_PROJECT_ID || !GCP_SERVICE_ACCOUNT) {
+  console.warn(
+    "Google Cloud Project ID or Service Account details are missing"
+  );
+} else {
+  try {
+    gcs = new Storage({
+      projectId: GCP_PROJECT_ID,
+      credentials: GCP_SERVICE_ACCOUNT,
+    });
 
-
+    // Rest of your Google Cloud operations using gcs object
+  } catch (error) {
+    console.error(
+      "Provided Google Cloud Service Account details are invalid: ",
+      error
+    );
+  }
+}
 
-const
-const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
-const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;
-if (!connectionString || !containerName) {
-    throw new Error('Missing Azure Storage connection string or container name environment variable');
-}
+const GCS_BUCKETNAME = process.env.GCS_BUCKETNAME || "cortextempfiles";
 
-
-
+const getBlobClient = async () => {
+  const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
+  const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;
+  if (!connectionString || !containerName) {
+    throw new Error(
+      "Missing Azure Storage connection string or container name environment variable"
+    );
+  }
 
-
-
+  const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString);
+
+  const serviceProperties = await blobServiceClient.getProperties();
+  if(!serviceProperties.defaultServiceVersion) {
+    serviceProperties.defaultServiceVersion = '2020-02-10';
+    await blobServiceClient.setProperties(serviceProperties);
+  }
+
+  const containerClient = blobServiceClient.getContainerClient(containerName);
+
+  return { blobServiceClient, containerClient };
+};
 
 async function saveFileToBlob(chunkPath, requestId) {
-
-
-
+  const { containerClient } = await getBlobClient();
+  // Use the filename with a UUID as the blob name
+  const blobName = `${requestId}/${uuidv4()}_${path.basename(chunkPath)}`;
 
-
-
+  // Create a read stream for the chunk file
+  const fileStream = fs.createReadStream(chunkPath);
 
-
-
-
+  // Upload the chunk to Azure Blob Storage using the stream
+  const blockBlobClient = containerClient.getBlockBlobClient(blobName);
+  await blockBlobClient.uploadStream(fileStream);
 
-
-
-
+  // Return the full URI of the uploaded blob
+  const blobUrl = blockBlobClient.url;
+  return blobUrl;
 }
 
 //deletes blob that has the requestId
 async function deleteBlob(requestId) {
-
-
-
-
+  if (!requestId) throw new Error("Missing requestId parameter");
+  const { containerClient } = await getBlobClient();
+  // List the blobs in the container with the specified prefix
+  const blobs = containerClient.listBlobsFlat({ prefix: `${requestId}/` });
+
+  const result = [];
+  // Iterate through the blobs
+  for await (const blob of blobs) {
+    // Delete the matching blob
+    const blockBlobClient = containerClient.getBlockBlobClient(blob.name);
+    await blockBlobClient.delete();
+    console.log(`Cleaned blob: ${blob.name}`);
+    result.push(blob.name);
+  }
+
+  return result;
+}
 
-
-
-
-
-
-
-
-
-
+async function uploadBlob(
+  context,
+  req,
+  saveToLocal = false,
+  useGoogle = false
+) {
+  return new Promise((resolve, reject) => {
+    try {
+      const busboy = Busboy({ headers: req.headers });
+      let requestId = uuidv4();
+      let body = {};
+
+      busboy.on("field", (fieldname, value) => {
+        if (fieldname === "requestId") {
+          requestId = value;
+        } else if (fieldname === "useGoogle") {
+          useGoogle = value;
+        }
+      });
+
+      busboy.on("file", async (fieldname, file, info) => {
+        //do not use google if file is not image or video
+        const ext = path.extname(info.filename).toLowerCase();
+        const canUseGoogle = IMAGE_EXTENSIONS.includes(ext) || VIDEO_EXTENSIONS.includes(ext);
+        if(!canUseGoogle) {
+          useGoogle = false;
+        }
 
-
-
+        //check if useGoogle is set but no gcs and warn
+        if(useGoogle && useGoogle !== "false" && !gcs) {
+          context.log.warn("Google Cloud Storage is not initialized reverting google upload ");
+          useGoogle = false;
+        }
+
+        if (saveToLocal) {
+          // Create the target folder if it doesn't exist
+          const localPath = join(publicFolder, requestId);
+          fs.mkdirSync(localPath, { recursive: true });
+
+          const filename = `${uuidv4()}_${info.filename}`;
+          const destinationPath = `${localPath}/${filename}`;
+
+          await pipeline(file, fs.createWriteStream(destinationPath));
+
+          const message = `File '${filename}' saved to folder successfully.`;
+          context.log(message);
+
+          const url = `http://${ipAddress}:${port}/files/${requestId}/${filename}`;
+
+          body = { message, url };
+
+          resolve(body); // Resolve the promise
+        } else {
+          const filename = `${requestId}/${uuidv4()}_${info.filename}`;
+          const { containerClient } = await getBlobClient();
+
+          const contentType = mime.lookup(filename); // content type based on file extension
+          const options = {};
+          if (contentType) {
+            options.blobHTTPHeaders = { blobContentType: contentType };
+          }
 
-
-
-
-
-
-
-
-
-
-
-
-
-      busboy.on('file', async (fieldname, file, info) => {
-        if (saveToLocal) {
-          // Create the target folder if it doesn't exist
-          const localPath = join(publicFolder, requestId);
-          fs.mkdirSync(localPath, { recursive: true });
-
-          const filename = `${uuidv4()}_${info.filename}`;
-          const destinationPath = `${localPath}/${filename}`;
-
-          await pipeline(file, fs.createWriteStream(destinationPath));
-
-          const message = `File '${filename}' saved to folder successfully.`;
-          context.log(message);
-
-          const url = `http://${ipAddress}:${port}/files/${requestId}/${filename}`;
-
-          const body = { message, url };
-
-          context.res = {
-            status: 200,
-            body,
-          };
-
-
-          resolve(body); // Resolve the promise
-        } else {
-          const { containerClient } = getBlobClient();
-          const filename = `${requestId}/${uuidv4()}_${info.filename}`;
-
-          const blockBlobClient = containerClient.getBlockBlobClient(filename);
-
-          const passThroughStream = new PassThrough();
-          file.pipe(passThroughStream);
-
-          await blockBlobClient.uploadStream(passThroughStream);
-
-          const message = `File '${filename}' uploaded successfully.`;
-          const url = blockBlobClient.url;
-          context.log(message);
-          const body = { message, url };
-
-          context.res = {
-            status: 200,
-            body,
-          };
-
-          resolve(body); // Resolve the promise
-        }
-      });
-
-      busboy.on('error', (error) => {
-        context.log.error('Error processing file upload:', error);
-        context.res = {
-          status: 500,
-          body: 'Error processing file upload.',
-        };
-        reject(error); // Reject the promise
-      });
-
-      req.pipe(busboy);
-    } catch (error) {
-      context.log.error('Error processing file upload:', error);
-      context.res = {
-        status: 500,
-        body: 'Error processing file upload.',
-      };
-      reject(error); // Reject the promise
+          const blockBlobClient = containerClient.getBlockBlobClient(filename);
+
+          const passThroughStream = new PassThrough();
+          file.pipe(passThroughStream);
+
+          await blockBlobClient.uploadStream(passThroughStream, undefined, undefined, options);
+
+          const message = `File '${filename}' uploaded successfully.`;
+          const url = blockBlobClient.url;
+          context.log(message);
+          body = { message, url };
 }
-
+
+        context.res = {
+          status: 200,
+          body,
+        };
+
+        if (useGoogle && useGoogle !== "false") {
+          const { url } = body;
+          const filename = `${requestId}/${uuidv4()}_${info.filename}`;
+          const gcsFile = gcs.bucket(GCS_BUCKETNAME).file(filename);
+          const writeStream = gcsFile.createWriteStream();
+
+          const response = await axios({
+            method: "get",
+            url: url,
+            responseType: "stream",
+          });
+
+          // Pipe the Axios response stream directly into the GCS Write Stream
+          response.data.pipe(writeStream);
+
+          await new Promise((resolve, reject) => {
+            writeStream.on("finish", resolve);
+            writeStream.on("error", reject);
+          });
+
+          body.gcs = `gs://${GCS_BUCKETNAME}/${filename}`;
+        }
+
+        resolve(body); // Resolve the promise
+      });
+
+      busboy.on("error", (error) => {
+        context.log.error("Error processing file upload:", error);
+        context.res = {
+          status: 500,
+          body: "Error processing file upload.",
+        };
+        reject(error); // Reject the promise
+      });
+
+      req.pipe(busboy);
+    } catch (error) {
+      context.log.error("Error processing file upload:", error);
+      context.res = {
+        status: 500,
+        body: "Error processing file upload.",
+      };
+      reject(error); // Reject the promise
+    }
+  });
 }
 
 // Function to delete files that haven't been used in more than a month
-async function cleanup() {
-
-
-
-    const blobs = containerClient.listBlobsFlat();
-
-    // Calculate the date that is x month ago
+async function cleanup(urls=null) {
+  const { containerClient } = await getBlobClient();
+
+  if(!urls) {
     const xMonthAgo = new Date();
     xMonthAgo.setMonth(xMonthAgo.getMonth() - 1);
-
-
+
+    const blobs = containerClient.listBlobsFlat();
+    const cleanedURLs = [];
+
     for await (const blob of blobs) {
-      // Get the last modified date of the blob
      const lastModified = blob.properties.lastModified;
-
-      // Compare the last modified date with one month ago
      if (lastModified < xMonthAgo) {
-        // Delete the blob
        const blockBlobClient = containerClient.getBlockBlobClient(blob.name);
        await blockBlobClient.delete();
        console.log(`Cleaned blob: ${blob.name}`);
+        cleanedURLs.push(blob.name);
+      }
+    }
+
+    return cleanedURLs;
+  }else{
+    // Delete the blobs with the specified URLs
+    const cleanedURLs = [];
+    for(const url of urls) {
+      // Remove the base url to get the blob name
+      const blobName = url.replace(containerClient.url, '');
+      const blockBlobClient = containerClient.getBlockBlobClient(blobName);
+      await blockBlobClient.delete();
+      console.log(`Cleaned blob: ${blobName}`);
+      cleanedURLs.push(blobName);
+    }
+    return cleanedURLs;
+  }
+}
+
+async function cleanupGCS(urls=null) {
+  const bucket = gcs.bucket(GCS_BUCKETNAME);
+  const directories = new Set();
+  const cleanedURLs = [];
+
+  if(!urls){
+    const daysN = 30;
+    const thirtyDaysAgo = new Date(Date.now() - daysN * 24 * 60 * 60 * 1000);
+    const [files] = await bucket.getFiles();
+
+    for (const file of files) {
+      const [metadata] = await file.getMetadata();
+      const directoryPath = path.dirname(file.name);
+      directories.add(directoryPath);
+      if (metadata.updated) {
+        const updatedTime = new Date(metadata.updated);
+        if (updatedTime.getTime() < thirtyDaysAgo.getTime()) {
+          console.log(`Cleaning file: ${file.name}`);
+          await file.delete();
+          cleanedURLs.push(file.name);
+        }
+      }
+    }
+  }else{
+    try {
+      for(const url of urls) {
+        const filename = path.join(url.split('/').slice(3).join('/'));
+        const file = bucket.file(filename);
+        const directoryPath = path.dirname(file.name);
+        directories.add(directoryPath);
+        await file.delete();
+        cleanedURLs.push(url);
      }
+    }catch(error){
+      console.error(`Error cleaning up files: ${error}`);
    }
+  }
+
+  for (const directory of directories) {
+    const [files] = await bucket.getFiles({ prefix: directory });
+    if (files.length === 0) {
+      console.log(`Deleting empty directory: ${directory}`);
+      await bucket.deleteFiles({ prefix: directory });
+    }
+  }
+
+  return cleanedURLs;
 }
 
-export {
-    saveFileToBlob, deleteBlob, uploadBlob, cleanup
-}
+export { saveFileToBlob, deleteBlob, uploadBlob, cleanup, cleanupGCS };
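Two techniques in this file are worth isolating. First, the service-account key may arrive either as raw JSON or base64-encoded JSON; `isBase64` decides by checking that decoding and re-encoding round-trips. Second, when `useGoogle` is set, the handler mirrors the just-uploaded Azure blob into GCS by streaming it straight from its public URL, never buffering the file on disk. A minimal sketch of that mirroring pattern, assuming ambient GCP credentials and hypothetical bucket and object names:

```js
import axios from "axios";
import { Storage } from "@google-cloud/storage";

// Stream an already-uploaded HTTP(S) resource into a GCS object without
// writing it to local disk, as uploadBlob does above.
async function mirrorUrlToGcs(url, bucketName, destName) {
  const gcs = new Storage(); // assumes ambient GCP credentials
  const writeStream = gcs.bucket(bucketName).file(destName).createWriteStream();

  const response = await axios({ method: "get", url, responseType: "stream" });
  response.data.pipe(writeStream);

  // Resolve once GCS has acknowledged the full object.
  await new Promise((resolve, reject) => {
    writeStream.on("finish", resolve);
    writeStream.on("error", reject);
  });

  return `gs://${bucketName}/${destName}`;
}
```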
package/helper-apps/cortex-file-handler/index.js

@@ -1,6 +1,6 @@
 import { downloadFile, processYoutubeUrl, splitMediaFile } from './fileChunker.js';
-import { saveFileToBlob, deleteBlob, uploadBlob, cleanup } from './blobHandler.js';
-import { publishRequestProgress } from './redis.js';
+import { saveFileToBlob, deleteBlob, uploadBlob, cleanup, cleanupGCS } from './blobHandler.js';
+import { cleanupRedisFileStoreMap, getFileStoreMap, publishRequestProgress, removeFromFileStoreMap, setFileStoreMap } from './redis.js';
 import { deleteTempPath, ensureEncoded, isValidYoutubeUrl } from './helper.js';
 import { moveFileToPublicFolder, deleteFolder, cleanupLocal } from './localFileHandler.js';
 import { documentToText, easyChunker } from './docHelper.js';
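redis.js (+102 lines, not shown in this section) now also exposes a "file store map" keyed by content hash. A hypothetical sketch of the contract those imports imply; the key name, timestamp policy, and node-redis usage here are assumptions for illustration, not the package's actual code:

```js
import { createClient } from "redis";

// Assumed connection-string variable; the real one lives in redis.js.
const client = createClient({ url: process.env.REDIS_CONNECTION_STRING });
await client.connect();

// Store an upload result under its content hash, stamped so a cleanup
// sweep can expire entries that have not been touched recently.
async function setFileStoreMap(hash, value) {
  await client.hSet(
    "fileStoreMap",
    hash,
    JSON.stringify({ ...value, timestamp: Date.now() })
  );
}

async function getFileStoreMap(hash) {
  const raw = await client.hGet("fileStoreMap", hash);
  return raw ? JSON.parse(raw) : null;
}

async function removeFromFileStoreMap(hash) {
  await client.hDel("fileStoreMap", hash);
}
```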
@@ -8,6 +8,8 @@ import path from 'path';
 import os from 'os';
 import { v4 as uuidv4 } from 'uuid';
 import fs from 'fs';
+import http from 'http';
+import https from 'https';
 
 const DOC_EXTENSIONS = [".txt", ".json", ".csv", ".md", ".xml", ".js", ".html", ".css", '.pdf', '.docx', '.xlsx', '.csv'];
 
@@ -16,15 +18,58 @@ console.log(useAzure ? 'Using Azure Storage' : 'Using local file system');
 
 
 let isCleanupRunning = false;
-async function cleanupInactive(useAzure) {
+async function cleanupInactive() {
   try {
     if (isCleanupRunning) { return; } //no need to cleanup every call
     isCleanupRunning = true;
-
-
-
-
+    const cleaned = await cleanupRedisFileStoreMap();
+
+    const cleanedAzure = [];
+    const cleanedLocal = [];
+    const cleanedGCS = [];
+
+    for(const key in cleaned){
+      const item = cleaned[key];
+      const {url,gcs} = item;
+      if(url){
+        if(url.includes('.blob.core.windows.net/')){
+          cleanedAzure.push(url);
+        }else if(url.startsWith('gs://')){
+          cleanedGCS.push(url);
+        }else{
+          cleanedLocal.push(url);
+        }
+      }
+
+      if(item && item.gcs){
+        cleanedGCS.push(gcs);
+      }
+    }
+
+    try {
+      if (cleanedAzure && cleanedAzure.length > 0) {
+        await cleanup(cleanedAzure);
+      }
+    } catch (error) {
+      console.log('Error occurred during azure cleanup:', error);
+    }
+
+    try {
+      if (cleanedLocal && cleanedLocal.length > 0) {
+        await cleanupLocal(cleanedLocal);
+      }
+    }catch(err){
+      console.log('Error occurred during local cleanup:', err);
+    }
+
+    try{
+      if(cleanedGCS && cleanedGCS.length > 0){
+        await cleanupGCS(cleanedGCS);
+      }
+    }catch(err){
+      console.log('Error occurred during GCS cleanup:', err);
     }
+
   } catch (error) {
     console.log('Error occurred during cleanup:', error);
   } finally{
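The sweep above fans expired entries out to three backends based on nothing but the URL's shape. Distilled into a standalone helper (a sketch for illustration; the package inlines this logic rather than exporting it):

```js
// Classify a stored file URL by backend, mirroring cleanupInactive's branches.
function classifyStoredUrl(url) {
  if (url.includes(".blob.core.windows.net/")) return "azure";
  if (url.startsWith("gs://")) return "gcs";
  return "local";
}

console.log(classifyStoredUrl("gs://cortextempfiles/abc/video.mp4")); // "gcs"
```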
@@ -32,11 +77,27 @@ async function cleanupInactive(useAzure) {
   }
 }
 
+async function urlExists(url) {
+  if(!url) return false;
+  const httpModule = url.startsWith('https') ? https : http;
+
+  return new Promise((resolve) => {
+    httpModule
+      .get(url, function (response) {
+        // Check if the response status is OK
+        resolve(response.statusCode === 200);
+      })
+      .on('error', function () {
+        resolve(false);
+      });
+  });
+}
+
 
 async function main(context, req) {
   context.log('Starting req processing..');
 
-  cleanupInactive(useAzure);
+  cleanupInactive(); //trigger & no need to wait for it
 
   // Clean up blob when request delete which means processing marked completed
   if (req.method.toLowerCase() === `delete`) {
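Note that `urlExists` issues a full GET and resolves on the status code alone. A HEAD-based variant of the same check would avoid transferring the body; this is a sketch of an alternative, not what the package ships:

```js
import http from "http";
import https from "https";

// Same semantics as urlExists above, but with a HEAD request so the
// response body is never downloaded.
function urlExistsHead(url) {
  if (!url) return Promise.resolve(false);
  const httpModule = url.startsWith("https") ? https : http;
  return new Promise((resolve) => {
    httpModule
      .request(url, { method: "HEAD" }, (res) => resolve(res.statusCode === 200))
      .on("error", () => resolve(false))
      .end();
  });
}
```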
@@ -55,13 +116,40 @@ async function main(context, req) {
     return;
   }
 
+  const { uri, requestId, save, hash, checkHash } = req.body?.params || req.query;
+
+  if(hash && checkHash){ //check if hash exists
+    context.log(`Checking hash: ${hash}`);
+    const result = await getFileStoreMap(hash);
+
+    const exists = await urlExists(result?.url);
+
+    if(!exists){
+      await removeFromFileStoreMap(hash);
+      return;
+    }
+
+    if(result){
+      context.log(`Hash exists: ${hash}`);
+      //update redis timestamp with current time
+      await setFileStoreMap(hash, result);
+    }
+    context.res = {
+      body: result
+    };
+    return;
+  }
+
   if (req.method.toLowerCase() === `post`) {
-    const { url } = await uploadBlob(context, req, !useAzure);
+    const { useGoogle } = req.body?.params || req.query;
+    const { url } = await uploadBlob(context, req, !useAzure, useGoogle);
     context.log(`File url: ${url}`);
+    if(hash && context?.res?.body){ //save hash after upload
+      await setFileStoreMap(hash, context.res.body);
+    }
     return
   }
 
-  const { uri, requestId, save } = req.body?.params || req.query;
   if (!uri || !requestId) {
     context.res = {
       status: 400,