@aj-archipelago/cortex 1.4.5 → 1.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-file-handler/package-lock.json +2 -2
- package/helper-apps/cortex-file-handler/package.json +1 -1
- package/helper-apps/cortex-file-handler/src/blobHandler.js +29 -29
- package/helper-apps/cortex-file-handler/src/constants.js +30 -0
- package/helper-apps/cortex-file-handler/src/index.js +49 -20
- package/helper-apps/cortex-file-handler/src/redis.js +43 -1
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +74 -10
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +2 -11
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +39 -4
- package/helper-apps/cortex-file-handler/src/start.js +7 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +6 -5
- package/helper-apps/cortex-file-handler/tests/containerNameParsing.test.js +7 -6
- package/helper-apps/cortex-file-handler/tests/containerParameterFlow.test.js +4 -3
- package/helper-apps/cortex-file-handler/tests/deleteOperations.test.js +287 -0
- package/helper-apps/cortex-file-handler/tests/hashContainerScoping.test.js +415 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +1 -1
- package/lib/entityConstants.js +1 -1
- package/lib/fileUtils.js +1481 -0
- package/lib/pathwayTools.js +7 -1
- package/lib/util.js +2 -313
- package/package.json +4 -3
- package/pathways/image_qwen.js +1 -1
- package/pathways/system/entity/memory/sys_read_memory.js +17 -3
- package/pathways/system/entity/memory/sys_save_memory.js +22 -6
- package/pathways/system/entity/sys_entity_agent.js +91 -13
- package/pathways/system/entity/sys_generator_error.js +2 -2
- package/pathways/system/entity/tools/sys_tool_analyzefile.js +171 -0
- package/pathways/system/entity/tools/sys_tool_codingagent.js +38 -4
- package/pathways/system/entity/tools/sys_tool_editfile.js +403 -0
- package/pathways/system/entity/tools/sys_tool_file_collection.js +433 -0
- package/pathways/system/entity/tools/sys_tool_image.js +172 -10
- package/pathways/system/entity/tools/sys_tool_image_gemini.js +123 -10
- package/pathways/system/entity/tools/sys_tool_readfile.js +217 -124
- package/pathways/system/entity/tools/sys_tool_validate_url.js +137 -0
- package/pathways/system/entity/tools/sys_tool_writefile.js +211 -0
- package/pathways/system/workspaces/run_workspace_prompt.js +4 -3
- package/pathways/system/workspaces/workspace_applet_edit.js +13 -66
- package/pathways/transcribe_gemini.js +2 -1
- package/server/executeWorkspace.js +1 -1
- package/server/plugins/neuralSpacePlugin.js +2 -6
- package/server/plugins/openAiWhisperPlugin.js +2 -1
- package/server/plugins/replicateApiPlugin.js +4 -14
- package/server/typeDef.js +10 -1
- package/tests/integration/features/tools/fileCollection.test.js +858 -0
- package/tests/integration/features/tools/fileOperations.test.js +851 -0
- package/tests/integration/features/tools/writefile.test.js +350 -0
- package/tests/unit/core/fileCollection.test.js +259 -0
- package/tests/unit/core/util.test.js +320 -1
package/helper-apps/cortex-file-handler/package-lock.json

@@ -1,12 +1,12 @@
 {
   "name": "@aj-archipelago/cortex-file-handler",
-  "version": "2.6.
+  "version": "2.6.3",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@aj-archipelago/cortex-file-handler",
-      "version": "2.6.
+      "version": "2.6.3",
       "dependencies": {
         "@azure/storage-blob": "^12.13.0",
         "@distube/ytdl-core": "^4.14.3",
package/helper-apps/cortex-file-handler/src/blobHandler.js

@@ -16,7 +16,16 @@ import {
   generateBlobName,
 } from "./utils/filenameUtils.js";
 import { publicFolder, port, ipAddress } from "./start.js";
-import {
+import {
+  CONVERTED_EXTENSIONS,
+  AZURITE_ACCOUNT_NAME,
+  parseContainerNames,
+  getCurrentContainerNames,
+  AZURE_STORAGE_CONTAINER_NAMES,
+  getDefaultContainerName,
+  GCS_BUCKETNAME,
+  isValidContainerName
+} from "./constants.js";
 import { FileConversionService } from "./services/FileConversionService.js";
 import { StorageFactory } from "./services/storage/StorageFactory.js";
 
@@ -70,28 +79,6 @@ if (!GCP_PROJECT_ID || !GCP_SERVICE_ACCOUNT) {
   }
 }
 
-// Parse comma-separated container names from environment variable
-const parseContainerNames = () => {
-  const containerStr = process.env.AZURE_STORAGE_CONTAINER_NAME || "whispertempfiles";
-  return containerStr.split(',').map(name => name.trim());
-};
-
-// Helper function to get current container names at runtime
-export const getCurrentContainerNames = () => {
-  return parseContainerNames();
-};
-
-export const AZURE_STORAGE_CONTAINER_NAMES = parseContainerNames();
-export const DEFAULT_AZURE_STORAGE_CONTAINER_NAME = AZURE_STORAGE_CONTAINER_NAMES[0];
-export const GCS_BUCKETNAME = process.env.GCS_BUCKETNAME || "cortextempfiles";
-
-// Validate if a container name is allowed
-export const isValidContainerName = (containerName) => {
-  // Read from environment at runtime to support dynamically changing env in tests
-  const currentContainerNames = getCurrentContainerNames();
-  return currentContainerNames.includes(containerName);
-};
-
 function isEncoded(str) {
   // Checks for any percent-encoded sequence
   return /%[0-9A-Fa-f]{2}/.test(str);

@@ -194,7 +181,7 @@ async function downloadFromGCS(gcsUrl, destinationPath) {
 
 export const getBlobClient = async (containerName = null) => {
   const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
-  const finalContainerName = containerName ||
+  const finalContainerName = containerName || getDefaultContainerName();
 
   // Validate container name is in whitelist
   if (!isValidContainerName(finalContainerName)) {

@@ -350,10 +337,12 @@ function uploadBlob(
 
     if (errorOccurred) return; // Check again after waiting
 
-
+    // Capture containerName value to avoid closure issues
+    const capturedContainerName = containerName;
+    await processFile(fieldname, file, info, capturedContainerName);
   });
 
-  const processFile = async (fieldname, file, info) => {
+  const processFile = async (fieldname, file, info, capturedContainerName) => {
     if (errorOccurred) return;
 
     // Validate file

@@ -449,7 +438,7 @@ function uploadBlob(
         context,
         uploadName,
         azureStream,
-
+        capturedContainerName,
       ).catch(async (err) => {
         cloudUploadError = err;
         // Fallback: try from disk if available

@@ -459,7 +448,7 @@ function uploadBlob(
           highWaterMark: 1024 * 1024,
           autoClose: true,
         });
-        return saveToAzureStorage(context, uploadName, diskStream,
+        return saveToAzureStorage(context, uploadName, diskStream, capturedContainerName);
       }
       throw err;
     });

@@ -511,6 +500,7 @@ function uploadBlob(
      }, {}),
    };
    if (hash) result.hash = hash;
+   if (capturedContainerName) result.container = capturedContainerName;
 
    // If saving locally, wait for disk write to finish and then move to public folder
    if (saveToLocal) {

@@ -582,7 +572,7 @@ function uploadBlob(
        conversion.convertedPath,
        requestId,
        null,
-
+       capturedContainerName,
      );
 
      // Optionally save to GCS

@@ -827,6 +817,10 @@ async function uploadFile(
  if (hash) {
    result.hash = hash;
  }
+
+ if (containerName) {
+   result.container = containerName;
+ }
 
  // Initialize conversion service
  const conversionService = new FileConversionService(context, !saveToLocal);

@@ -1156,4 +1150,10 @@ export {
  gcs,
  uploadChunkToGCS,
  downloadFromGCS,
+ // Re-export container constants for backward compatibility
+ getCurrentContainerNames,
+ AZURE_STORAGE_CONTAINER_NAMES,
+ getDefaultContainerName,
+ GCS_BUCKETNAME,
+ isValidContainerName,
 };
package/helper-apps/cortex-file-handler/src/constants.js

@@ -132,3 +132,33 @@ export const CONVERTED_EXTENSIONS = [
 
 // Azure Storage constants
 export const AZURITE_ACCOUNT_NAME = "devstoreaccount1";
+
+// Parse comma-separated container names from environment variable
+export const parseContainerNames = () => {
+  const containerStr = process.env.AZURE_STORAGE_CONTAINER_NAME || "whispertempfiles";
+  return containerStr.split(',').map(name => name.trim());
+};
+
+// Helper function to get current container names at runtime
+// Useful for runtime validation when env vars might change (e.g., in tests)
+export const getCurrentContainerNames = () => {
+  return parseContainerNames();
+};
+
+export const AZURE_STORAGE_CONTAINER_NAMES = parseContainerNames();
+
+// Helper function to get the default container name at runtime
+// This allows tests to change the environment variable and have the correct default
+export const getDefaultContainerName = () => {
+  return process.env.DEFAULT_AZURE_STORAGE_CONTAINER_NAME || getCurrentContainerNames()[0];
+};
+
+export const DEFAULT_AZURE_STORAGE_CONTAINER_NAME = process.env.DEFAULT_AZURE_STORAGE_CONTAINER_NAME || AZURE_STORAGE_CONTAINER_NAMES[0];
+export const GCS_BUCKETNAME = process.env.GCS_BUCKETNAME || "cortextempfiles";
+
+// Validate if a container name is allowed
+export const isValidContainerName = (containerName) => {
+  // Read from environment at runtime to support dynamically changing env in tests
+  const currentContainerNames = getCurrentContainerNames();
+  return currentContainerNames.includes(containerName);
+};
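Editor's note: the helpers above centralize container whitelisting in constants.js. A minimal usage sketch of the behavior they define (the environment value below is hypothetical, for illustration only):

// Hypothetical environment value
process.env.AZURE_STORAGE_CONTAINER_NAME = "whispertempfiles, userfiles";

parseContainerNames();               // ["whispertempfiles", "userfiles"] — split on commas, trimmed
getDefaultContainerName();           // "whispertempfiles" — first entry, unless DEFAULT_AZURE_STORAGE_CONTAINER_NAME overrides it
isValidContainerName("userfiles");   // true  — name is in the whitelist
isValidContainerName("other");       // false — name is rejected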
package/helper-apps/cortex-file-handler/src/index.js

@@ -14,6 +14,7 @@ import {
   removeFromFileStoreMap,
   setFileStoreMap,
   cleanupRedisFileStoreMapAge,
+  getScopedHashKey,
 } from "./redis.js";
 import { FileConversionService } from "./services/FileConversionService.js";
 import { StorageService } from "./services/storage/StorageService.js";

@@ -46,6 +47,27 @@ async function cleanupInactive(context) {
 }
 
 async function CortexFileHandler(context, req) {
+  // Parse body if it's a string (Azure Functions sometimes doesn't auto-parse DELETE bodies)
+  let parsedBody = req.body;
+  if (typeof req.body === 'string' && req.body.length > 0) {
+    try {
+      parsedBody = JSON.parse(req.body);
+    } catch (e) {
+      // If parsing fails, treat as empty object
+      parsedBody = {};
+    }
+  }
+
+  // For GET requests, prioritize query string. For other methods, check body first, then query
+  // Also check if parsedBody actually has content (not just empty object)
+  const hasBodyContent = parsedBody && typeof parsedBody === 'object' && Object.keys(parsedBody).length > 0;
+  const bodySource = hasBodyContent ? (parsedBody.params || parsedBody) : {};
+  const querySource = req.query || {};
+
+  // Merge sources: for GET, query takes priority; for others, body takes priority
+  const isGet = req.method?.toLowerCase() === 'get';
+  const source = isGet ? { ...bodySource, ...querySource } : { ...querySource, ...bodySource };
+
   const {
     uri,
     requestId,
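Editor's note: the block above makes DELETE bodies usable even when the Azure Functions runtime delivers them as raw strings. A hedged client-side sketch (the host and route below are assumptions, not taken from this diff):

// Assumed endpoint URL; substitute your deployment's function route
await fetch("https://<host>/api/CortexFileHandler", {
  method: "DELETE",
  headers: { "Content-Type": "application/json" },
  // Delivered as a string body; the handler JSON.parses it and accepts
  // parameters either at the top level or nested under "params"
  body: JSON.stringify({ params: { operation: "delete", hash: "abc123" } }),
});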
@@ -58,7 +80,7 @@ async function CortexFileHandler(context, req) {
     load,
     restore,
     container,
-  } =
+  } = source;
 
   // Normalize boolean parameters
   const shouldSave = save === true || save === "true";

@@ -135,13 +157,15 @@ async function CortexFileHandler(context, req) {
   // 1. Delete multiple files by requestId (existing behavior)
   // 2. Delete single file by hash (new behavior)
   if (operation === "delete") {
-
-
+    // Check both query string and body params for delete parameters
+    // Handle both req.body.params.hash and req.body.hash formats
+    const deleteRequestId = req.query.requestId || parsedBody?.params?.requestId || parsedBody?.requestId || requestId;
+    const deleteHash = req.query.hash || parsedBody?.params?.hash || parsedBody?.hash || hash;
 
     // If only hash is provided, delete single file by hash
     if (deleteHash && !deleteRequestId) {
       try {
-        const deleted = await storageService.deleteFileByHash(deleteHash);
+        const deleted = await storageService.deleteFileByHash(deleteHash, container);
         context.res = {
           status: 200,
           body: {
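Editor's note: with the fallback chain above, delete parameters can arrive via query string or body. A sketch of the container-scoped variant (host, hash, and container values are hypothetical):

// Query-string form; "userfiles" must be in the container whitelist
await fetch("https://<host>/api/CortexFileHandler?operation=delete&hash=abc123&container=userfiles", {
  method: "DELETE",
});
// With a hash but no requestId, this routes to
// storageService.deleteFileByHash("abc123", "userfiles")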
@@ -163,17 +187,18 @@ async function CortexFileHandler(context, req) {
     if (!deleteRequestId) {
       context.res = {
         status: 400,
-        body: "Please pass either a requestId or hash
+        body: "Please pass either a requestId or hash in the query string or request body",
       };
       return;
     }
 
     // First, get the hash from the map if it exists
     if (deleteHash) {
-      const
+      const scopedHash = getScopedHashKey(deleteHash, container);
+      const hashResult = await getFileStoreMap(scopedHash);
       if (hashResult) {
-        context.log(`Found hash in map for deletion: ${deleteHash}`);
-        await removeFromFileStoreMap(
+        context.log(`Found hash in map for deletion: ${deleteHash} (scoped key: ${scopedHash})`);
+        await removeFromFileStoreMap(scopedHash);
       }
     }
 

@@ -201,7 +226,8 @@ async function CortexFileHandler(context, req) {
   }
 
   // Check if file already exists (using hash or URL as the key)
-
+  // If hash is provided, scope it by container; otherwise use URL as-is
+  const cacheKey = hash ? getScopedHashKey(hash, container) : remoteUrl;
   const exists = await getFileStoreMap(cacheKey);
   if (exists) {
     context.res = {

@@ -255,9 +281,10 @@ async function CortexFileHandler(context, req) {
 
   if (hash && clearHash) {
     try {
-      const
+      const scopedHash = getScopedHashKey(hash, container);
+      const hashValue = await getFileStoreMap(scopedHash);
       if (hashValue) {
-        await removeFromFileStoreMap(
+        await removeFromFileStoreMap(scopedHash);
         context.res = {
           status: 200,
           body: `Hash ${hash} removed`,

@@ -279,10 +306,11 @@ async function CortexFileHandler(context, req) {
   }
 
   if (hash && checkHash) {
-
+    const scopedHash = getScopedHashKey(hash, container);
+    let hashResult = await getFileStoreMap(scopedHash, true); // Skip lazy cleanup to handle it ourselves
 
     if (hashResult) {
-      context.log(`File exists in map: ${hash}`);
+      context.log(`File exists in map: ${hash} (scoped key: ${scopedHash})`);
 
       // Log the URL retrieved from Redis before checking existence
       context.log(`Checking existence of URL from Redis: ${hashResult?.url}`);

@@ -301,7 +329,7 @@ async function CortexFileHandler(context, req) {
       context.log(
         `File not found in any storage. Removing from map: ${hash}`,
       );
-      await removeFromFileStoreMap(
+      await removeFromFileStoreMap(scopedHash);
       context.res = {
         status: 404,
         body: `Hash ${hash} not found in storage`,

@@ -320,7 +348,7 @@ async function CortexFileHandler(context, req) {
     } catch (error) {
       context.log(`Error restoring to GCS: ${error}`);
       // If restoration fails, remove the hash from the map
-      await removeFromFileStoreMap(
+      await removeFromFileStoreMap(scopedHash);
       context.res = {
         status: 404,
         body: `Hash ${hash} not found`,

@@ -378,7 +406,7 @@ async function CortexFileHandler(context, req) {
     } catch (error) {
       console.error("Error restoring from GCS:", error);
       // If restoration fails, remove the hash from the map
-      await removeFromFileStoreMap(
+      await removeFromFileStoreMap(scopedHash);
       context.res = {
         status: 404,
         body: `Hash ${hash} not found`,

@@ -396,7 +424,7 @@ async function CortexFileHandler(context, req) {
       : false;
     if (!finalPrimaryCheck && !finalGCSCheck) {
       context.log(`Failed to restore file. Removing from map: ${hash}`);
-      await removeFromFileStoreMap(
+      await removeFromFileStoreMap(scopedHash);
       context.res = {
         status: 404,
         body: `Hash ${hash} not found`,

@@ -498,7 +526,7 @@ async function CortexFileHandler(context, req) {
     }
 
     //update redis timestamp with current time
-    await setFileStoreMap(
+    await setFileStoreMap(scopedHash, hashResult);
 
     context.res = {
       status: 200,

@@ -508,7 +536,7 @@ async function CortexFileHandler(context, req) {
   } catch (error) {
     context.log(`Error checking file existence: ${error}`);
     // If there's an error checking file existence, remove the hash from the map
-    await removeFromFileStoreMap(
+    await removeFromFileStoreMap(scopedHash);
     context.res = {
       status: 404,
       body: `Hash ${hash} not found`,

@@ -532,7 +560,8 @@ async function CortexFileHandler(context, req) {
   // Use uploadBlob to handle multipart/form-data
   const result = await uploadBlob(context, req, saveToLocal, null, hash, container);
   if (result?.hash && context?.res?.body) {
-
+    const scopedHash = getScopedHashKey(result.hash, result.container || container);
+    await setFileStoreMap(scopedHash, context.res.body);
   }
   return;
 }
package/helper-apps/cortex-file-handler/src/redis.js

@@ -1,7 +1,28 @@
 import redis from "ioredis";
+import { getDefaultContainerName } from "./constants.js";
 
 const connectionString = process.env["REDIS_CONNECTION_STRING"];
 
+/**
+ * Generate a scoped hash key for Redis storage
+ * Always includes the container name in the format hash:container
+ * @param {string} hash - The file hash
+ * @param {string} containerName - The container name (optional, defaults to default container)
+ * @returns {string} The scoped hash key
+ */
+export const getScopedHashKey = (hash, containerName = null) => {
+  if (!hash) return hash;
+
+  // Get the default container name at runtime to support dynamic env changes in tests
+  const defaultContainerName = getDefaultContainerName();
+
+  // Use default container if not provided
+  const container = containerName || defaultContainerName;
+
+  // Always scope by container
+  return `${hash}:${container}`;
+};
+
 // Create a mock client for test environment when Redis is not configured
 const createMockClient = () => {
   const store = new Map();
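Editor's note: a quick sketch of the key shapes getScopedHashKey produces (hash and container values hypothetical):

getScopedHashKey("abc123", "userfiles");  // "abc123:userfiles"
getScopedHashKey("abc123");               // "abc123:whispertempfiles" when no container or default override is configured
getScopedHashKey(null);                   // null — falsy hashes pass through unchanged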
@@ -123,7 +144,28 @@ const setFileStoreMap = async (key, value) => {
 
 const getFileStoreMap = async (key, skipLazyCleanup = false) => {
   try {
-
+    let value = await client.hget("FileStoreMap", key);
+
+    // Backwards compatibility: if not found and key is for default container, try legacy key
+    if (!value && key && key.includes(':')) {
+      const [hash, containerName] = key.split(':', 2);
+      const defaultContainerName = getDefaultContainerName();
+
+      // If this is the default container, try the legacy key (hash without container)
+      if (containerName === defaultContainerName) {
+        console.log(`Key ${key} not found, trying legacy key ${hash} for backwards compatibility`);
+        value = await client.hget("FileStoreMap", hash);
+
+        // If found with legacy key, migrate it to the new scoped key
+        if (value) {
+          console.log(`Found value with legacy key ${hash}, migrating to new key ${key}`);
+          await client.hset("FileStoreMap", key, value);
+          // Optionally remove the old key after migration
+          // await client.hdel("FileStoreMap", hash);
+        }
+      }
+    }
+
     if (value) {
       try {
         // parse the value back to an object before returning
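Editor's note: the lookup above gives read-through migration for entries written before container scoping existed. A sketch of the flow, assuming "whispertempfiles" is the default container and an old entry exists under the bare hash:

// Scoped lookup misses, legacy key "abc123" hits, value is copied forward
const entry = await getFileStoreMap("abc123:whispertempfiles");
// "FileStoreMap" now holds both "abc123" and "abc123:whispertempfiles",
// since the hdel of the legacy key is intentionally left commented out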
package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js

@@ -119,6 +119,11 @@ export class AzureStorageProvider extends StorageProvider {
       blobName = generateBlobName(requestId, `${shortId}${fileExtension}`);
     }
 
+    // Validate blobName is not empty
+    if (!blobName || blobName.trim().length === 0) {
+      throw new Error(`Invalid blob name generated: blobName="${blobName}", requestId="${requestId}", filename="${filename}"`);
+    }
+
     // Create a read stream for the file
     const fileStream = fs.createReadStream(filePath);
 

@@ -134,8 +139,20 @@ export class AzureStorageProvider extends StorageProvider {
     // Generate SAS token after successful upload
     const sasToken = this.generateSASToken(containerClient, blobName);
 
+    const url = `${blockBlobClient.url}?${sasToken}`;
+
+    // Validate that the URL contains a blob name (not just container)
+    // Azure blob URLs should be: https://account.blob.core.windows.net/container/blobname
+    // Container-only URLs end with /container/ or /container
+    const urlObj = new URL(url);
+    const pathParts = urlObj.pathname.split('/').filter(p => p.length > 0);
+    if (pathParts.length <= 1) {
+      // Only container name, no blob name - this is invalid
+      throw new Error(`Generated invalid Azure URL (container-only): ${url}, blobName: ${blobName}`);
+    }
+
     return {
-      url:
+      url: url,
       blobName: blobName,
     };
   }

@@ -148,6 +165,11 @@ export class AzureStorageProvider extends StorageProvider {
     let blobName = sanitizeFilename(encodedFilename);
     blobName = encodeURIComponent(blobName);
 
+    // Validate blobName is not empty
+    if (!blobName || blobName.trim().length === 0) {
+      throw new Error(`Invalid blob name generated from encodedFilename: "${encodedFilename}"`);
+    }
+
     const options = {
       blobHTTPHeaders: {
         ...(contentType ? { blobContentType: contentType } : {}),

@@ -163,7 +185,16 @@ export class AzureStorageProvider extends StorageProvider {
     await blockBlobClient.uploadStream(stream, undefined, undefined, options);
     const sasToken = this.generateSASToken(containerClient, blobName);
 
-
+    const url = `${blockBlobClient.url}?${sasToken}`;
+
+    // Validate that the URL contains a blob name (not just container)
+    const urlObj = new URL(url);
+    const pathParts = urlObj.pathname.split('/').filter(p => p.length > 0);
+    if (pathParts.length <= 1) {
+      throw new Error(`Generated invalid Azure URL (container-only) from uploadStream: ${url}, blobName: ${blobName}`);
+    }
+
+    return url;
   }
 
   async deleteFiles(requestId) {

@@ -204,19 +235,52 @@ export class AzureStorageProvider extends StorageProvider {
     const urlObj = new URL(url);
     let blobName = urlObj.pathname.substring(1); // Remove leading slash
 
-    // Handle
+    // Handle different URL formats:
+    // 1. Azurite: /devstoreaccount1/container/blobname (3 segments)
+    // 2. Standard Azure: /container/blobname (2 segments)
+    // 3. Container-only: /container or /container/ (invalid)
+
     if (blobName.includes('/')) {
-      const pathSegments = blobName.split('/');
-
-
+      const pathSegments = blobName.split('/').filter(segment => segment.length > 0);
+
+      if (pathSegments.length === 1) {
+        // Only container name, no blob name - this is invalid
+        console.warn(`Invalid blob URL (container-only): ${url}`);
+        return null;
+      } else if (pathSegments.length === 2) {
+        // Standard Azure format: container/blobname
+        // Check if first segment matches container name
+        if (pathSegments[0] === this.containerName) {
+          blobName = pathSegments[1];
+        } else {
+          // Container name doesn't match, but assume second segment is blob name
+          blobName = pathSegments[1];
+        }
+      } else if (pathSegments.length >= 3) {
+        // Azurite format: devstoreaccount1/container/blobname
         // Skip the account and container segments to get the actual blob name
-
+        // Check if second segment matches container name
+        if (pathSegments[1] === this.containerName) {
+          blobName = pathSegments.slice(2).join('/');
+        } else {
+          // Container name doesn't match, but assume remaining segments are blob name
+          blobName = pathSegments.slice(2).join('/');
+        }
+      }
+    } else {
+      // No slashes - could be just container name or just blob name
+      if (blobName === this.containerName || blobName === this.containerName + '/') {
+        // URL is just the container name - invalid blob URL
+        console.warn(`Invalid blob URL (container-only): ${url}`);
+        return null;
       }
+      // Otherwise assume it's a blob name at root level (unlikely but possible)
     }
 
-    //
-    if (blobName.
-
+    // Validate that we have a non-empty blob name
+    if (!blobName || blobName.trim().length === 0) {
+      console.warn(`Invalid blob URL (empty blob name): ${url}`);
+      return null;
     }
 
     const blockBlobClient = containerClient.getBlockBlobClient(blobName);
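Editor's note: sketching the effect of the new URL parsing with hypothetical URLs, assuming this.containerName is "mycontainer":

// Standard Azure (2 segments): blob name is the second segment
//   https://acct.blob.core.windows.net/mycontainer/photo.png    -> "photo.png"
// Azurite (3+ segments): account and container segments are skipped
//   http://127.0.0.1:10000/devstoreaccount1/mycontainer/a/b.txt -> "a/b.txt"
// Container-only (1 segment): warned about, and null is returned instead of throwing
//   https://acct.blob.core.windows.net/mycontainer              -> null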
package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js

@@ -1,18 +1,10 @@
 import { AzureStorageProvider } from "./AzureStorageProvider.js";
 import { GCSStorageProvider } from "./GCSStorageProvider.js";
 import { LocalStorageProvider } from "./LocalStorageProvider.js";
+import { getCurrentContainerNames, GCS_BUCKETNAME } from "../../constants.js";
 import path from "path";
 import { fileURLToPath } from "url";
 
-// Lazy-load blob handler constants to avoid blocking module import
-let blobHandlerConstants = null;
-async function getBlobHandlerConstants() {
-  if (!blobHandlerConstants) {
-    blobHandlerConstants = await import("../../blobHandler.js");
-  }
-  return blobHandlerConstants;
-}
-
 // Singleton instance for provider caching across the application
 let storageFactoryInstance = null;
 

@@ -49,7 +41,6 @@ export class StorageFactory {
 
   async getAzureProvider(containerName = null) {
     // Read container names from environment directly to get current values
-    const { getCurrentContainerNames } = await getBlobHandlerConstants();
     const azureStorageContainerNames = getCurrentContainerNames();
     const defaultAzureStorageContainerName = azureStorageContainerNames[0];
 

@@ -82,7 +73,7 @@ export class StorageFactory {
     }
     const provider = new GCSStorageProvider(
       credentials,
-
+      GCS_BUCKETNAME,
     );
     this.providers.set(key, provider);
   }
package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js

@@ -163,9 +163,10 @@ export class StorageService {
   /**
    * Delete a single file by its hash from both primary and backup storage
    * @param {string} hash - The hash of the file to delete
+   * @param {string} containerName - Optional container name for scoping the hash
    * @returns {Promise<Object>} Object containing deletion results and file info
    */
-  async deleteFileByHash(hash) {
+  async deleteFileByHash(hash, containerName = null) {
     await this._initialize();
 
     if (!hash) {

@@ -175,11 +176,27 @@ export class StorageService {
     const results = [];
 
     // Get and remove file information from Redis map (non-atomic operations)
-    const { getFileStoreMap, removeFromFileStoreMap } = await import("../../redis.js");
-    const
+    const { getFileStoreMap, removeFromFileStoreMap, getScopedHashKey, getDefaultContainerName } = await import("../../redis.js");
+    const { getDefaultContainerName: getDefaultContainerNameFromConstants } = await import("../../constants.js");
+    const scopedHash = getScopedHashKey(hash, containerName);
+    const hashResult = await getFileStoreMap(scopedHash);
 
     if (hashResult) {
-
+      // Remove from scoped key
+      await removeFromFileStoreMap(scopedHash);
+
+      // Also check and remove legacy key (unscoped) if this is the default container
+      // This handles backwards compatibility with old entries stored without container scoping
+      const defaultContainerName = getDefaultContainerNameFromConstants();
+      const effectiveContainer = containerName || defaultContainerName;
+      if (effectiveContainer === defaultContainerName && scopedHash.includes(':')) {
+        const [legacyHash] = scopedHash.split(':', 2);
+        // Try to remove legacy key - only attempt if it exists to avoid unnecessary "does not exist" logs
+        const legacyExists = await getFileStoreMap(legacyHash);
+        if (legacyExists) {
+          await removeFromFileStoreMap(legacyHash);
+        }
+      }
     }
 
     if (!hashResult) {

@@ -189,9 +206,16 @@ export class StorageService {
     // Delete from primary storage
     if (hashResult.url) {
       try {
+        // Log the URL being deleted for debugging
+        console.log(`Deleting file from primary storage - hash: ${hash}, url: ${hashResult.url}`);
         const primaryResult = await this.deleteFile(hashResult.url);
         if (primaryResult) {
+          console.log(`Successfully deleted from primary storage - hash: ${hash}, result: ${primaryResult}`);
           results.push({ provider: 'primary', result: primaryResult });
+        } else {
+          // deleteFile returned null, which means the URL was invalid
+          console.warn(`Invalid or empty URL for hash ${hash}: ${hashResult.url}`);
+          results.push({ provider: 'primary', error: 'Invalid URL (container-only or empty blob name)' });
         }
       } catch (error) {
         console.error(`Error deleting file from primary storage:`, error);

@@ -202,14 +226,25 @@ export class StorageService {
     // Delete from backup storage (GCS)
     if (hashResult.gcs && this.backupProvider) {
       try {
+        console.log(`Deleting file from backup storage - hash: ${hash}, gcs: ${hashResult.gcs}`);
         const backupResult = await this.deleteFileFromBackup(hashResult.gcs);
         if (backupResult) {
+          console.log(`Successfully deleted from backup storage - hash: ${hash}, result: ${backupResult}`);
           results.push({ provider: 'backup', result: backupResult });
+        } else {
+          console.warn(`Backup deletion returned null for hash ${hash}: ${hashResult.gcs}`);
+          results.push({ provider: 'backup', error: 'Deletion returned null' });
         }
       } catch (error) {
         console.error(`Error deleting file from backup storage:`, error);
         results.push({ provider: 'backup', error: error.message });
       }
+    } else {
+      if (!hashResult.gcs) {
+        console.log(`No GCS URL found for hash ${hash}, skipping backup deletion`);
+      } else if (!this.backupProvider) {
+        console.log(`Backup provider not configured, skipping backup deletion for hash ${hash}`);
+      }
     }
 
     // Note: Hash was already removed from Redis atomically at the beginning