@aj-archipelago/cortex 1.4.5 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/helper-apps/cortex-file-handler/package-lock.json +2 -2
  2. package/helper-apps/cortex-file-handler/package.json +1 -1
  3. package/helper-apps/cortex-file-handler/src/blobHandler.js +29 -29
  4. package/helper-apps/cortex-file-handler/src/constants.js +30 -0
  5. package/helper-apps/cortex-file-handler/src/index.js +49 -20
  6. package/helper-apps/cortex-file-handler/src/redis.js +43 -1
  7. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +74 -10
  8. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +2 -11
  9. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +39 -4
  10. package/helper-apps/cortex-file-handler/src/start.js +7 -0
  11. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +6 -5
  12. package/helper-apps/cortex-file-handler/tests/containerNameParsing.test.js +7 -6
  13. package/helper-apps/cortex-file-handler/tests/containerParameterFlow.test.js +4 -3
  14. package/helper-apps/cortex-file-handler/tests/deleteOperations.test.js +287 -0
  15. package/helper-apps/cortex-file-handler/tests/hashContainerScoping.test.js +415 -0
  16. package/helper-apps/cortex-file-handler/tests/start.test.js +1 -1
  17. package/lib/entityConstants.js +1 -1
  18. package/lib/fileUtils.js +1481 -0
  19. package/lib/pathwayTools.js +7 -1
  20. package/lib/util.js +2 -313
  21. package/package.json +4 -3
  22. package/pathways/image_qwen.js +1 -1
  23. package/pathways/system/entity/memory/sys_read_memory.js +17 -3
  24. package/pathways/system/entity/memory/sys_save_memory.js +22 -6
  25. package/pathways/system/entity/sys_entity_agent.js +91 -13
  26. package/pathways/system/entity/sys_generator_error.js +2 -2
  27. package/pathways/system/entity/tools/sys_tool_analyzefile.js +171 -0
  28. package/pathways/system/entity/tools/sys_tool_codingagent.js +38 -4
  29. package/pathways/system/entity/tools/sys_tool_editfile.js +403 -0
  30. package/pathways/system/entity/tools/sys_tool_file_collection.js +433 -0
  31. package/pathways/system/entity/tools/sys_tool_image.js +172 -10
  32. package/pathways/system/entity/tools/sys_tool_image_gemini.js +123 -10
  33. package/pathways/system/entity/tools/sys_tool_readfile.js +217 -124
  34. package/pathways/system/entity/tools/sys_tool_validate_url.js +137 -0
  35. package/pathways/system/entity/tools/sys_tool_writefile.js +211 -0
  36. package/pathways/system/workspaces/run_workspace_prompt.js +4 -3
  37. package/pathways/system/workspaces/workspace_applet_edit.js +13 -66
  38. package/pathways/transcribe_gemini.js +2 -1
  39. package/server/executeWorkspace.js +1 -1
  40. package/server/plugins/neuralSpacePlugin.js +2 -6
  41. package/server/plugins/openAiWhisperPlugin.js +2 -1
  42. package/server/plugins/replicateApiPlugin.js +4 -14
  43. package/server/typeDef.js +10 -1
  44. package/tests/integration/features/tools/fileCollection.test.js +858 -0
  45. package/tests/integration/features/tools/fileOperations.test.js +851 -0
  46. package/tests/integration/features/tools/writefile.test.js +350 -0
  47. package/tests/unit/core/fileCollection.test.js +259 -0
  48. package/tests/unit/core/util.test.js +320 -1
package/helper-apps/cortex-file-handler/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@aj-archipelago/cortex-file-handler",
-  "version": "2.6.2",
+  "version": "2.6.3",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@aj-archipelago/cortex-file-handler",
-      "version": "2.6.2",
+      "version": "2.6.3",
       "dependencies": {
         "@azure/storage-blob": "^12.13.0",
         "@distube/ytdl-core": "^4.14.3",
package/helper-apps/cortex-file-handler/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex-file-handler",
-  "version": "2.6.2",
+  "version": "2.6.3",
   "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
   "type": "module",
   "main": "src/index.js",
package/helper-apps/cortex-file-handler/src/blobHandler.js
@@ -16,7 +16,16 @@ import {
   generateBlobName,
 } from "./utils/filenameUtils.js";
 import { publicFolder, port, ipAddress } from "./start.js";
-import { CONVERTED_EXTENSIONS, AZURITE_ACCOUNT_NAME } from "./constants.js";
+import {
+  CONVERTED_EXTENSIONS,
+  AZURITE_ACCOUNT_NAME,
+  parseContainerNames,
+  getCurrentContainerNames,
+  AZURE_STORAGE_CONTAINER_NAMES,
+  getDefaultContainerName,
+  GCS_BUCKETNAME,
+  isValidContainerName
+} from "./constants.js";
 import { FileConversionService } from "./services/FileConversionService.js";
 import { StorageFactory } from "./services/storage/StorageFactory.js";
 
@@ -70,28 +79,6 @@ if (!GCP_PROJECT_ID || !GCP_SERVICE_ACCOUNT) {
   }
 }
 
-// Parse comma-separated container names from environment variable
-const parseContainerNames = () => {
-  const containerStr = process.env.AZURE_STORAGE_CONTAINER_NAME || "whispertempfiles";
-  return containerStr.split(',').map(name => name.trim());
-};
-
-// Helper function to get current container names at runtime
-export const getCurrentContainerNames = () => {
-  return parseContainerNames();
-};
-
-export const AZURE_STORAGE_CONTAINER_NAMES = parseContainerNames();
-export const DEFAULT_AZURE_STORAGE_CONTAINER_NAME = AZURE_STORAGE_CONTAINER_NAMES[0];
-export const GCS_BUCKETNAME = process.env.GCS_BUCKETNAME || "cortextempfiles";
-
-// Validate if a container name is allowed
-export const isValidContainerName = (containerName) => {
-  // Read from environment at runtime to support dynamically changing env in tests
-  const currentContainerNames = getCurrentContainerNames();
-  return currentContainerNames.includes(containerName);
-};
-
 function isEncoded(str) {
   // Checks for any percent-encoded sequence
   return /%[0-9A-Fa-f]{2}/.test(str);
@@ -194,7 +181,7 @@ async function downloadFromGCS(gcsUrl, destinationPath) {
 
 export const getBlobClient = async (containerName = null) => {
   const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
-  const finalContainerName = containerName || DEFAULT_AZURE_STORAGE_CONTAINER_NAME;
+  const finalContainerName = containerName || getDefaultContainerName();
 
   // Validate container name is in whitelist
   if (!isValidContainerName(finalContainerName)) {
@@ -350,10 +337,12 @@ function uploadBlob(
 
     if (errorOccurred) return; // Check again after waiting
 
-    await processFile(fieldname, file, info);
+    // Capture containerName value to avoid closure issues
+    const capturedContainerName = containerName;
+    await processFile(fieldname, file, info, capturedContainerName);
   });
 
-  const processFile = async (fieldname, file, info) => {
+  const processFile = async (fieldname, file, info, capturedContainerName) => {
    if (errorOccurred) return;
 
    // Validate file
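The capturedContainerName change above guards against a classic JavaScript closure pitfall: the busboy file handler runs asynchronously, so a callback that reads the enclosing containerName variable at execution time can observe a later value rather than the one in effect when the upload was queued. A minimal standalone sketch of the hazard (the names here are illustrative, not from this codebase):

    // Queue work that runs later; snapshot the value at queue time.
    let containerName = "containerA";
    const tasks = [];

    function queueUpload() {
      const capturedContainerName = containerName; // snapshot now
      tasks.push(() => {
        console.log(capturedContainerName); // "containerA" - value when queued
        console.log(containerName);         // "containerB" - value when the task runs
      });
    }

    queueUpload();
    containerName = "containerB"; // reassigned before the queued task executes
    tasks.forEach((task) => task());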
@@ -449,7 +438,7 @@ function uploadBlob(
         context,
         uploadName,
         azureStream,
-        containerName,
+        capturedContainerName,
       ).catch(async (err) => {
         cloudUploadError = err;
         // Fallback: try from disk if available
@@ -459,7 +448,7 @@ function uploadBlob(
           highWaterMark: 1024 * 1024,
           autoClose: true,
         });
-        return saveToAzureStorage(context, uploadName, diskStream, containerName);
+        return saveToAzureStorage(context, uploadName, diskStream, capturedContainerName);
       }
       throw err;
     });
@@ -511,6 +500,7 @@ function uploadBlob(
       }, {}),
     };
     if (hash) result.hash = hash;
+    if (capturedContainerName) result.container = capturedContainerName;
 
     // If saving locally, wait for disk write to finish and then move to public folder
     if (saveToLocal) {
@@ -582,7 +572,7 @@ function uploadBlob(
         conversion.convertedPath,
         requestId,
         null,
-        containerName,
+        capturedContainerName,
       );
 
       // Optionally save to GCS
@@ -827,6 +817,10 @@ async function uploadFile(
   if (hash) {
     result.hash = hash;
   }
+
+  if (containerName) {
+    result.container = containerName;
+  }
 
   // Initialize conversion service
   const conversionService = new FileConversionService(context, !saveToLocal);
@@ -1156,4 +1150,10 @@ export {
   gcs,
   uploadChunkToGCS,
   downloadFromGCS,
+  // Re-export container constants for backward compatibility
+  getCurrentContainerNames,
+  AZURE_STORAGE_CONTAINER_NAMES,
+  getDefaultContainerName,
+  GCS_BUCKETNAME,
+  isValidContainerName,
 };
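With result.container now set in both upload paths, a successful upload response advertises which container the file actually landed in. A sketch of the resulting shape (field values are illustrative):

    // Illustrative upload result after the changes above
    const exampleResult = {
      url: "https://account.blob.core.windows.net/usercontainer/doc.pdf?sv=...", // SAS URL
      hash: "abc123",             // present only when the caller supplied a hash
      container: "usercontainer", // new: echoes the container that was used
    };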
package/helper-apps/cortex-file-handler/src/constants.js
@@ -132,3 +132,33 @@ export const CONVERTED_EXTENSIONS = [
 
 // Azure Storage constants
 export const AZURITE_ACCOUNT_NAME = "devstoreaccount1";
+
+// Parse comma-separated container names from environment variable
+export const parseContainerNames = () => {
+  const containerStr = process.env.AZURE_STORAGE_CONTAINER_NAME || "whispertempfiles";
+  return containerStr.split(',').map(name => name.trim());
+};
+
+// Helper function to get current container names at runtime
+// Useful for runtime validation when env vars might change (e.g., in tests)
+export const getCurrentContainerNames = () => {
+  return parseContainerNames();
+};
+
+export const AZURE_STORAGE_CONTAINER_NAMES = parseContainerNames();
+
+// Helper function to get the default container name at runtime
+// This allows tests to change the environment variable and have the correct default
+export const getDefaultContainerName = () => {
+  return process.env.DEFAULT_AZURE_STORAGE_CONTAINER_NAME || getCurrentContainerNames()[0];
+};
+
+export const DEFAULT_AZURE_STORAGE_CONTAINER_NAME = process.env.DEFAULT_AZURE_STORAGE_CONTAINER_NAME || AZURE_STORAGE_CONTAINER_NAMES[0];
+export const GCS_BUCKETNAME = process.env.GCS_BUCKETNAME || "cortextempfiles";
+
+// Validate if a container name is allowed
+export const isValidContainerName = (containerName) => {
+  // Read from environment at runtime to support dynamically changing env in tests
+  const currentContainerNames = getCurrentContainerNames();
+  return currentContainerNames.includes(containerName);
+};
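Moving these helpers into constants.js removes the need for StorageFactory's lazy import of blobHandler.js (deleted below) and makes the container whitelist a pure function of the environment, read at call time. A sketch of the expected behavior under the code above (env values are illustrative):

    import { getCurrentContainerNames, getDefaultContainerName, isValidContainerName } from "./constants.js";

    // These helpers re-read the environment on every call
    process.env.AZURE_STORAGE_CONTAINER_NAME = "whispertempfiles, usercontainer";

    console.log(getCurrentContainerNames()); // ["whispertempfiles", "usercontainer"] (entries are trimmed)
    console.log(getDefaultContainerName());  // "whispertempfiles" (first entry, absent an override)
    console.log(isValidContainerName("usercontainer")); // true
    console.log(isValidContainerName("elsewhere"));     // false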
package/helper-apps/cortex-file-handler/src/index.js
@@ -14,6 +14,7 @@ import {
   removeFromFileStoreMap,
   setFileStoreMap,
   cleanupRedisFileStoreMapAge,
+  getScopedHashKey,
 } from "./redis.js";
 import { FileConversionService } from "./services/FileConversionService.js";
 import { StorageService } from "./services/storage/StorageService.js";
@@ -46,6 +47,27 @@ async function cleanupInactive(context) {
 }
 
 async function CortexFileHandler(context, req) {
+  // Parse body if it's a string (Azure Functions sometimes doesn't auto-parse DELETE bodies)
+  let parsedBody = req.body;
+  if (typeof req.body === 'string' && req.body.length > 0) {
+    try {
+      parsedBody = JSON.parse(req.body);
+    } catch (e) {
+      // If parsing fails, treat as empty object
+      parsedBody = {};
+    }
+  }
+
+  // For GET requests, prioritize query string. For other methods, check body first, then query
+  // Also check if parsedBody actually has content (not just empty object)
+  const hasBodyContent = parsedBody && typeof parsedBody === 'object' && Object.keys(parsedBody).length > 0;
+  const bodySource = hasBodyContent ? (parsedBody.params || parsedBody) : {};
+  const querySource = req.query || {};
+
+  // Merge sources: for GET, query takes priority; for others, body takes priority
+  const isGet = req.method?.toLowerCase() === 'get';
+  const source = isGet ? { ...bodySource, ...querySource } : { ...querySource, ...bodySource };
+
   const {
     uri,
     requestId,
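The merged source object keeps GET semantics intact while letting non-GET requests carry parameters in a body that Azure Functions may deliver as a raw string. A sketch of the precedence rules (the request objects here are illustrative):

    // Non-GET: body beats query, and a stringified body is parsed first
    const deleteReq = {
      method: "DELETE",
      query: { hash: "fromQuery" },
      body: JSON.stringify({ params: { hash: "fromBody", container: "usercontainer" } }),
    };
    // parsedBody.params becomes bodySource, so
    // source = { ...querySource, ...bodySource } = { hash: "fromBody", container: "usercontainer" }

    // GET: query beats body
    const getReq = { method: "GET", query: { hash: "fromQuery" }, body: { hash: "fromBody" } };
    // source = { ...bodySource, ...querySource } = { hash: "fromQuery" }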
@@ -58,7 +80,7 @@ async function CortexFileHandler(context, req) {
     load,
     restore,
     container,
-  } = req.body?.params || req.query;
+  } = source;
 
   // Normalize boolean parameters
   const shouldSave = save === true || save === "true";
@@ -135,13 +157,15 @@ async function CortexFileHandler(context, req) {
   // 1. Delete multiple files by requestId (existing behavior)
   // 2. Delete single file by hash (new behavior)
   if (operation === "delete") {
-    const deleteRequestId = req.query.requestId || requestId;
-    const deleteHash = req.query.hash || hash;
+    // Check both query string and body params for delete parameters
+    // Handle both req.body.params.hash and req.body.hash formats
+    const deleteRequestId = req.query.requestId || parsedBody?.params?.requestId || parsedBody?.requestId || requestId;
+    const deleteHash = req.query.hash || parsedBody?.params?.hash || parsedBody?.hash || hash;
 
     // If only hash is provided, delete single file by hash
     if (deleteHash && !deleteRequestId) {
       try {
-        const deleted = await storageService.deleteFileByHash(deleteHash);
+        const deleted = await storageService.deleteFileByHash(deleteHash, container);
         context.res = {
           status: 200,
           body: {
@@ -163,17 +187,18 @@ async function CortexFileHandler(context, req) {
     if (!deleteRequestId) {
       context.res = {
         status: 400,
-        body: "Please pass either a requestId or hash on the query string",
+        body: "Please pass either a requestId or hash in the query string or request body",
       };
       return;
     }
 
     // First, get the hash from the map if it exists
     if (deleteHash) {
-      const hashResult = await getFileStoreMap(deleteHash);
+      const scopedHash = getScopedHashKey(deleteHash, container);
+      const hashResult = await getFileStoreMap(scopedHash);
       if (hashResult) {
-        context.log(`Found hash in map for deletion: ${deleteHash}`);
-        await removeFromFileStoreMap(deleteHash);
+        context.log(`Found hash in map for deletion: ${deleteHash} (scoped key: ${scopedHash})`);
+        await removeFromFileStoreMap(scopedHash);
       }
     }
 
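Together with the body parsing above, a client can now delete a single cached file by hash, scoped to a specific container, with the parameters in either the body or the query string. A hypothetical client call (the endpoint URL is illustrative, not from this package):

    // Hypothetical invocation of the file handler's delete operation
    await fetch("https://files.example.com/api/CortexFileHandler?operation=delete", {
      method: "DELETE",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ hash: "abc123", container: "usercontainer" }),
    });
    // With a hash and no requestId, the handler calls
    // storageService.deleteFileByHash("abc123", container) and reports the outcome.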
@@ -201,7 +226,8 @@ async function CortexFileHandler(context, req) {
   }
 
   // Check if file already exists (using hash or URL as the key)
-  const cacheKey = hash || remoteUrl;
+  // If hash is provided, scope it by container; otherwise use URL as-is
+  const cacheKey = hash ? getScopedHashKey(hash, container) : remoteUrl;
   const exists = await getFileStoreMap(cacheKey);
   if (exists) {
     context.res = {
@@ -255,9 +281,10 @@ async function CortexFileHandler(context, req) {
 
   if (hash && clearHash) {
     try {
-      const hashValue = await getFileStoreMap(hash);
+      const scopedHash = getScopedHashKey(hash, container);
+      const hashValue = await getFileStoreMap(scopedHash);
       if (hashValue) {
-        await removeFromFileStoreMap(hash);
+        await removeFromFileStoreMap(scopedHash);
         context.res = {
           status: 200,
           body: `Hash ${hash} removed`,
@@ -279,10 +306,11 @@ async function CortexFileHandler(context, req) {
   }
 
   if (hash && checkHash) {
-    let hashResult = await getFileStoreMap(hash, true); // Skip lazy cleanup to handle it ourselves
+    const scopedHash = getScopedHashKey(hash, container);
+    let hashResult = await getFileStoreMap(scopedHash, true); // Skip lazy cleanup to handle it ourselves
 
     if (hashResult) {
-      context.log(`File exists in map: ${hash}`);
+      context.log(`File exists in map: ${hash} (scoped key: ${scopedHash})`);
 
       // Log the URL retrieved from Redis before checking existence
       context.log(`Checking existence of URL from Redis: ${hashResult?.url}`);
@@ -301,7 +329,7 @@ async function CortexFileHandler(context, req) {
        context.log(
          `File not found in any storage. Removing from map: ${hash}`,
        );
-        await removeFromFileStoreMap(hash);
+        await removeFromFileStoreMap(scopedHash);
        context.res = {
          status: 404,
          body: `Hash ${hash} not found in storage`,
@@ -320,7 +348,7 @@ async function CortexFileHandler(context, req) {
      } catch (error) {
        context.log(`Error restoring to GCS: ${error}`);
        // If restoration fails, remove the hash from the map
-        await removeFromFileStoreMap(hash);
+        await removeFromFileStoreMap(scopedHash);
        context.res = {
          status: 404,
          body: `Hash ${hash} not found`,
@@ -378,7 +406,7 @@ async function CortexFileHandler(context, req) {
      } catch (error) {
        console.error("Error restoring from GCS:", error);
        // If restoration fails, remove the hash from the map
-        await removeFromFileStoreMap(hash);
+        await removeFromFileStoreMap(scopedHash);
        context.res = {
          status: 404,
          body: `Hash ${hash} not found`,
@@ -396,7 +424,7 @@ async function CortexFileHandler(context, req) {
          : false;
      if (!finalPrimaryCheck && !finalGCSCheck) {
        context.log(`Failed to restore file. Removing from map: ${hash}`);
-        await removeFromFileStoreMap(hash);
+        await removeFromFileStoreMap(scopedHash);
        context.res = {
          status: 404,
          body: `Hash ${hash} not found`,
@@ -498,7 +526,7 @@ async function CortexFileHandler(context, req) {
      }
 
      //update redis timestamp with current time
-      await setFileStoreMap(hash, hashResult);
+      await setFileStoreMap(scopedHash, hashResult);
 
      context.res = {
        status: 200,
@@ -508,7 +536,7 @@ async function CortexFileHandler(context, req) {
    } catch (error) {
      context.log(`Error checking file existence: ${error}`);
      // If there's an error checking file existence, remove the hash from the map
-      await removeFromFileStoreMap(hash);
+      await removeFromFileStoreMap(scopedHash);
      context.res = {
        status: 404,
        body: `Hash ${hash} not found`,
@@ -532,7 +560,8 @@ async function CortexFileHandler(context, req) {
   // Use uploadBlob to handle multipart/form-data
   const result = await uploadBlob(context, req, saveToLocal, null, hash, container);
   if (result?.hash && context?.res?.body) {
-    await setFileStoreMap(result.hash, context.res.body);
+    const scopedHash = getScopedHashKey(result.hash, result.container || container);
+    await setFileStoreMap(scopedHash, context.res.body);
   }
   return;
 }
package/helper-apps/cortex-file-handler/src/redis.js
@@ -1,7 +1,28 @@
 import redis from "ioredis";
+import { getDefaultContainerName } from "./constants.js";
 
 const connectionString = process.env["REDIS_CONNECTION_STRING"];
 
+/**
+ * Generate a scoped hash key for Redis storage
+ * Always includes the container name in the format hash:container
+ * @param {string} hash - The file hash
+ * @param {string} containerName - The container name (optional, defaults to default container)
+ * @returns {string} The scoped hash key
+ */
+export const getScopedHashKey = (hash, containerName = null) => {
+  if (!hash) return hash;
+
+  // Get the default container name at runtime to support dynamic env changes in tests
+  const defaultContainerName = getDefaultContainerName();
+
+  // Use default container if not provided
+  const container = containerName || defaultContainerName;
+
+  // Always scope by container
+  return `${hash}:${container}`;
+};
+
 // Create a mock client for test environment when Redis is not configured
 const createMockClient = () => {
   const store = new Map();
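Because every key now embeds its container, the same file hash can be cached once per container inside the single FileStoreMap Redis hash. Expected outputs, assuming the default container resolves to "whispertempfiles":

    getScopedHashKey("abc123", "usercontainer"); // "abc123:usercontainer"
    getScopedHashKey("abc123");                  // "abc123:whispertempfiles" (default container)
    getScopedHashKey("");                        // "" (falsy hashes pass through unchanged)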
@@ -123,7 +144,28 @@ const setFileStoreMap = async (key, value) => {
 
 const getFileStoreMap = async (key, skipLazyCleanup = false) => {
   try {
-    const value = await client.hget("FileStoreMap", key);
+    let value = await client.hget("FileStoreMap", key);
+
+    // Backwards compatibility: if not found and key is for default container, try legacy key
+    if (!value && key && key.includes(':')) {
+      const [hash, containerName] = key.split(':', 2);
+      const defaultContainerName = getDefaultContainerName();
+
+      // If this is the default container, try the legacy key (hash without container)
+      if (containerName === defaultContainerName) {
+        console.log(`Key ${key} not found, trying legacy key ${hash} for backwards compatibility`);
+        value = await client.hget("FileStoreMap", hash);
+
+        // If found with legacy key, migrate it to the new scoped key
+        if (value) {
+          console.log(`Found value with legacy key ${hash}, migrating to new key ${key}`);
+          await client.hset("FileStoreMap", key, value);
+          // Optionally remove the old key after migration
+          // await client.hdel("FileStoreMap", hash);
+        }
+      }
+    }
+
     if (value) {
       try {
         // parse the value back to an object before returning
package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js
@@ -119,6 +119,11 @@ export class AzureStorageProvider extends StorageProvider {
       blobName = generateBlobName(requestId, `${shortId}${fileExtension}`);
     }
 
+    // Validate blobName is not empty
+    if (!blobName || blobName.trim().length === 0) {
+      throw new Error(`Invalid blob name generated: blobName="${blobName}", requestId="${requestId}", filename="${filename}"`);
+    }
+
     // Create a read stream for the file
     const fileStream = fs.createReadStream(filePath);
 
@@ -134,8 +139,20 @@ export class AzureStorageProvider extends StorageProvider {
     // Generate SAS token after successful upload
     const sasToken = this.generateSASToken(containerClient, blobName);
 
+    const url = `${blockBlobClient.url}?${sasToken}`;
+
+    // Validate that the URL contains a blob name (not just container)
+    // Azure blob URLs should be: https://account.blob.core.windows.net/container/blobname
+    // Container-only URLs end with /container/ or /container
+    const urlObj = new URL(url);
+    const pathParts = urlObj.pathname.split('/').filter(p => p.length > 0);
+    if (pathParts.length <= 1) {
+      // Only container name, no blob name - this is invalid
+      throw new Error(`Generated invalid Azure URL (container-only): ${url}, blobName: ${blobName}`);
+    }
+
     return {
-      url: `${blockBlobClient.url}?${sasToken}`,
+      url: url,
       blobName: blobName,
     };
   }
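Both upload paths now refuse to hand back a container-only URL. The check is purely structural: an Azure blob URL must have at least a container segment and a blob segment in its path. A sketch of what the validation above accepts and rejects:

    // Path segments after the host decide validity
    const ok = new URL("https://acct.blob.core.windows.net/container/file.pdf?sv=token");
    ok.pathname.split('/').filter(p => p.length > 0); // ["container", "file.pdf"] -> accepted

    const bad = new URL("https://acct.blob.core.windows.net/container/?sv=token");
    bad.pathname.split('/').filter(p => p.length > 0); // ["container"] -> throws above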
@@ -148,6 +165,11 @@ export class AzureStorageProvider extends StorageProvider {
     let blobName = sanitizeFilename(encodedFilename);
     blobName = encodeURIComponent(blobName);
 
+    // Validate blobName is not empty
+    if (!blobName || blobName.trim().length === 0) {
+      throw new Error(`Invalid blob name generated from encodedFilename: "${encodedFilename}"`);
+    }
+
     const options = {
       blobHTTPHeaders: {
         ...(contentType ? { blobContentType: contentType } : {}),
@@ -163,7 +185,16 @@ export class AzureStorageProvider extends StorageProvider {
     await blockBlobClient.uploadStream(stream, undefined, undefined, options);
     const sasToken = this.generateSASToken(containerClient, blobName);
 
-    return `${blockBlobClient.url}?${sasToken}`;
+    const url = `${blockBlobClient.url}?${sasToken}`;
+
+    // Validate that the URL contains a blob name (not just container)
+    const urlObj = new URL(url);
+    const pathParts = urlObj.pathname.split('/').filter(p => p.length > 0);
+    if (pathParts.length <= 1) {
+      throw new Error(`Generated invalid Azure URL (container-only) from uploadStream: ${url}, blobName: ${blobName}`);
+    }
+
+    return url;
   }
 
   async deleteFiles(requestId) {
@@ -204,19 +235,52 @@ export class AzureStorageProvider extends StorageProvider {
     const urlObj = new URL(url);
     let blobName = urlObj.pathname.substring(1); // Remove leading slash
 
-    // Handle Azurite URLs which include account name in path: /devstoreaccount1/container/blob
+    // Handle different URL formats:
+    // 1. Azurite: /devstoreaccount1/container/blobname (3 segments)
+    // 2. Standard Azure: /container/blobname (2 segments)
+    // 3. Container-only: /container or /container/ (invalid)
+
     if (blobName.includes('/')) {
-      const pathSegments = blobName.split('/');
-      if (pathSegments.length >= 2) {
-        // For Azurite: devstoreaccount1/container/blobname -> blobname
+      const pathSegments = blobName.split('/').filter(segment => segment.length > 0);
+
+      if (pathSegments.length === 1) {
+        // Only container name, no blob name - this is invalid
+        console.warn(`Invalid blob URL (container-only): ${url}`);
+        return null;
+      } else if (pathSegments.length === 2) {
+        // Standard Azure format: container/blobname
+        // Check if first segment matches container name
+        if (pathSegments[0] === this.containerName) {
+          blobName = pathSegments[1];
+        } else {
+          // Container name doesn't match, but assume second segment is blob name
+          blobName = pathSegments[1];
+        }
+      } else if (pathSegments.length >= 3) {
+        // Azurite format: devstoreaccount1/container/blobname
         // Skip the account and container segments to get the actual blob name
-        blobName = pathSegments.slice(2).join('/');
+        // Check if second segment matches container name
+        if (pathSegments[1] === this.containerName) {
+          blobName = pathSegments.slice(2).join('/');
+        } else {
+          // Container name doesn't match, but assume remaining segments are blob name
+          blobName = pathSegments.slice(2).join('/');
+        }
+      }
+    } else {
+      // No slashes - could be just container name or just blob name
+      if (blobName === this.containerName || blobName === this.containerName + '/') {
+        // URL is just the container name - invalid blob URL
+        console.warn(`Invalid blob URL (container-only): ${url}`);
+        return null;
      }
+      // Otherwise assume it's a blob name at root level (unlikely but possible)
    }
 
-    // Remove container name prefix if present (for non-Azurite URLs)
-    if (blobName.startsWith(this.containerName + '/')) {
-      blobName = blobName.substring(this.containerName.length + 1);
+    // Validate that we have a non-empty blob name
+    if (!blobName || blobName.trim().length === 0) {
+      console.warn(`Invalid blob URL (empty blob name): ${url}`);
+      return null;
     }
 
     const blockBlobClient = containerClient.getBlockBlobClient(blobName);
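Taken together, the rewritten extraction maps each URL shape to a blob name or rejects the URL outright instead of issuing a delete against the container itself. Expected outcomes for a provider whose this.containerName is "whispertempfiles" (the URLs are illustrative):

    // Standard Azure, two segments -> blob name is the second segment
    new URL("https://acct.blob.core.windows.net/whispertempfiles/file.pdf");
    // pathSegments = ["whispertempfiles", "file.pdf"] -> blobName = "file.pdf"

    // Azurite, three segments -> skip the account and container segments
    new URL("http://127.0.0.1:10000/devstoreaccount1/whispertempfiles/file.pdf");
    // pathSegments = ["devstoreaccount1", "whispertempfiles", "file.pdf"] -> blobName = "file.pdf"

    // Container-only -> logged warning, and the method returns null instead of deleting
    new URL("https://acct.blob.core.windows.net/whispertempfiles/");
    // pathSegments = ["whispertempfiles"] -> return null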
package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js
@@ -1,18 +1,10 @@
 import { AzureStorageProvider } from "./AzureStorageProvider.js";
 import { GCSStorageProvider } from "./GCSStorageProvider.js";
 import { LocalStorageProvider } from "./LocalStorageProvider.js";
+import { getCurrentContainerNames, GCS_BUCKETNAME } from "../../constants.js";
 import path from "path";
 import { fileURLToPath } from "url";
 
-// Lazy-load blob handler constants to avoid blocking module import
-let blobHandlerConstants = null;
-async function getBlobHandlerConstants() {
-  if (!blobHandlerConstants) {
-    blobHandlerConstants = await import("../../blobHandler.js");
-  }
-  return blobHandlerConstants;
-}
-
 // Singleton instance for provider caching across the application
 let storageFactoryInstance = null;
 
@@ -49,7 +41,6 @@ export class StorageFactory {
 
   async getAzureProvider(containerName = null) {
     // Read container names from environment directly to get current values
-    const { getCurrentContainerNames } = await getBlobHandlerConstants();
     const azureStorageContainerNames = getCurrentContainerNames();
     const defaultAzureStorageContainerName = azureStorageContainerNames[0];
 
@@ -82,7 +73,7 @@ export class StorageFactory {
     }
     const provider = new GCSStorageProvider(
       credentials,
-      process.env.GCS_BUCKETNAME || "cortextempfiles",
+      GCS_BUCKETNAME,
     );
     this.providers.set(key, provider);
   }
package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js
@@ -163,9 +163,10 @@ export class StorageService {
   /**
    * Delete a single file by its hash from both primary and backup storage
    * @param {string} hash - The hash of the file to delete
+   * @param {string} containerName - Optional container name for scoping the hash
    * @returns {Promise<Object>} Object containing deletion results and file info
    */
-  async deleteFileByHash(hash) {
+  async deleteFileByHash(hash, containerName = null) {
     await this._initialize();
 
     if (!hash) {
@@ -175,11 +176,27 @@ export class StorageService {
     const results = [];
 
     // Get and remove file information from Redis map (non-atomic operations)
-    const { getFileStoreMap, removeFromFileStoreMap } = await import("../../redis.js");
-    const hashResult = await getFileStoreMap(hash);
+    const { getFileStoreMap, removeFromFileStoreMap, getScopedHashKey, getDefaultContainerName } = await import("../../redis.js");
+    const { getDefaultContainerName: getDefaultContainerNameFromConstants } = await import("../../constants.js");
+    const scopedHash = getScopedHashKey(hash, containerName);
+    const hashResult = await getFileStoreMap(scopedHash);
 
     if (hashResult) {
-      await removeFromFileStoreMap(hash);
+      // Remove from scoped key
+      await removeFromFileStoreMap(scopedHash);
+
+      // Also check and remove legacy key (unscoped) if this is the default container
+      // This handles backwards compatibility with old entries stored without container scoping
+      const defaultContainerName = getDefaultContainerNameFromConstants();
+      const effectiveContainer = containerName || defaultContainerName;
+      if (effectiveContainer === defaultContainerName && scopedHash.includes(':')) {
+        const [legacyHash] = scopedHash.split(':', 2);
+        // Try to remove legacy key - only attempt if it exists to avoid unnecessary "does not exist" logs
+        const legacyExists = await getFileStoreMap(legacyHash);
+        if (legacyExists) {
+          await removeFromFileStoreMap(legacyHash);
+        }
+      }
     }
 
     if (!hashResult) {
@@ -189,9 +206,16 @@ export class StorageService {
     // Delete from primary storage
     if (hashResult.url) {
       try {
+        // Log the URL being deleted for debugging
+        console.log(`Deleting file from primary storage - hash: ${hash}, url: ${hashResult.url}`);
         const primaryResult = await this.deleteFile(hashResult.url);
         if (primaryResult) {
+          console.log(`Successfully deleted from primary storage - hash: ${hash}, result: ${primaryResult}`);
           results.push({ provider: 'primary', result: primaryResult });
+        } else {
+          // deleteFile returned null, which means the URL was invalid
+          console.warn(`Invalid or empty URL for hash ${hash}: ${hashResult.url}`);
+          results.push({ provider: 'primary', error: 'Invalid URL (container-only or empty blob name)' });
         }
       } catch (error) {
         console.error(`Error deleting file from primary storage:`, error);
@@ -202,14 +226,25 @@ export class StorageService {
     // Delete from backup storage (GCS)
     if (hashResult.gcs && this.backupProvider) {
       try {
+        console.log(`Deleting file from backup storage - hash: ${hash}, gcs: ${hashResult.gcs}`);
         const backupResult = await this.deleteFileFromBackup(hashResult.gcs);
         if (backupResult) {
+          console.log(`Successfully deleted from backup storage - hash: ${hash}, result: ${backupResult}`);
           results.push({ provider: 'backup', result: backupResult });
+        } else {
+          console.warn(`Backup deletion returned null for hash ${hash}: ${hashResult.gcs}`);
+          results.push({ provider: 'backup', error: 'Deletion returned null' });
        }
      } catch (error) {
        console.error(`Error deleting file from backup storage:`, error);
        results.push({ provider: 'backup', error: error.message });
      }
+    } else {
+      if (!hashResult.gcs) {
+        console.log(`No GCS URL found for hash ${hash}, skipping backup deletion`);
+      } else if (!this.backupProvider) {
+        console.log(`Backup provider not configured, skipping backup deletion for hash ${hash}`);
+      }
    }
 
    // Note: Hash was already removed from Redis atomically at the beginning
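The results entries make partial failures visible instead of silently swallowing them. Based on the results.push(...) calls above, a delete where the primary copy is removed but the GCS backup is missing might accumulate entries like the following (a sketch; the method's final return value is assembled outside this hunk):

    const results = await storageService.deleteFileByHash("abc123", "usercontainer");
    // Entries accumulated above could look like:
    // [
    //   { provider: "primary", result: ... },                     // primary delete succeeded
    //   { provider: "backup", error: "Deletion returned null" },  // GCS copy was not found
    // ]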