@aj-archipelago/cortex 1.4.22 → 1.4.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/FILE_SYSTEM_DOCUMENTATION.md +116 -48
  2. package/config.js +9 -0
  3. package/lib/fileUtils.js +329 -214
  4. package/package.json +1 -1
  5. package/pathways/system/entity/files/sys_read_file_collection.js +22 -11
  6. package/pathways/system/entity/files/sys_update_file_metadata.js +18 -8
  7. package/pathways/system/entity/sys_entity_agent.js +8 -6
  8. package/pathways/system/entity/tools/sys_tool_codingagent.js +4 -4
  9. package/pathways/system/entity/tools/sys_tool_editfile.js +35 -24
  10. package/pathways/system/entity/tools/sys_tool_file_collection.js +93 -36
  11. package/pathways/system/entity/tools/sys_tool_image.js +1 -1
  12. package/pathways/system/entity/tools/sys_tool_image_gemini.js +1 -1
  13. package/pathways/system/entity/tools/sys_tool_readfile.js +4 -4
  14. package/pathways/system/entity/tools/sys_tool_slides_gemini.js +1 -1
  15. package/pathways/system/entity/tools/sys_tool_video_veo.js +1 -1
  16. package/pathways/system/entity/tools/sys_tool_view_image.js +10 -5
  17. package/pathways/system/workspaces/run_workspace_agent.js +4 -1
  18. package/pathways/video_seedance.js +2 -0
  19. package/server/executeWorkspace.js +45 -2
  20. package/server/pathwayResolver.js +18 -0
  21. package/server/plugins/replicateApiPlugin.js +18 -0
  22. package/server/typeDef.js +10 -1
  23. package/tests/integration/features/tools/fileCollection.test.js +254 -248
  24. package/tests/integration/features/tools/fileOperations.test.js +131 -81
  25. package/tests/integration/graphql/async/stream/vendors/claude_streaming.test.js +3 -4
  26. package/tests/integration/graphql/async/stream/vendors/gemini_streaming.test.js +3 -4
  27. package/tests/integration/graphql/async/stream/vendors/grok_streaming.test.js +3 -4
  28. package/tests/integration/graphql/async/stream/vendors/openai_streaming.test.js +5 -5
  29. package/tests/unit/core/fileCollection.test.js +86 -25
  30. package/pathways/system/workspaces/run_workspace_research_agent.js +0 -27
package/lib/fileUtils.js CHANGED
@@ -702,23 +702,21 @@ function parseRawFileData(allFiles, contextKey = null) {
702
702
  * Filter and format file collection based on inCollection and chatId
703
703
  * @param {Array} rawFiles - Array of parsed file data objects
704
704
  * @param {string|null} chatId - Optional chat ID to filter by
705
- * @returns {Array} Filtered and sorted file collection
705
+ * @returns {Array} Filtered and sorted file collection (includes inCollection for reference counting)
706
706
  */
707
707
function filterAndFormatFileCollection(rawFiles, chatId = null) {
    // Ordering key: last access time, falling back to the date the file was added, then the epoch.
    const accessTime = (entry) => new Date(entry.lastAccessed || entry.addedDate || 0);
    const byMostRecentFirst = (left, right) => accessTime(right) - accessTime(left);

    // Entries keep their inCollection field (it is needed for reference counting display).
    // filter() returns a fresh array, so the in-place sort never touches rawFiles.
    return rawFiles
        .filter((entry) => isFileInCollection(entry.inCollection, chatId))
        .sort(byMostRecentFirst);
}
723
721
 
724
722
  async function loadFileCollection(contextId, contextKey = null, useCache = true, chatId = null) {
@@ -768,6 +766,45 @@ async function loadFileCollection(contextId, contextKey = null, useCache = true,
768
766
  return filterAndFormatFileCollection(rawFiles, chatId);
769
767
  }
770
768
 
769
/**
 * Load ALL files from a context's file collection, bypassing inCollection filtering.
 * Used when merging alt contexts where we want all files regardless of chat scope.
 *
 * This is a best-effort read: if no Redis client is available, or reading/parsing
 * fails, the failure is logged and an empty array is returned instead of throwing.
 *
 * @param {string} contextId - Context ID
 * @param {string|null} contextKey - Optional encryption key (forwarded to parseRawFileData)
 * @returns {Promise<Array>} All files in the collection, most recently accessed first
 *                           (inCollection is kept on each entry for reference counting)
 */
async function loadFileCollectionAll(contextId, contextKey = null) {
    if (!contextId) {
        return [];
    }

    try {
        const redisClient = await getRedisClient();
        if (!redisClient) {
            return [];
        }

        const contextMapKey = `FileStoreMap:ctx:${contextId}`;
        const allFiles = await redisClient.hgetall(contextMapKey);

        // Parse raw file data
        const rawFiles = parseRawFileData(allFiles, contextKey);

        // No inCollection filtering here - only sort by lastAccessed (most recent first),
        // falling back to addedDate when a file has never been accessed.
        rawFiles.sort((a, b) => {
            const aDate = new Date(a.lastAccessed || a.addedDate || 0);
            const bDate = new Date(b.lastAccessed || b.addedDate || 0);
            return bDate - aDate;
        });

        return rawFiles;
    } catch (e) {
        // Still best-effort, but no longer silent: surface the reason so read
        // failures are distinguishable from a genuinely empty collection.
        logger.warn(`Failed to load file collection for context ${contextId}: ${e?.message || e}`);
        return [];
    }
}
807
+
771
808
  /**
772
809
  * Normalize inCollection value to array format
773
810
  * @param {boolean|Array<string>|undefined} inCollection - inCollection value to normalize
@@ -798,16 +835,94 @@ function normalizeInCollection(inCollection) {
798
835
  return ['*'];
799
836
  }
800
837
 
838
/**
 * Compute the inCollection value to store for a file, given an optional chat ID.
 * Single source of truth so every file operation scopes files the same way.
 * @param {string|null|undefined} chatId - Optional chat ID
 * @returns {Array<string>} [chatId] when chatId is a non-blank string, otherwise ['*'] (global)
 */
function getInCollectionValue(chatId = null) {
    // Only a string with at least one non-whitespace character counts as a chat ID.
    const hasChatId = typeof chatId === 'string' && chatId.trim() !== '';
    return hasChatId ? [chatId] : ['*'];
}
850
+
851
/**
 * Add a chatId to an existing inCollection array (reference counting)
 * If the chatId is already present, returns the array unchanged.
 *
 * IMPORTANT: inCollection is either ['*'] (global) OR [chatId, ...] (chat-scoped), never mixed.
 * If inCollection contains '*' (global), it stays global - no chatIds are added.
 *
 * A boolean `true` is treated as global as well, so a file stored with that form is
 * never narrowed to a single chat.
 * NOTE(review): assumes `true` means "global", matching the boolean form that
 * normalizeInCollection is documented to accept - confirm against that function.
 *
 * The input array is never mutated; a new array is returned when a chatId is appended.
 *
 * @param {Array<string>|boolean|undefined} existingInCollection - Current inCollection value
 * @param {string|null} chatId - Chat ID to add
 * @returns {Array<string>} Updated inCollection array
 */
function addChatIdToInCollection(existingInCollection, chatId) {
    // Boolean true form: already global, stay global
    if (existingInCollection === true) {
        return ['*'];
    }

    // Any other non-array value is treated as "no references yet"
    const existing = Array.isArray(existingInCollection) ? existingInCollection : [];

    // If already global, stay global
    if (existing.includes('*')) {
        return existing;
    }

    // If no usable chatId provided, return existing or default to global
    if (typeof chatId !== 'string' || chatId.trim() === '') {
        return existing.length > 0 ? existing : ['*'];
    }

    // Add chatId only if not already present
    return existing.includes(chatId) ? existing : [...existing, chatId];
}
883
+
884
/**
 * Remove a chatId from an inCollection array (reference counting)
 * Returns the updated array without the chatId.
 *
 * IMPORTANT: Global files (['*']) are not reference-counted - they return unchanged.
 * Only chat-scoped files have chatIds removed. When removing from collection,
 * global files should be fully deleted, not reference-counted.
 *
 * A boolean `true` is treated as global and is returned as ['*'], so it is never
 * reported as a chat-scoped file with zero remaining references.
 * NOTE(review): assumes `true` means "global", matching the boolean form that
 * normalizeInCollection is documented to accept - confirm against that function.
 *
 * The input array is never mutated.
 *
 * @param {Array<string>|boolean|undefined} existingInCollection - Current inCollection value
 * @param {string|null} chatId - Chat ID to remove
 * @returns {Array<string>} Updated inCollection array (may be empty for chat-scoped files)
 */
function removeChatIdFromInCollection(existingInCollection, chatId) {
    // Boolean true form: global, not reference-counted
    if (existingInCollection === true) {
        return ['*'];
    }

    // Any other non-array value is treated as "no references"
    const existing = Array.isArray(existingInCollection) ? existingInCollection : [];

    // If no usable chatId provided, can't remove anything
    if (typeof chatId !== 'string' || chatId.trim() === '') {
        return existing;
    }

    // Global files aren't scoped to chats - removing a specific chatId is a no-op
    if (existing.includes('*')) {
        return existing;
    }

    // Drop the chatId; an empty result means no chat references the file any more
    return existing.filter(id => id !== chatId);
}
914
+
801
915
  /**
802
916
  * Update file metadata in Redis hash map (direct atomic operation)
803
917
  * @param {string} contextId - Context ID
804
918
  * @param {string} hash - File hash
805
919
  * @param {Object} metadata - Metadata to update (displayFilename, id, tags, notes, mimeType, addedDate, lastAccessed, permanent, inCollection)
806
920
  * @param {string} contextKey - Optional context key for encryption
921
+ * @param {string|null} chatId - Optional chat ID, used as default for inCollection if not provided in metadata and not already set
807
922
  * Note: Does NOT update CFH core fields (url, gcs, hash, filename) - those are managed by CFH
808
923
  * @returns {Promise<boolean>} True if successful
809
924
  */
810
- async function updateFileMetadata(contextId, hash, metadata, contextKey = null) {
925
+ async function updateFileMetadata(contextId, hash, metadata, contextKey = null, chatId = null) {
811
926
  if (!contextId || !hash) {
812
927
  return false;
813
928
  }
@@ -819,20 +934,24 @@ async function updateFileMetadata(contextId, hash, metadata, contextKey = null)
819
934
  }
820
935
 
821
936
  const contextMapKey = `FileStoreMap:ctx:${contextId}`;
822
- // Get existing file data from CFH (if any)
937
+ // Get existing file data - must exist to update
823
938
  const existingDataStr = await redisClient.hget(contextMapKey, hash);
939
+ if (!existingDataStr) {
940
+ // File doesn't exist in this context - don't create new entries
941
+ return false;
942
+ }
824
943
  const existingData = readFileDataFromRedis(existingDataStr, contextKey) || {};
825
944
 
826
945
  // Merge CFH data with Cortex metadata
827
946
  // Only update Cortex-managed fields, preserve CFH fields (url, gcs, hash, filename)
828
947
  const fileData = {
829
948
  ...existingData, // Preserve all CFH data (url, gcs, hash, filename, etc.)
830
- // Handle inCollection: normalize if provided, otherwise preserve existing or default to global
949
+ // Handle inCollection: normalize if provided, otherwise preserve existing or default based on chatId
831
950
  inCollection: metadata.inCollection !== undefined
832
951
  ? normalizeInCollection(metadata.inCollection)
833
952
  : (existingData.inCollection !== undefined
834
953
  ? normalizeInCollection(existingData.inCollection)
835
- : ['*']),
954
+ : getInCollectionValue(chatId)),
836
955
  // Update only Cortex-managed metadata fields
837
956
  ...(metadata.displayFilename !== undefined && { displayFilename: metadata.displayFilename }),
838
957
  ...(metadata.id !== undefined && { id: metadata.id }),
@@ -869,9 +988,10 @@ async function updateFileMetadata(contextId, hash, metadata, contextKey = null)
869
988
  * @param {string} contextId - Context ID for the file collection
870
989
  * @param {string} contextKey - Optional context key for encryption (unused with hash maps)
871
990
  * @param {Array} collection - File collection array
991
+ * @param {string|null} chatId - Optional chat ID, used for inCollection value (chat-scoped if provided, global if not)
872
992
  * @returns {Promise<boolean>} True if successful
873
993
  */
874
- async function saveFileCollection(contextId, contextKey, collection) {
994
+ async function saveFileCollection(contextId, contextKey, collection, chatId = null) {
875
995
  const cacheKey = getCollectionCacheKey(contextId, contextKey);
876
996
 
877
997
  try {
@@ -932,7 +1052,10 @@ async function saveFileCollection(contextId, contextKey, collection) {
932
1052
  addedDate: file.addedDate || existingData.timestamp || new Date().toISOString(),
933
1053
  lastAccessed: file.lastAccessed || new Date().toISOString(),
934
1054
  permanent: file.permanent !== undefined ? file.permanent : (existingData.permanent || false),
935
- inCollection: ['*'] // Mark as global chat file (available to all chats)
1055
+ // Add chatId to existing inCollection (reference counting) - file may be used in multiple chats
1056
+ inCollection: existingData.inCollection
1057
+ ? addChatIdToInCollection(existingData.inCollection, chatId)
1058
+ : getInCollectionValue(chatId)
936
1059
  };
937
1060
 
938
1061
  // Write back to hash map (atomic operation) - encryption happens in helper
@@ -968,9 +1091,11 @@ async function saveFileCollection(contextId, contextKey, collection) {
968
1091
  * @param {string} hash - Optional file hash
969
1092
  * @param {string} fileUrl - Optional: URL of file to upload (if not already in cloud storage)
970
1093
  * @param {pathwayResolver} pathwayResolver - Optional pathway resolver for logging
1094
+ * @param {boolean} permanent - If true, file is stored with permanent retention
1095
+ * @param {string|null} chatId - Optional chat ID, used for inCollection value (chat-scoped if provided, global if not)
971
1096
  * @returns {Promise<Object>} File entry object with id
972
1097
  */
973
- async function addFileToCollection(contextId, contextKey, url, gcs, filename, tags = [], notes = '', hash = null, fileUrl = null, pathwayResolver = null, permanent = false) {
1098
+ async function addFileToCollection(contextId, contextKey, url, gcs, filename, tags = [], notes = '', hash = null, fileUrl = null, pathwayResolver = null, permanent = false, chatId = null) {
974
1099
  if (!contextId || !filename) {
975
1100
  throw new Error("contextId and filename are required");
976
1101
  }
@@ -1078,7 +1203,10 @@ async function addFileToCollection(contextId, contextKey, url, gcs, filename, ta
1078
1203
  tags: fileEntry.tags.length > 0 ? fileEntry.tags : (existingData.tags || []), // Merge tags if new ones provided
1079
1204
  notes: fileEntry.notes || existingData.notes || '', // Keep existing notes if new ones empty
1080
1205
  mimeType: fileEntry.mimeType || existingData.mimeType || null, // MIME type from URL (actual content type)
1081
- inCollection: ['*'], // Mark as global chat file (available to all chats)
1206
+ // Add chatId to existing inCollection (reference counting) - file may be used in multiple chats
1207
+ inCollection: existingData.inCollection
1208
+ ? addChatIdToInCollection(existingData.inCollection, chatId)
1209
+ : getInCollectionValue(chatId),
1082
1210
  addedDate: existingData.addedDate || fileEntry.addedDate, // Keep earliest addedDate
1083
1211
  lastAccessed: new Date().toISOString(), // Always update lastAccessed
1084
1212
  permanent: fileEntry.permanent !== undefined ? fileEntry.permanent : (existingData.permanent || false),
@@ -1238,135 +1366,6 @@ function getActualContentMimeType(file) {
1238
1366
  return determineMimeTypeFromUrl(file.url, file.gcs, null);
1239
1367
  }
1240
1368
 
1241
- /**
1242
- * Sync files from chat history to file collection
1243
- * @param {Array} chatHistory - Chat history to scan
1244
- * @param {string} contextId - Context ID for the file collection
1245
- * @param {string} contextKey - Optional context key for encryption
1246
- * @returns {Promise<Array>} Array of file metadata objects
1247
- */
1248
- async function syncFilesToCollection(chatHistory, contextId, contextKey = null) {
1249
- if (!chatHistory || !Array.isArray(chatHistory) || !contextId) {
1250
- return [];
1251
- }
1252
-
1253
- // Extract all files from chat history
1254
- const extractedFiles = extractFilesFromChatHistory(chatHistory);
1255
-
1256
- if (extractedFiles.length === 0) {
1257
- // No new files to add, return existing collection
1258
- return await loadFileCollection(contextId, contextKey, true);
1259
- }
1260
-
1261
- // Sync files - check individually and update only what's needed (atomic operations)
1262
- try {
1263
- const redisClient = await getRedisClient();
1264
- if (!redisClient) {
1265
- // No Redis, return existing collection
1266
- return await loadFileCollection(contextId, contextKey, true);
1267
- }
1268
-
1269
- const contextMapKey = `FileStoreMap:ctx:${contextId}`;
1270
- const existingFiles = await redisClient.hgetall(contextMapKey);
1271
- const existingByUrl = new Map();
1272
- const existingByGcs = new Map();
1273
- const existingByHash = new Map();
1274
-
1275
- // Build lookup maps from existing files
1276
- for (const [hash, dataStr] of Object.entries(existingFiles)) {
1277
- try {
1278
- const data = JSON.parse(dataStr);
1279
- if (data.url) existingByUrl.set(data.url, hash);
1280
- if (data.gcs) existingByGcs.set(data.gcs, hash);
1281
- if (hash) existingByHash.set(hash, hash);
1282
- } catch (e) {
1283
- // Skip invalid entries
1284
- }
1285
- }
1286
-
1287
- // Add/update files individually (atomic operations)
1288
- for (const file of extractedFiles) {
1289
- const existsByUrl = file.url && existingByUrl.has(file.url);
1290
- const existsByGcs = file.gcs && existingByGcs.has(file.gcs);
1291
- const existsByHash = file.hash && existingByHash.has(file.hash);
1292
-
1293
- if (!existsByUrl && !existsByGcs && !existsByHash && file.hash) {
1294
- // File not found in context-scoped map - check if CFH has it (context-scoped or unscoped)
1295
- // This handles the case where file was uploaded but not yet in this context's collection
1296
- const existingDataStr = await redisClient.hget(contextMapKey, file.hash);
1297
- let existingData = readFileDataFromRedis(existingDataStr, contextKey);
1298
-
1299
- // Also check unscoped map (CFH might have written it there)
1300
- if (!existingData) {
1301
- const unscopedDataStr = await redisClient.hget("FileStoreMap", file.hash);
1302
- existingData = readFileDataFromRedis(unscopedDataStr, contextKey);
1303
- }
1304
-
1305
- if (existingData) {
1306
- // CFH already has this file - merge CFH data with Cortex metadata
1307
- // Only set Cortex-managed fields (tags, notes, id, dates), preserve all CFH data
1308
- // Ensure mimeType is set (CFH doesn't store it, so we need to determine it)
1309
- // IMPORTANT: Determine MIME type from URL (actual content), not displayFilename
1310
- // displayFilename may have original extension (e.g., .docx) while URL points to converted content (e.g., .md)
1311
- const mimeType = existingData.mimeType || determineMimeTypeFromUrl(existingData.url, existingData.gcs, null);
1312
-
1313
- const fileData = {
1314
- ...existingData, // Preserve all CFH data (url, gcs, filename, displayFilename, permanent, etc.)
1315
- mimeType: mimeType, // Ensure mimeType is set
1316
- id: existingData.id || `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
1317
- tags: existingData.tags || [],
1318
- notes: existingData.notes || '',
1319
- addedDate: existingData.addedDate || existingData.timestamp || new Date().toISOString(),
1320
- lastAccessed: new Date().toISOString(),
1321
- inCollection: ['*'] // Mark as global chat file (available to all chats)
1322
- };
1323
-
1324
- // Write to Redis - encryption happens in helper
1325
- await writeFileDataToRedis(redisClient, contextMapKey, file.hash, fileData, contextKey);
1326
- } else {
1327
- // File doesn't exist in CFH - create minimal entry (file referenced in chat but not uploaded)
1328
- const mimeType = determineMimeTypeFromUrl(file.url, file.gcs, null);
1329
-
1330
- const fileData = {
1331
- url: file.url,
1332
- gcs: file.gcs || null,
1333
- mimeType: mimeType,
1334
- id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
1335
- tags: [],
1336
- notes: '',
1337
- hash: file.hash,
1338
- permanent: false,
1339
- addedDate: new Date().toISOString(),
1340
- lastAccessed: new Date().toISOString(),
1341
- inCollection: ['*'] // Mark as global chat file (available to all chats)
1342
- };
1343
-
1344
- // Write to Redis - encryption happens in helper
1345
- await writeFileDataToRedis(redisClient, contextMapKey, file.hash, fileData, contextKey);
1346
- }
1347
- } else if (file.hash) {
1348
- // File exists - update lastAccessed directly
1349
- await updateFileMetadata(contextId, file.hash, {
1350
- lastAccessed: new Date().toISOString()
1351
- }, contextKey);
1352
- }
1353
- }
1354
-
1355
- // Invalidate cache
1356
- const cacheKey = getCollectionCacheKey(contextId, contextKey);
1357
- fileCollectionCache.delete(cacheKey);
1358
- } catch (e) {
1359
- // Fallback: log error and return existing collection
1360
- const logger = (await import('./logger.js')).default;
1361
- logger.warn(`Failed to sync files individually: ${e.message}`);
1362
- // Return existing collection on error
1363
- return await loadFileCollection(contextId, contextKey, true);
1364
- }
1365
-
1366
- // Return updated collection
1367
- return await loadFileCollection(contextId, contextKey, false);
1368
- }
1369
-
1370
1369
  /**
1371
1370
  * Get available files from file collection and format for template
1372
1371
  * @param {string} contextId - Context ID for the file collection
@@ -1430,69 +1429,164 @@ function formatFilesForTemplate(collection) {
1430
1429
  }
1431
1430
 
1432
1431
/**
 * Extract default context from agentContext array (for writes/updates)
 *
 * Picks the first entry whose `default` flag is strictly `true`; if none is flagged,
 * falls back to the first entry of the array. Null/undefined entries are tolerated
 * while searching for the flagged entry rather than causing a TypeError.
 *
 * @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
 * @returns {Object|null} Default context object or null if not found
 */
function getDefaultContext(agentContext) {
    if (!Array.isArray(agentContext) || agentContext.length === 0) {
        return null;
    }
    // Optional chaining keeps a stray null/undefined entry from throwing inside find()
    return agentContext.find(ctx => ctx?.default === true) || agentContext[0] || null;
}
1445
1442
 
1446
- // Sync files from chat history to collection
1447
- await syncFilesToCollection(chatHistory, contextId, contextKey);
1443
/**
 * Load merged file collection from agentContext array
 * Merges all contexts in the array for read operations
 *
 * Every context is read with loadFileCollectionAll (no inCollection filtering), and
 * each returned file is a shallow copy tagged with `_contextId`, the context it was
 * loaded from. Contexts are merged in array order: a file from a later context is
 * dropped when its hash, url or gcs value matches a file from an earlier context.
 * Files are not de-duplicated against other files of the same context.
 *
 * Entries that are null/undefined or have no contextId contribute no files.
 *
 * @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
 * @returns {Promise<Array>} Merged file collection
 */
async function loadMergedFileCollection(agentContext) {
    if (!Array.isArray(agentContext) || agentContext.length === 0) {
        return [];
    }

    // The per-context reads are independent, so issue them concurrently.
    // Promise.all preserves input order, which keeps the merge order deterministic.
    const collections = await Promise.all(
        agentContext.map(ctx => (ctx?.contextId
            ? loadFileCollectionAll(ctx.contextId, ctx.contextKey || null)
            : []))
    );

    const merged = [];
    // Identifiers of files contributed by contexts that were already merged
    const seenHashes = new Set();
    const seenUrls = new Set();
    const seenGcs = new Set();

    collections.forEach((files, index) => {
        const contextId = agentContext[index]?.contextId;

        // Decide on duplicates for the whole context BEFORE recording its identifiers,
        // so files are only compared against earlier contexts, never against siblings.
        const fresh = files.filter(file => !(
            (file.hash && seenHashes.has(file.hash)) ||
            (file.url && seenUrls.has(file.url)) ||
            (file.gcs && seenGcs.has(file.gcs))
        ));

        for (const file of fresh) {
            merged.push({ ...file, _contextId: contextId });
            if (file.hash) seenHashes.add(file.hash);
            if (file.url) seenUrls.add(file.url);
            if (file.gcs) seenGcs.add(file.gcs);
        }
    });

    return merged;
}
1452
1493
 
1453
1494
/**
 * Build the formatted "available files" string from the merged file collection.
 * Nothing is synced from chat history here; only the stored collections are read.
 * @param {Array} chatHistory - Unused, kept for API compatibility
 * @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
 * @returns {Promise<string>} Formatted string of available files
 */
async function getAvailableFiles(chatHistory, agentContext) {
    const hasContexts = Array.isArray(agentContext) && agentContext.length > 0;
    if (!hasContexts) {
        return 'No files available.';
    }

    const merged = await loadMergedFileCollection(agentContext);

    // _contextId is internal bookkeeping from the merge step; drop it before formatting
    const withoutContextTag = merged.map((entry) => {
        const { _contextId, ...rest } = entry;
        return rest;
    });

    return formatFilesForTemplate(withoutContextTag);
}
1509
+
1510
+ /**
1511
+ * Process files in chat history:
1512
+ * - Files IN collection (all agentContext contexts): update lastAccessed, add chatId to inCollection (reference counting), strip from message (tools can access)
1513
+ * - Files NOT in collection: leave in message (model sees directly)
1514
+ *
1457
1515
  * @param {Array} chatHistory - Chat history array
1458
- * @param {string} contextId - Context ID for file collection
1459
- * @param {string|null} contextKey - Optional encryption key
1460
- * @returns {Promise<{chatHistory: Array, availableFiles: string}>} Modified chat history and available files string
1516
+ * @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
1517
+ * @param {string|null} chatId - Optional chat ID, added to inCollection for reference counting when files are accessed
1518
+ * @returns {Promise<{chatHistory: Array, availableFiles: string}>}
1461
1519
  */
1462
- async function syncAndStripFilesFromChatHistory(chatHistory, contextId, contextKey = null) {
1520
+ async function syncAndStripFilesFromChatHistory(chatHistory, agentContext, chatId = null) {
1463
1521
  if (!chatHistory || !Array.isArray(chatHistory)) {
1464
1522
  return { chatHistory: chatHistory || [], availableFiles: 'No files available.' };
1465
1523
  }
1466
1524
 
1467
- if (!contextId) {
1468
- // No contextId - can't sync to collection, just strip files
1469
- const strippedHistory = stripAllFilesFromChatHistory(chatHistory);
1470
- return { chatHistory: strippedHistory, availableFiles: 'No files available.' };
1525
+ if (!agentContext || !Array.isArray(agentContext) || agentContext.length === 0) {
1526
+ // No agentContext - no collection to check, leave all files in messages
1527
+ return { chatHistory, availableFiles: 'No files available.' };
1471
1528
  }
1472
1529
 
1473
- // Sync files to collection first
1474
- await syncFilesToCollection(chatHistory, contextId, contextKey);
1530
+ // Load merged collection once
1531
+ const collection = await loadMergedFileCollection(agentContext);
1475
1532
 
1476
- // Get available files string
1477
- const availableFiles = await getAvailableFilesFromCollection(contextId, contextKey);
1533
+ // Build lookup map from contextId to contextKey for updates
1534
+ const contextKeyMap = new Map(agentContext.map(ctx => [ctx.contextId, ctx.contextKey || null]));
1478
1535
 
1479
- // Strip all file content from chat history
1480
- const strippedHistory = stripAllFilesFromChatHistory(chatHistory);
1481
-
1482
- return { chatHistory: strippedHistory, availableFiles };
1483
- }
1536
+ // Build lookup maps for fast matching and context lookup (need Maps, not Sets, to get full file object)
1537
+ const collectionByHash = new Map(collection.filter(f => f.hash).map(f => [f.hash, f]));
1538
+ const collectionByUrl = new Map(collection.filter(f => f.url).map(f => [f.url, f]));
1539
+ const collectionByGcs = new Map(collection.filter(f => f.gcs).map(f => [f.gcs, f]));
1484
1540
 
1485
- /**
1486
- * Strip all file and image content from chat history, replacing with placeholders.
1487
- * @param {Array} chatHistory - Chat history array
1488
- * @returns {Array} Chat history with file content replaced by placeholders
1489
- */
1490
- function stripAllFilesFromChatHistory(chatHistory) {
1491
- if (!chatHistory || !Array.isArray(chatHistory)) {
1492
- return chatHistory || [];
1493
- }
1541
+ // Helper to get file from collection (by hash, URL, or GCS) to find _contextId
1542
+ const getFileFromCollection = (contentObj) => {
1543
+ const fileHash = contentObj.hash;
1544
+ const fileUrl = contentObj.url || contentObj.image_url?.url;
1545
+ const fileGcs = contentObj.gcs;
1546
+
1547
+ if (fileHash && collectionByHash.has(fileHash)) {
1548
+ return collectionByHash.get(fileHash);
1549
+ }
1550
+ if (fileUrl && collectionByUrl.has(fileUrl)) {
1551
+ return collectionByUrl.get(fileUrl);
1552
+ }
1553
+ if (fileGcs && collectionByGcs.has(fileGcs)) {
1554
+ return collectionByGcs.get(fileGcs);
1555
+ }
1556
+ return null;
1557
+ };
1494
1558
 
1495
- return chatHistory.map(message => {
1559
+ // Helper to check if a file content object is in the collection
1560
+ const isInCollection = (contentObj) => {
1561
+ return getFileFromCollection(contentObj) !== null;
1562
+ };
1563
+
1564
+ // Helper to update file when stripped - use _contextId from collection to know which context to update
1565
+ const updateStrippedFile = (contentObj) => {
1566
+ const file = getFileFromCollection(contentObj);
1567
+ if (!file || !file._contextId) return;
1568
+
1569
+ // Use hash from the found file (may not be in contentObj)
1570
+ const hash = file.hash;
1571
+ if (!hash) return;
1572
+
1573
+ // Get the correct contextKey for this file's context
1574
+ const fileContextKey = contextKeyMap.get(file._contextId) || null;
1575
+
1576
+ const now = new Date().toISOString();
1577
+ // Update lastAccessed and add chatId to inCollection (reference counting)
1578
+ // If this file is being used in a new chat, add that chat to the list
1579
+ const updatedInCollection = addChatIdToInCollection(file.inCollection, chatId);
1580
+ updateFileMetadata(file._contextId, hash, {
1581
+ lastAccessed: now,
1582
+ inCollection: updatedInCollection
1583
+ }, fileContextKey).catch((err) => {
1584
+ logger.warn(`Failed to update metadata for stripped file (hash=${hash}): ${err?.message || err}`);
1585
+ });
1586
+ };
1587
+
1588
+ // Process chat history - only strip files that are in collection
1589
+ const processedHistory = chatHistory.map(message => {
1496
1590
  if (!message || message.role !== 'user' || !message.content) {
1497
1591
  return message;
1498
1592
  }
@@ -1502,9 +1596,14 @@ function stripAllFilesFromChatHistory(chatHistory) {
1502
1596
  const newContent = message.content.map(item => {
1503
1597
  const contentObj = typeof item === 'string' ? tryParseJson(item) : item;
1504
1598
  if (contentObj && (contentObj.type === 'image_url' || contentObj.type === 'file')) {
1505
- // Extract filename for placeholder
1506
- const filename = extractFilenameFromFileContent(contentObj);
1507
- return { type: 'text', text: `[File: ${filename} - available via file tools]` };
1599
+ if (isInCollection(contentObj)) {
1600
+ // In collection - strip and update metadata
1601
+ updateStrippedFile(contentObj); // fire and forget
1602
+ const filename = extractFilenameFromFileContent(contentObj);
1603
+ return { type: 'text', text: `[File: ${filename} - available via file tools]` };
1604
+ }
1605
+ // Not in collection - leave as-is
1606
+ return item;
1508
1607
  }
1509
1608
  return item;
1510
1609
  });
@@ -1514,8 +1613,11 @@ function stripAllFilesFromChatHistory(chatHistory) {
1514
1613
  // Handle object content
1515
1614
  if (typeof message.content === 'object' && message.content !== null) {
1516
1615
  if (message.content.type === 'image_url' || message.content.type === 'file') {
1517
- const filename = extractFilenameFromFileContent(message.content);
1518
- return { ...message, content: `[File: ${filename} - available via file tools]` };
1616
+ if (isInCollection(message.content)) {
1617
+ updateStrippedFile(message.content); // fire and forget
1618
+ const filename = extractFilenameFromFileContent(message.content);
1619
+ return { ...message, content: `[File: ${filename} - available via file tools]` };
1620
+ }
1519
1621
  }
1520
1622
  }
1521
1623
 
@@ -1523,13 +1625,21 @@ function stripAllFilesFromChatHistory(chatHistory) {
1523
1625
  if (typeof message.content === 'string') {
1524
1626
  const contentObj = tryParseJson(message.content);
1525
1627
  if (contentObj && (contentObj.type === 'image_url' || contentObj.type === 'file')) {
1526
- const filename = extractFilenameFromFileContent(contentObj);
1527
- return { ...message, content: `[File: ${filename} - available via file tools]` };
1628
+ if (isInCollection(contentObj)) {
1629
+ updateStrippedFile(contentObj); // fire and forget
1630
+ const filename = extractFilenameFromFileContent(contentObj);
1631
+ return { ...message, content: `[File: ${filename} - available via file tools]` };
1632
+ }
1528
1633
  }
1529
1634
  }
1530
1635
 
1531
1636
  return message;
1532
1637
  });
1638
+
1639
+ // Strip internal _contextId before formatting (it's only needed for updates)
1640
+ const cleanCollection = collection.map(({ _contextId, ...file }) => file);
1641
+ const availableFiles = formatFilesForTemplate(cleanCollection);
1642
+ return { chatHistory: processedHistory, availableFiles };
1533
1643
  }
1534
1644
 
1535
1645
  /**
@@ -1661,15 +1771,15 @@ function findFileInCollection(fileParam, collection) {
1661
1771
  /**
1662
1772
  * Resolve a file parameter to a URL by looking it up in the file collection
1663
1773
  * If the parameter is already a URL (starts with http:// or https://), returns it as-is
1664
- * If contextId is provided, looks up the file in the collection and returns its URL
1774
+ * If agentContext is provided, looks up the file in the merged collection and returns its URL
1665
1775
  * @param {string} fileParam - File ID, URL (Azure or GCS), hash, or filename from collection
1666
- * @param {string} contextId - Context ID for the file collection
1667
- * @param {string} contextKey - Optional context key for encryption
1776
+ * @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
1668
1777
  * @param {Object} options - Optional configuration
1669
1778
  * @param {boolean} options.preferGcs - If true, prefer GCS URL over Azure URL when available
1779
+ * @param {boolean} options.useCache - If false, bypass cache (default: true, only used for single context)
1670
1780
  * @returns {Promise<string|null>} Resolved file URL, or null if not found
1671
1781
  */
1672
- export async function resolveFileParameter(fileParam, contextId, contextKey = null, options = {}) {
1782
+ export async function resolveFileParameter(fileParam, agentContext, options = {}) {
1673
1783
  if (!fileParam || typeof fileParam !== 'string') {
1674
1784
  return null;
1675
1785
  }
@@ -1677,15 +1787,16 @@ export async function resolveFileParameter(fileParam, contextId, contextKey = nu
1677
1787
  const trimmed = fileParam.trim();
1678
1788
  const { preferGcs = false, useCache = true } = options;
1679
1789
 
1680
- // If no contextId, can't look up in collection - return null
1681
- if (!contextId) {
1790
+ // If no agentContext, can't look up in collection - return null
1791
+ if (!agentContext || !Array.isArray(agentContext) || agentContext.length === 0) {
1682
1792
  return null;
1683
1793
  }
1684
1794
 
1685
1795
  try {
1686
- // Load file collection and find the file
1687
- // useCache can be set to false to bypass cache (e.g., after file edits)
1688
- const collection = await loadFileCollection(contextId, contextKey, useCache);
1796
+ // Load merged file collection (always use merged to get all files, not just global ones)
1797
+ // Note: useCache option is ignored for merged collections (they always load fresh)
1798
+ const collection = await loadMergedFileCollection(agentContext);
1799
+
1689
1800
  const foundFile = findFileInCollection(trimmed, collection);
1690
1801
 
1691
1802
  if (foundFile) {
@@ -1711,11 +1822,10 @@ export async function resolveFileParameter(fileParam, contextId, contextKey = nu
1711
1822
  /**
1712
1823
  * Generate file message content by looking up a file parameter in the file collection
1713
1824
  * @param {string} fileParam - File URL (Azure or GCS), file ID from collection, or file hash
1714
- * @param {string} contextId - Context ID for the file collection
1715
- * @param {string} contextKey - Optional context key for encryption
1825
+ * @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
1716
1826
  * @returns {Promise<Object|null>} Content object in the format for chat history, or null if not found
1717
1827
  */
1718
- async function generateFileMessageContent(fileParam, contextId, contextKey = null) {
1828
+ async function generateFileMessageContent(fileParam, agentContext) {
1719
1829
  if (!fileParam || typeof fileParam !== 'string') {
1720
1830
  return null;
1721
1831
  }
@@ -1736,14 +1846,13 @@ async function generateFileMessageContent(fileParam, contextId, contextKey = nul
1736
1846
  logger.debug(`YouTube URL check failed for "${fileParam}": ${error.message}`);
1737
1847
  }
1738
1848
 
1739
- if (!contextId) {
1740
- // Without contextId, we can't look up in collection
1741
- // Return a basic content object from the URL
1849
+ if (!agentContext || !Array.isArray(agentContext) || agentContext.length === 0) {
1850
+ // Without agentContext, we can't look up in collection
1742
1851
  return null;
1743
1852
  }
1744
1853
 
1745
- // Load file collection
1746
- const collection = await loadFileCollection(contextId, contextKey, true);
1854
+ // Load merged file collection
1855
+ const collection = await loadMergedFileCollection(agentContext);
1747
1856
 
1748
1857
  // Find the file using shared matching logic
1749
1858
  const foundFile = findFileInCollection(fileParam, collection);
@@ -1754,8 +1863,9 @@ async function generateFileMessageContent(fileParam, contextId, contextKey = nul
1754
1863
  }
1755
1864
 
1756
1865
  // Resolve to short-lived URL if possible
1757
- // Pass contextId to ensure files are found in the correct context scope
1758
- const fileWithShortLivedUrl = await ensureShortLivedUrl(foundFile, MEDIA_API_URL, contextId);
1866
+ // Use default context for ensureShortLivedUrl
1867
+ const defaultCtx = getDefaultContext(agentContext);
1868
+ const fileWithShortLivedUrl = await ensureShortLivedUrl(foundFile, MEDIA_API_URL, defaultCtx?.contextId || null);
1759
1869
 
1760
1870
  return {
1761
1871
  type: 'image_url',
@@ -2393,8 +2503,9 @@ export {
2393
2503
  markCompletedForCleanUp,
2394
2504
  extractFileMetadataFromContent,
2395
2505
  extractFilesFromChatHistory,
2396
- syncFilesToCollection,
2397
2506
  getAvailableFilesFromCollection,
2507
+ getDefaultContext,
2508
+ loadMergedFileCollection,
2398
2509
  formatFilesForTemplate,
2399
2510
  getAvailableFiles,
2400
2511
  syncAndStripFilesFromChatHistory,
@@ -2404,6 +2515,7 @@ export {
2404
2515
  injectFileIntoChatHistory,
2405
2516
  addFileToCollection,
2406
2517
  loadFileCollection,
2518
+ loadFileCollectionAll,
2407
2519
  saveFileCollection,
2408
2520
  updateFileMetadata,
2409
2521
  getCollectionCacheKey,
@@ -2413,6 +2525,9 @@ export {
2413
2525
  uploadFileToCloud,
2414
2526
  uploadImageToCloud,
2415
2527
  resolveFileHashesToContent,
2528
+ getInCollectionValue,
2529
+ addChatIdToInCollection,
2530
+ removeChatIdFromInCollection,
2416
2531
  getMimeTypeFromFilename,
2417
2532
  getMimeTypeFromExtension,
2418
2533
  isTextMimeType,