@aj-archipelago/cortex 1.4.21 → 1.4.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/FILE_SYSTEM_DOCUMENTATION.md +116 -48
- package/config.js +27 -0
- package/lib/fileUtils.js +226 -201
- package/lib/requestExecutor.js +3 -2
- package/lib/util.js +71 -1
- package/package.json +1 -1
- package/pathways/image_flux.js +8 -2
- package/pathways/image_qwen.js +1 -1
- package/pathways/system/entity/files/sys_read_file_collection.js +13 -11
- package/pathways/system/entity/files/sys_update_file_metadata.js +16 -7
- package/pathways/system/entity/sys_entity_agent.js +8 -6
- package/pathways/system/entity/tools/sys_tool_codingagent.js +4 -4
- package/pathways/system/entity/tools/sys_tool_editfile.js +27 -22
- package/pathways/system/entity/tools/sys_tool_file_collection.js +15 -10
- package/pathways/system/entity/tools/sys_tool_image.js +5 -5
- package/pathways/system/entity/tools/sys_tool_image_gemini.js +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +4 -4
- package/pathways/system/entity/tools/sys_tool_slides_gemini.js +1 -1
- package/pathways/system/entity/tools/sys_tool_video_veo.js +1 -1
- package/pathways/system/entity/tools/sys_tool_view_image.js +10 -5
- package/pathways/system/workspaces/run_workspace_agent.js +4 -1
- package/pathways/video_seedance.js +2 -0
- package/server/executeWorkspace.js +45 -2
- package/server/pathwayResolver.js +18 -0
- package/server/plugins/claude3VertexPlugin.js +2 -6
- package/server/plugins/claude4VertexPlugin.js +5 -10
- package/server/plugins/gemini3ReasoningVisionPlugin.js +0 -2
- package/server/plugins/grokResponsesPlugin.js +3 -19
- package/server/plugins/grokVisionPlugin.js +3 -18
- package/server/plugins/modelPlugin.js +3 -0
- package/server/plugins/openAiVisionPlugin.js +3 -18
- package/server/plugins/replicateApiPlugin.js +182 -101
- package/server/resolver.js +32 -3
- package/server/typeDef.js +10 -1
- package/test.log +39427 -0
- package/tests/integration/features/tools/fileCollection.test.js +254 -248
- package/tests/integration/features/tools/fileOperations.test.js +131 -81
- package/tests/integration/graphql/async/stream/agentic.test.js +1 -1
- package/tests/integration/graphql/async/stream/vendors/claude_streaming.test.js +3 -4
- package/tests/integration/graphql/async/stream/vendors/gemini_streaming.test.js +3 -4
- package/tests/integration/graphql/async/stream/vendors/grok_streaming.test.js +3 -4
- package/tests/integration/graphql/async/stream/vendors/openai_streaming.test.js +5 -5
- package/tests/unit/core/fileCollection.test.js +86 -25
- package/pathways/system/workspaces/run_workspace_research_agent.js +0 -27
package/lib/fileUtils.js
CHANGED
|
@@ -768,6 +768,47 @@ async function loadFileCollection(contextId, contextKey = null, useCache = true,
|
|
|
768
768
|
return filterAndFormatFileCollection(rawFiles, chatId);
|
|
769
769
|
}
|
|
770
770
|
|
|
771
|
+
/**
 * Load ALL files from a context's file collection, bypassing inCollection filtering.
 * Used when merging alt contexts where we want all files regardless of chat scope.
 *
 * Best-effort: resolves to an empty array when contextId is missing, when no Redis
 * client is available, or when reading/parsing the collection fails (the failure
 * is logged rather than silently discarded).
 *
 * @param {string} contextId - Context ID
 * @param {string|null} contextKey - Optional encryption key (passed through to parseRawFileData)
 * @returns {Promise<Array>} All files in the collection, most recently accessed first,
 *   with the internal inCollection field removed
 */
async function loadFileCollectionAll(contextId, contextKey = null) {
    if (!contextId) {
        return [];
    }

    // Missing or unparsable dates sort as the epoch so the comparator never yields NaN
    const toTimestamp = (file) => {
        const time = new Date(file.lastAccessed || file.addedDate || 0).getTime();
        return Number.isNaN(time) ? 0 : time;
    };

    try {
        const redisClient = await getRedisClient();
        if (!redisClient) {
            return [];
        }

        const contextMapKey = `FileStoreMap:ctx:${contextId}`;
        const allFiles = await redisClient.hgetall(contextMapKey);

        // Parse raw file data
        const rawFiles = parseRawFileData(allFiles, contextKey);

        // Return all files without inCollection filtering, just remove the internal metadata
        const formatted = rawFiles.map(({ inCollection, ...file }) => file);

        // Sort by lastAccessed (most recent first), falling back to addedDate
        formatted.sort((a, b) => toTimestamp(b) - toTimestamp(a));

        return formatted;
    } catch (e) {
        // Still best-effort (callers get an empty collection), but leave a trace of the failure
        logger.warn(`Failed to load file collection for context ${contextId}: ${e?.message || e}`);
        return [];
    }
}
|
|
811
|
+
|
|
771
812
|
/**
|
|
772
813
|
* Normalize inCollection value to array format
|
|
773
814
|
* @param {boolean|Array<string>|undefined} inCollection - inCollection value to normalize
|
|
@@ -819,8 +860,12 @@ async function updateFileMetadata(contextId, hash, metadata, contextKey = null)
|
|
|
819
860
|
}
|
|
820
861
|
|
|
821
862
|
const contextMapKey = `FileStoreMap:ctx:${contextId}`;
|
|
822
|
-
// Get existing file data
|
|
863
|
+
// Get existing file data - must exist to update
|
|
823
864
|
const existingDataStr = await redisClient.hget(contextMapKey, hash);
|
|
865
|
+
if (!existingDataStr) {
|
|
866
|
+
// File doesn't exist in this context - don't create new entries
|
|
867
|
+
return false;
|
|
868
|
+
}
|
|
824
869
|
const existingData = readFileDataFromRedis(existingDataStr, contextKey) || {};
|
|
825
870
|
|
|
826
871
|
// Merge CFH data with Cortex metadata
|
|
@@ -1238,135 +1283,6 @@ function getActualContentMimeType(file) {
|
|
|
1238
1283
|
return determineMimeTypeFromUrl(file.url, file.gcs, null);
|
|
1239
1284
|
}
|
|
1240
1285
|
|
|
1241
|
-
/**
|
|
1242
|
-
* Sync files from chat history to file collection
|
|
1243
|
-
* @param {Array} chatHistory - Chat history to scan
|
|
1244
|
-
* @param {string} contextId - Context ID for the file collection
|
|
1245
|
-
* @param {string} contextKey - Optional context key for encryption
|
|
1246
|
-
* @returns {Promise<Array>} Array of file metadata objects
|
|
1247
|
-
*/
|
|
1248
|
-
async function syncFilesToCollection(chatHistory, contextId, contextKey = null) {
|
|
1249
|
-
if (!chatHistory || !Array.isArray(chatHistory) || !contextId) {
|
|
1250
|
-
return [];
|
|
1251
|
-
}
|
|
1252
|
-
|
|
1253
|
-
// Extract all files from chat history
|
|
1254
|
-
const extractedFiles = extractFilesFromChatHistory(chatHistory);
|
|
1255
|
-
|
|
1256
|
-
if (extractedFiles.length === 0) {
|
|
1257
|
-
// No new files to add, return existing collection
|
|
1258
|
-
return await loadFileCollection(contextId, contextKey, true);
|
|
1259
|
-
}
|
|
1260
|
-
|
|
1261
|
-
// Sync files - check individually and update only what's needed (atomic operations)
|
|
1262
|
-
try {
|
|
1263
|
-
const redisClient = await getRedisClient();
|
|
1264
|
-
if (!redisClient) {
|
|
1265
|
-
// No Redis, return existing collection
|
|
1266
|
-
return await loadFileCollection(contextId, contextKey, true);
|
|
1267
|
-
}
|
|
1268
|
-
|
|
1269
|
-
const contextMapKey = `FileStoreMap:ctx:${contextId}`;
|
|
1270
|
-
const existingFiles = await redisClient.hgetall(contextMapKey);
|
|
1271
|
-
const existingByUrl = new Map();
|
|
1272
|
-
const existingByGcs = new Map();
|
|
1273
|
-
const existingByHash = new Map();
|
|
1274
|
-
|
|
1275
|
-
// Build lookup maps from existing files
|
|
1276
|
-
for (const [hash, dataStr] of Object.entries(existingFiles)) {
|
|
1277
|
-
try {
|
|
1278
|
-
const data = JSON.parse(dataStr);
|
|
1279
|
-
if (data.url) existingByUrl.set(data.url, hash);
|
|
1280
|
-
if (data.gcs) existingByGcs.set(data.gcs, hash);
|
|
1281
|
-
if (hash) existingByHash.set(hash, hash);
|
|
1282
|
-
} catch (e) {
|
|
1283
|
-
// Skip invalid entries
|
|
1284
|
-
}
|
|
1285
|
-
}
|
|
1286
|
-
|
|
1287
|
-
// Add/update files individually (atomic operations)
|
|
1288
|
-
for (const file of extractedFiles) {
|
|
1289
|
-
const existsByUrl = file.url && existingByUrl.has(file.url);
|
|
1290
|
-
const existsByGcs = file.gcs && existingByGcs.has(file.gcs);
|
|
1291
|
-
const existsByHash = file.hash && existingByHash.has(file.hash);
|
|
1292
|
-
|
|
1293
|
-
if (!existsByUrl && !existsByGcs && !existsByHash && file.hash) {
|
|
1294
|
-
// File not found in context-scoped map - check if CFH has it (context-scoped or unscoped)
|
|
1295
|
-
// This handles the case where file was uploaded but not yet in this context's collection
|
|
1296
|
-
const existingDataStr = await redisClient.hget(contextMapKey, file.hash);
|
|
1297
|
-
let existingData = readFileDataFromRedis(existingDataStr, contextKey);
|
|
1298
|
-
|
|
1299
|
-
// Also check unscoped map (CFH might have written it there)
|
|
1300
|
-
if (!existingData) {
|
|
1301
|
-
const unscopedDataStr = await redisClient.hget("FileStoreMap", file.hash);
|
|
1302
|
-
existingData = readFileDataFromRedis(unscopedDataStr, contextKey);
|
|
1303
|
-
}
|
|
1304
|
-
|
|
1305
|
-
if (existingData) {
|
|
1306
|
-
// CFH already has this file - merge CFH data with Cortex metadata
|
|
1307
|
-
// Only set Cortex-managed fields (tags, notes, id, dates), preserve all CFH data
|
|
1308
|
-
// Ensure mimeType is set (CFH doesn't store it, so we need to determine it)
|
|
1309
|
-
// IMPORTANT: Determine MIME type from URL (actual content), not displayFilename
|
|
1310
|
-
// displayFilename may have original extension (e.g., .docx) while URL points to converted content (e.g., .md)
|
|
1311
|
-
const mimeType = existingData.mimeType || determineMimeTypeFromUrl(existingData.url, existingData.gcs, null);
|
|
1312
|
-
|
|
1313
|
-
const fileData = {
|
|
1314
|
-
...existingData, // Preserve all CFH data (url, gcs, filename, displayFilename, permanent, etc.)
|
|
1315
|
-
mimeType: mimeType, // Ensure mimeType is set
|
|
1316
|
-
id: existingData.id || `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
|
|
1317
|
-
tags: existingData.tags || [],
|
|
1318
|
-
notes: existingData.notes || '',
|
|
1319
|
-
addedDate: existingData.addedDate || existingData.timestamp || new Date().toISOString(),
|
|
1320
|
-
lastAccessed: new Date().toISOString(),
|
|
1321
|
-
inCollection: ['*'] // Mark as global chat file (available to all chats)
|
|
1322
|
-
};
|
|
1323
|
-
|
|
1324
|
-
// Write to Redis - encryption happens in helper
|
|
1325
|
-
await writeFileDataToRedis(redisClient, contextMapKey, file.hash, fileData, contextKey);
|
|
1326
|
-
} else {
|
|
1327
|
-
// File doesn't exist in CFH - create minimal entry (file referenced in chat but not uploaded)
|
|
1328
|
-
const mimeType = determineMimeTypeFromUrl(file.url, file.gcs, null);
|
|
1329
|
-
|
|
1330
|
-
const fileData = {
|
|
1331
|
-
url: file.url,
|
|
1332
|
-
gcs: file.gcs || null,
|
|
1333
|
-
mimeType: mimeType,
|
|
1334
|
-
id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
|
|
1335
|
-
tags: [],
|
|
1336
|
-
notes: '',
|
|
1337
|
-
hash: file.hash,
|
|
1338
|
-
permanent: false,
|
|
1339
|
-
addedDate: new Date().toISOString(),
|
|
1340
|
-
lastAccessed: new Date().toISOString(),
|
|
1341
|
-
inCollection: ['*'] // Mark as global chat file (available to all chats)
|
|
1342
|
-
};
|
|
1343
|
-
|
|
1344
|
-
// Write to Redis - encryption happens in helper
|
|
1345
|
-
await writeFileDataToRedis(redisClient, contextMapKey, file.hash, fileData, contextKey);
|
|
1346
|
-
}
|
|
1347
|
-
} else if (file.hash) {
|
|
1348
|
-
// File exists - update lastAccessed directly
|
|
1349
|
-
await updateFileMetadata(contextId, file.hash, {
|
|
1350
|
-
lastAccessed: new Date().toISOString()
|
|
1351
|
-
}, contextKey);
|
|
1352
|
-
}
|
|
1353
|
-
}
|
|
1354
|
-
|
|
1355
|
-
// Invalidate cache
|
|
1356
|
-
const cacheKey = getCollectionCacheKey(contextId, contextKey);
|
|
1357
|
-
fileCollectionCache.delete(cacheKey);
|
|
1358
|
-
} catch (e) {
|
|
1359
|
-
// Fallback: log error and return existing collection
|
|
1360
|
-
const logger = (await import('./logger.js')).default;
|
|
1361
|
-
logger.warn(`Failed to sync files individually: ${e.message}`);
|
|
1362
|
-
// Return existing collection on error
|
|
1363
|
-
return await loadFileCollection(contextId, contextKey, true);
|
|
1364
|
-
}
|
|
1365
|
-
|
|
1366
|
-
// Return updated collection
|
|
1367
|
-
return await loadFileCollection(contextId, contextKey, false);
|
|
1368
|
-
}
|
|
1369
|
-
|
|
1370
1286
|
/**
|
|
1371
1287
|
* Get available files from file collection and format for template
|
|
1372
1288
|
* @param {string} contextId - Context ID for the file collection
|
|
@@ -1430,69 +1346,160 @@ function formatFilesForTemplate(collection) {
|
|
|
1430
1346
|
}
|
|
1431
1347
|
|
|
1432
1348
|
/**
|
|
1433
|
-
*
|
|
1434
|
-
* @param {Array}
|
|
1435
|
-
* @
|
|
1436
|
-
* @param {string} contextKey - Optional context key for encryption
|
|
1437
|
-
* @returns {Promise<string>} Formatted string of available files
|
|
1349
|
+
* Extract default context from agentContext array (for writes/updates)
|
|
1350
|
+
* @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
|
|
1351
|
+
* @returns {Object|null} Default context object or null if not found
|
|
1438
1352
|
*/
|
|
1439
|
-
|
|
1440
|
-
if (!
|
|
1441
|
-
|
|
1442
|
-
const files = extractFilesFromChatHistory(chatHistory);
|
|
1443
|
-
return files.map(f => f.url).filter(Boolean).join('\n') || 'No files available.';
|
|
1353
|
+
function getDefaultContext(agentContext) {
    // Picks the context that writes/updates should target: the first entry flagged
    // `default: true`, otherwise the first usable entry, otherwise null.
    if (!Array.isArray(agentContext) || agentContext.length === 0) {
        return null;
    }

    // Optional chaining tolerates null/undefined entries instead of throwing a TypeError
    const flagged = agentContext.find((ctx) => ctx?.default === true);

    // Same as agentContext[0] for well-formed input; skips leading empty entries otherwise
    return flagged || agentContext.find(Boolean) || null;
}
|
|
1445
1359
|
|
|
1446
|
-
|
|
1447
|
-
|
|
1360
|
+
/**
 * Load merged file collection from agentContext array
 * Merges all contexts in the array for read operations.
 *
 * The first array position is the primary context: its files are taken as-is.
 * Files from each later context are added only if their hash, url and gcs do not
 * match a file contributed by an earlier context. Every returned file carries an
 * internal `_contextId` naming the context it was loaded from.
 *
 * Entries that are null/undefined or have no contextId are skipped.
 *
 * @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
 * @returns {Promise<Array>} Merged file collection
 */
async function loadMergedFileCollection(agentContext) {
    if (!Array.isArray(agentContext) || agentContext.length === 0) {
        return [];
    }

    // Remember which entry sits at position 0 before dropping unusable entries
    const contexts = agentContext
        .map((ctx, index) => ({ ctx, isPrimary: index === 0 }))
        .filter(({ ctx }) => ctx?.contextId);

    // The loads are independent reads, so issue them together; merging below stays in array order.
    // loadFileCollectionAll bypasses inCollection filtering (we want ALL files, not just global ones).
    const collections = await Promise.all(
        contexts.map(({ ctx }) => loadFileCollectionAll(ctx.contextId, ctx.contextKey || null))
    );

    const merged = [];
    const seenHashes = new Set();
    const seenUrls = new Set();
    const seenGcs = new Set();

    const isAlreadyMerged = (file) =>
        (file.hash && seenHashes.has(file.hash)) ||
        (file.url && seenUrls.has(file.url)) ||
        (file.gcs && seenGcs.has(file.gcs));

    contexts.forEach(({ ctx, isPrimary }, position) => {
        const files = collections[position];

        // Filter against the identifiers of EARLIER contexts only: the filter completes
        // before this context's own files are recorded, so files within one context
        // are never de-duplicated against each other.
        const accepted = isPrimary ? files : files.filter((file) => !isAlreadyMerged(file));

        for (const file of accepted) {
            merged.push({ ...file, _contextId: ctx.contextId });
            if (file.hash) seenHashes.add(file.hash);
            if (file.url) seenUrls.add(file.url);
            if (file.gcs) seenGcs.add(file.gcs);
        }
    });

    return merged;
}
|
|
1452
1410
|
|
|
1453
1411
|
/**
|
|
1454
|
-
*
|
|
1455
|
-
*
|
|
1456
|
-
*
|
|
1412
|
+
* Get available files from file collection (no syncing from chat history)
|
|
1413
|
+
* @param {Array} chatHistory - Unused, kept for API compatibility
|
|
1414
|
+
* @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
|
|
1415
|
+
* @returns {Promise<string>} Formatted string of available files
|
|
1416
|
+
*/
|
|
1417
|
+
async function getAvailableFiles(chatHistory, agentContext) {
    // chatHistory is not read here; the listing is built from the merged collection only
    const hasContexts = Array.isArray(agentContext) && agentContext.length > 0;
    if (!hasContexts) {
        return 'No files available.';
    }

    const mergedFiles = await loadMergedFileCollection(agentContext);

    // Drop the internal _contextId tag so it is not passed to the formatter
    const publicFiles = mergedFiles.map((entry) => {
        const { _contextId, ...rest } = entry;
        return rest;
    });

    return formatFilesForTemplate(publicFiles);
}
|
|
1426
|
+
|
|
1427
|
+
/**
|
|
1428
|
+
* Process files in chat history:
|
|
1429
|
+
* - Files IN collection (all agentContext contexts): update lastAccessed, strip from message (tools can access)
|
|
1430
|
+
* - Files NOT in collection: leave in message (model sees directly)
|
|
1431
|
+
*
|
|
1457
1432
|
* @param {Array} chatHistory - Chat history array
|
|
1458
|
-
* @param {
|
|
1459
|
-
* @
|
|
1460
|
-
* @returns {Promise<{chatHistory: Array, availableFiles: string}>} Modified chat history and available files string
|
|
1433
|
+
* @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
|
|
1434
|
+
* @returns {Promise<{chatHistory: Array, availableFiles: string}>}
|
|
1461
1435
|
*/
|
|
1462
|
-
async function syncAndStripFilesFromChatHistory(chatHistory,
|
|
1436
|
+
async function syncAndStripFilesFromChatHistory(chatHistory, agentContext) {
|
|
1463
1437
|
if (!chatHistory || !Array.isArray(chatHistory)) {
|
|
1464
1438
|
return { chatHistory: chatHistory || [], availableFiles: 'No files available.' };
|
|
1465
1439
|
}
|
|
1466
1440
|
|
|
1467
|
-
if (!
|
|
1468
|
-
// No
|
|
1469
|
-
|
|
1470
|
-
return { chatHistory: strippedHistory, availableFiles: 'No files available.' };
|
|
1441
|
+
if (!agentContext || !Array.isArray(agentContext) || agentContext.length === 0) {
|
|
1442
|
+
// No agentContext - no collection to check, leave all files in messages
|
|
1443
|
+
return { chatHistory, availableFiles: 'No files available.' };
|
|
1471
1444
|
}
|
|
1472
1445
|
|
|
1473
|
-
//
|
|
1474
|
-
await
|
|
1475
|
-
|
|
1476
|
-
// Get available files string
|
|
1477
|
-
const availableFiles = await getAvailableFilesFromCollection(contextId, contextKey);
|
|
1446
|
+
// Load merged collection once
|
|
1447
|
+
const collection = await loadMergedFileCollection(agentContext);
|
|
1478
1448
|
|
|
1479
|
-
//
|
|
1480
|
-
const
|
|
1449
|
+
// Build lookup map from contextId to contextKey for updates
|
|
1450
|
+
const contextKeyMap = new Map(agentContext.map(ctx => [ctx.contextId, ctx.contextKey || null]));
|
|
1481
1451
|
|
|
1482
|
-
|
|
1483
|
-
|
|
1452
|
+
// Build lookup maps for fast matching and context lookup (need Maps, not Sets, to get full file object)
|
|
1453
|
+
const collectionByHash = new Map(collection.filter(f => f.hash).map(f => [f.hash, f]));
|
|
1454
|
+
const collectionByUrl = new Map(collection.filter(f => f.url).map(f => [f.url, f]));
|
|
1455
|
+
const collectionByGcs = new Map(collection.filter(f => f.gcs).map(f => [f.gcs, f]));
|
|
1484
1456
|
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1457
|
+
// Helper to get file from collection (by hash, URL, or GCS) to find _contextId
|
|
1458
|
+
const getFileFromCollection = (contentObj) => {
|
|
1459
|
+
const fileHash = contentObj.hash;
|
|
1460
|
+
const fileUrl = contentObj.url || contentObj.image_url?.url;
|
|
1461
|
+
const fileGcs = contentObj.gcs;
|
|
1462
|
+
|
|
1463
|
+
if (fileHash && collectionByHash.has(fileHash)) {
|
|
1464
|
+
return collectionByHash.get(fileHash);
|
|
1465
|
+
}
|
|
1466
|
+
if (fileUrl && collectionByUrl.has(fileUrl)) {
|
|
1467
|
+
return collectionByUrl.get(fileUrl);
|
|
1468
|
+
}
|
|
1469
|
+
if (fileGcs && collectionByGcs.has(fileGcs)) {
|
|
1470
|
+
return collectionByGcs.get(fileGcs);
|
|
1471
|
+
}
|
|
1472
|
+
return null;
|
|
1473
|
+
};
|
|
1474
|
+
|
|
1475
|
+
// Helper to check if a file content object is in the collection
|
|
1476
|
+
const isInCollection = (contentObj) => {
|
|
1477
|
+
return getFileFromCollection(contentObj) !== null;
|
|
1478
|
+
};
|
|
1479
|
+
|
|
1480
|
+
// Helper to update file when stripped - use _contextId from collection to know which context to update
|
|
1481
|
+
const updateStrippedFile = (contentObj) => {
|
|
1482
|
+
const file = getFileFromCollection(contentObj);
|
|
1483
|
+
if (!file || !file._contextId) return;
|
|
1484
|
+
|
|
1485
|
+
// Use hash from the found file (may not be in contentObj)
|
|
1486
|
+
const hash = file.hash;
|
|
1487
|
+
if (!hash) return;
|
|
1488
|
+
|
|
1489
|
+
// Get the correct contextKey for this file's context
|
|
1490
|
+
const fileContextKey = contextKeyMap.get(file._contextId) || null;
|
|
1491
|
+
|
|
1492
|
+
const now = new Date().toISOString();
|
|
1493
|
+
updateFileMetadata(file._contextId, hash, {
|
|
1494
|
+
lastAccessed: now,
|
|
1495
|
+
inCollection: ['*']
|
|
1496
|
+
}, fileContextKey).catch((err) => {
|
|
1497
|
+
logger.warn(`Failed to update metadata for stripped file (hash=${hash}): ${err?.message || err}`);
|
|
1498
|
+
});
|
|
1499
|
+
};
|
|
1494
1500
|
|
|
1495
|
-
|
|
1501
|
+
// Process chat history - only strip files that are in collection
|
|
1502
|
+
const processedHistory = chatHistory.map(message => {
|
|
1496
1503
|
if (!message || message.role !== 'user' || !message.content) {
|
|
1497
1504
|
return message;
|
|
1498
1505
|
}
|
|
@@ -1502,9 +1509,14 @@ function stripAllFilesFromChatHistory(chatHistory) {
|
|
|
1502
1509
|
const newContent = message.content.map(item => {
|
|
1503
1510
|
const contentObj = typeof item === 'string' ? tryParseJson(item) : item;
|
|
1504
1511
|
if (contentObj && (contentObj.type === 'image_url' || contentObj.type === 'file')) {
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1512
|
+
if (isInCollection(contentObj)) {
|
|
1513
|
+
// In collection - strip and update metadata
|
|
1514
|
+
updateStrippedFile(contentObj); // fire and forget
|
|
1515
|
+
const filename = extractFilenameFromFileContent(contentObj);
|
|
1516
|
+
return { type: 'text', text: `[File: ${filename} - available via file tools]` };
|
|
1517
|
+
}
|
|
1518
|
+
// Not in collection - leave as-is
|
|
1519
|
+
return item;
|
|
1508
1520
|
}
|
|
1509
1521
|
return item;
|
|
1510
1522
|
});
|
|
@@ -1514,8 +1526,11 @@ function stripAllFilesFromChatHistory(chatHistory) {
|
|
|
1514
1526
|
// Handle object content
|
|
1515
1527
|
if (typeof message.content === 'object' && message.content !== null) {
|
|
1516
1528
|
if (message.content.type === 'image_url' || message.content.type === 'file') {
|
|
1517
|
-
|
|
1518
|
-
|
|
1529
|
+
if (isInCollection(message.content)) {
|
|
1530
|
+
updateStrippedFile(message.content); // fire and forget
|
|
1531
|
+
const filename = extractFilenameFromFileContent(message.content);
|
|
1532
|
+
return { ...message, content: `[File: ${filename} - available via file tools]` };
|
|
1533
|
+
}
|
|
1519
1534
|
}
|
|
1520
1535
|
}
|
|
1521
1536
|
|
|
@@ -1523,13 +1538,21 @@ function stripAllFilesFromChatHistory(chatHistory) {
|
|
|
1523
1538
|
if (typeof message.content === 'string') {
|
|
1524
1539
|
const contentObj = tryParseJson(message.content);
|
|
1525
1540
|
if (contentObj && (contentObj.type === 'image_url' || contentObj.type === 'file')) {
|
|
1526
|
-
|
|
1527
|
-
|
|
1541
|
+
if (isInCollection(contentObj)) {
|
|
1542
|
+
updateStrippedFile(contentObj); // fire and forget
|
|
1543
|
+
const filename = extractFilenameFromFileContent(contentObj);
|
|
1544
|
+
return { ...message, content: `[File: ${filename} - available via file tools]` };
|
|
1545
|
+
}
|
|
1528
1546
|
}
|
|
1529
1547
|
}
|
|
1530
1548
|
|
|
1531
1549
|
return message;
|
|
1532
1550
|
});
|
|
1551
|
+
|
|
1552
|
+
// Strip internal _contextId before formatting (it's only needed for updates)
|
|
1553
|
+
const cleanCollection = collection.map(({ _contextId, ...file }) => file);
|
|
1554
|
+
const availableFiles = formatFilesForTemplate(cleanCollection);
|
|
1555
|
+
return { chatHistory: processedHistory, availableFiles };
|
|
1533
1556
|
}
|
|
1534
1557
|
|
|
1535
1558
|
/**
|
|
@@ -1661,15 +1684,15 @@ function findFileInCollection(fileParam, collection) {
|
|
|
1661
1684
|
/**
|
|
1662
1685
|
* Resolve a file parameter to a URL by looking it up in the file collection
|
|
1663
1686
|
* If the parameter is already a URL (starts with http:// or https://), returns it as-is
|
|
1664
|
-
* If
|
|
1687
|
+
* If agentContext is provided, looks up the file in the merged collection and returns its URL
|
|
1665
1688
|
* @param {string} fileParam - File ID, URL (Azure or GCS), hash, or filename from collection
|
|
1666
|
-
* @param {
|
|
1667
|
-
* @param {string} contextKey - Optional context key for encryption
|
|
1689
|
+
* @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
|
|
1668
1690
|
* @param {Object} options - Optional configuration
|
|
1669
1691
|
* @param {boolean} options.preferGcs - If true, prefer GCS URL over Azure URL when available
|
|
1692
|
+
* @param {boolean} options.useCache - If false, bypass cache (default: true, only used for single context)
|
|
1670
1693
|
* @returns {Promise<string|null>} Resolved file URL, or null if not found
|
|
1671
1694
|
*/
|
|
1672
|
-
export async function resolveFileParameter(fileParam,
|
|
1695
|
+
export async function resolveFileParameter(fileParam, agentContext, options = {}) {
|
|
1673
1696
|
if (!fileParam || typeof fileParam !== 'string') {
|
|
1674
1697
|
return null;
|
|
1675
1698
|
}
|
|
@@ -1677,15 +1700,16 @@ export async function resolveFileParameter(fileParam, contextId, contextKey = nu
|
|
|
1677
1700
|
const trimmed = fileParam.trim();
|
|
1678
1701
|
const { preferGcs = false, useCache = true } = options;
|
|
1679
1702
|
|
|
1680
|
-
// If no
|
|
1681
|
-
if (!
|
|
1703
|
+
// If no agentContext, can't look up in collection - return null
|
|
1704
|
+
if (!agentContext || !Array.isArray(agentContext) || agentContext.length === 0) {
|
|
1682
1705
|
return null;
|
|
1683
1706
|
}
|
|
1684
1707
|
|
|
1685
1708
|
try {
|
|
1686
|
-
// Load file collection
|
|
1687
|
-
// useCache
|
|
1688
|
-
const collection = await
|
|
1709
|
+
// Load merged file collection (always use merged to get all files, not just global ones)
|
|
1710
|
+
// Note: useCache option is ignored for merged collections (they always load fresh)
|
|
1711
|
+
const collection = await loadMergedFileCollection(agentContext);
|
|
1712
|
+
|
|
1689
1713
|
const foundFile = findFileInCollection(trimmed, collection);
|
|
1690
1714
|
|
|
1691
1715
|
if (foundFile) {
|
|
@@ -1711,11 +1735,10 @@ export async function resolveFileParameter(fileParam, contextId, contextKey = nu
|
|
|
1711
1735
|
/**
|
|
1712
1736
|
* Generate file message content by looking up a file parameter in the file collection
|
|
1713
1737
|
* @param {string} fileParam - File URL (Azure or GCS), file ID from collection, or file hash
|
|
1714
|
-
* @param {
|
|
1715
|
-
* @param {string} contextKey - Optional context key for encryption
|
|
1738
|
+
* @param {Array} agentContext - Array of context objects { contextId, contextKey, default }
|
|
1716
1739
|
* @returns {Promise<Object|null>} Content object in the format for chat history, or null if not found
|
|
1717
1740
|
*/
|
|
1718
|
-
async function generateFileMessageContent(fileParam,
|
|
1741
|
+
async function generateFileMessageContent(fileParam, agentContext) {
|
|
1719
1742
|
if (!fileParam || typeof fileParam !== 'string') {
|
|
1720
1743
|
return null;
|
|
1721
1744
|
}
|
|
@@ -1736,14 +1759,13 @@ async function generateFileMessageContent(fileParam, contextId, contextKey = nul
|
|
|
1736
1759
|
logger.debug(`YouTube URL check failed for "${fileParam}": ${error.message}`);
|
|
1737
1760
|
}
|
|
1738
1761
|
|
|
1739
|
-
if (!
|
|
1740
|
-
// Without
|
|
1741
|
-
// Return a basic content object from the URL
|
|
1762
|
+
if (!agentContext || !Array.isArray(agentContext) || agentContext.length === 0) {
|
|
1763
|
+
// Without agentContext, we can't look up in collection
|
|
1742
1764
|
return null;
|
|
1743
1765
|
}
|
|
1744
1766
|
|
|
1745
|
-
// Load file collection
|
|
1746
|
-
const collection = await
|
|
1767
|
+
// Load merged file collection
|
|
1768
|
+
const collection = await loadMergedFileCollection(agentContext);
|
|
1747
1769
|
|
|
1748
1770
|
// Find the file using shared matching logic
|
|
1749
1771
|
const foundFile = findFileInCollection(fileParam, collection);
|
|
@@ -1754,8 +1776,9 @@ async function generateFileMessageContent(fileParam, contextId, contextKey = nul
|
|
|
1754
1776
|
}
|
|
1755
1777
|
|
|
1756
1778
|
// Resolve to short-lived URL if possible
|
|
1757
|
-
//
|
|
1758
|
-
const
|
|
1779
|
+
// Use default context for ensureShortLivedUrl
|
|
1780
|
+
const defaultCtx = getDefaultContext(agentContext);
|
|
1781
|
+
const fileWithShortLivedUrl = await ensureShortLivedUrl(foundFile, MEDIA_API_URL, defaultCtx?.contextId || null);
|
|
1759
1782
|
|
|
1760
1783
|
return {
|
|
1761
1784
|
type: 'image_url',
|
|
@@ -2393,8 +2416,9 @@ export {
|
|
|
2393
2416
|
markCompletedForCleanUp,
|
|
2394
2417
|
extractFileMetadataFromContent,
|
|
2395
2418
|
extractFilesFromChatHistory,
|
|
2396
|
-
syncFilesToCollection,
|
|
2397
2419
|
getAvailableFilesFromCollection,
|
|
2420
|
+
getDefaultContext,
|
|
2421
|
+
loadMergedFileCollection,
|
|
2398
2422
|
formatFilesForTemplate,
|
|
2399
2423
|
getAvailableFiles,
|
|
2400
2424
|
syncAndStripFilesFromChatHistory,
|
|
@@ -2404,6 +2428,7 @@ export {
|
|
|
2404
2428
|
injectFileIntoChatHistory,
|
|
2405
2429
|
addFileToCollection,
|
|
2406
2430
|
loadFileCollection,
|
|
2431
|
+
loadFileCollectionAll,
|
|
2407
2432
|
saveFileCollection,
|
|
2408
2433
|
updateFileMetadata,
|
|
2409
2434
|
getCollectionCacheKey,
|
package/lib/requestExecutor.js
CHANGED
|
@@ -6,6 +6,7 @@ import { setupCache } from 'axios-cache-interceptor';
|
|
|
6
6
|
import Redis from 'ioredis';
|
|
7
7
|
import logger from './logger.js';
|
|
8
8
|
import { v4 as uuidv4 } from 'uuid';
|
|
9
|
+
import { sanitizeBase64 } from './util.js';
|
|
9
10
|
|
|
10
11
|
const connectionString = config.get('storageConnectionString');
|
|
11
12
|
|
|
@@ -229,10 +230,10 @@ const requestWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
|
|
|
229
230
|
let response;
|
|
230
231
|
try {
|
|
231
232
|
if (axiosConfigObj?.method == 'GET'){
|
|
232
|
-
logger.debug(`Getting ${url} with data: ${JSON.stringify(data)}`);
|
|
233
|
+
logger.debug(`Getting ${url} with data: ${JSON.stringify(sanitizeBase64(data))}`);
|
|
233
234
|
response = await cortexAxios.get(url, axiosConfigObj);
|
|
234
235
|
} else {
|
|
235
|
-
logger.debug(`Posting ${url} with data: ${JSON.stringify(data)}`);
|
|
236
|
+
logger.debug(`Posting ${url} with data: ${JSON.stringify(sanitizeBase64(data))}`);
|
|
236
237
|
response = await cortexAxios.post(url, data, axiosConfigObj);
|
|
237
238
|
}
|
|
238
239
|
} catch (error) {
|