@aj-archipelago/cortex 1.4.22 → 1.4.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/FILE_SYSTEM_DOCUMENTATION.md +116 -48
- package/config.js +9 -0
- package/lib/fileUtils.js +329 -214
- package/package.json +1 -1
- package/pathways/system/entity/files/sys_read_file_collection.js +22 -11
- package/pathways/system/entity/files/sys_update_file_metadata.js +18 -8
- package/pathways/system/entity/sys_entity_agent.js +8 -6
- package/pathways/system/entity/tools/sys_tool_codingagent.js +4 -4
- package/pathways/system/entity/tools/sys_tool_editfile.js +35 -24
- package/pathways/system/entity/tools/sys_tool_file_collection.js +93 -36
- package/pathways/system/entity/tools/sys_tool_image.js +1 -1
- package/pathways/system/entity/tools/sys_tool_image_gemini.js +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +4 -4
- package/pathways/system/entity/tools/sys_tool_slides_gemini.js +1 -1
- package/pathways/system/entity/tools/sys_tool_video_veo.js +1 -1
- package/pathways/system/entity/tools/sys_tool_view_image.js +10 -5
- package/pathways/system/workspaces/run_workspace_agent.js +4 -1
- package/pathways/video_seedance.js +2 -0
- package/server/executeWorkspace.js +45 -2
- package/server/pathwayResolver.js +18 -0
- package/server/plugins/replicateApiPlugin.js +18 -0
- package/server/typeDef.js +10 -1
- package/tests/integration/features/tools/fileCollection.test.js +254 -248
- package/tests/integration/features/tools/fileOperations.test.js +131 -81
- package/tests/integration/graphql/async/stream/vendors/claude_streaming.test.js +3 -4
- package/tests/integration/graphql/async/stream/vendors/gemini_streaming.test.js +3 -4
- package/tests/integration/graphql/async/stream/vendors/grok_streaming.test.js +3 -4
- package/tests/integration/graphql/async/stream/vendors/openai_streaming.test.js +5 -5
- package/tests/unit/core/fileCollection.test.js +86 -25
- package/pathways/system/workspaces/run_workspace_research_agent.js +0 -27
|
@@ -352,15 +352,14 @@ addFileToCollection(contextId, contextKey, url, gcs, filename, tags, notes, hash
|
|
|
352
352
|
- Merges with existing CFH data if file with same hash already exists
|
|
353
353
|
- Returns file entry object with `id`
|
|
354
354
|
|
|
355
|
-
####
|
|
355
|
+
#### Processing Chat History Files
|
|
356
356
|
```javascript
|
|
357
|
-
|
|
357
|
+
syncAndStripFilesFromChatHistory(chatHistory, agentContext)
|
|
358
358
|
```
|
|
359
|
-
-
|
|
360
|
-
-
|
|
361
|
-
-
|
|
362
|
-
-
|
|
363
|
-
- **Used by**: `getAvailableFiles()` to sync files from conversation
|
|
359
|
+
- Files IN collection: stripped from message (replaced with placeholder), tools can access them
|
|
360
|
+
- Files NOT in collection: left in message as-is (model sees them directly)
|
|
361
|
+
- Updates lastAccessed for collection files
|
|
362
|
+
- **Used by**: `sys_entity_agent` to process incoming chat history
|
|
364
363
|
|
|
365
364
|
### File Entry Schema
|
|
366
365
|
|
|
@@ -729,32 +728,67 @@ RemoveFileFromCollection Tool
|
|
|
729
728
|
|
|
730
729
|
## Key Concepts
|
|
731
730
|
|
|
732
|
-
### 1. Context Scoping (`
|
|
731
|
+
### 1. Context Scoping (`agentContext`)
|
|
733
732
|
|
|
734
|
-
**Purpose**: Per-user/per-context file isolation
|
|
733
|
+
**Purpose**: Per-user/per-context file isolation with optional cross-context reading
|
|
735
734
|
|
|
736
735
|
**Usage**:
|
|
737
|
-
-
|
|
738
|
-
-
|
|
739
|
-
-
|
|
736
|
+
- **`agentContext`**: Array of context objects, each with:
|
|
737
|
+
- `contextId`: Context identifier (required)
|
|
738
|
+
- `contextKey`: Encryption key for this context (optional, `null` for unencrypted)
|
|
739
|
+
- `default`: Boolean indicating the default context for write operations (required)
|
|
740
|
+
- Stored in Redis with scoped keys: `FileStoreMap:ctx:<contextId>`
|
|
740
741
|
|
|
741
742
|
**Benefits**:
|
|
742
743
|
- Prevents hash collisions between users
|
|
743
744
|
- Enables per-user file management
|
|
744
745
|
- Supports multi-tenant applications
|
|
746
|
+
- Multiple contexts allow reading files from secondary contexts (e.g., workspace files)
|
|
747
|
+
- Separate encryption keys allow user-encrypted files alongside unencrypted shared workspace files
|
|
748
|
+
- Centralized context management (single parameter instead of multiple)
|
|
745
749
|
|
|
746
750
|
**Example**:
|
|
747
751
|
```javascript
|
|
748
|
-
// Upload with contextId
|
|
749
|
-
|
|
752
|
+
// Upload with contextId (from default context)
|
|
753
|
+
const agentContext = [
|
|
754
|
+
{ contextId: "user-123", contextKey: userContextKey, default: true }
|
|
755
|
+
];
|
|
756
|
+
await uploadFileToCloud(fileBuffer, mimeType, filename, resolver, agentContext[0].contextId);
|
|
750
757
|
|
|
751
758
|
// Check hash with contextId
|
|
752
|
-
await checkHashExists(hash, fileHandlerUrl, null,
|
|
759
|
+
await checkHashExists(hash, fileHandlerUrl, null, agentContext[0].contextId);
|
|
753
760
|
|
|
754
761
|
// Delete with contextId
|
|
755
|
-
await deleteFileByHash(hash, resolver,
|
|
762
|
+
await deleteFileByHash(hash, resolver, agentContext[0].contextId);
|
|
763
|
+
|
|
764
|
+
// Separate example: load merged collection (reads from both contexts)
|
|
765
|
+
// User context is encrypted (userContextKey), workspace is not (null)
|
|
766
|
+
const agentContext = [
|
|
767
|
+
{ contextId: "user-123", contextKey: userContextKey, default: true },
|
|
768
|
+
{ contextId: "workspace-456", contextKey: null, default: false } // Shared workspace, unencrypted
|
|
769
|
+
];
|
|
770
|
+
const collection = await loadMergedFileCollection(agentContext);
|
|
771
|
+
|
|
772
|
+
// Resolve file from any context in agentContext
|
|
773
|
+
const url = await resolveFileParameter("file.pdf", agentContext);
|
|
756
774
|
```
|
|
757
775
|
|
|
776
|
+
**`agentContext` Behavior**:
|
|
777
|
+
- Files are read from all contexts in the array (union)
|
|
778
|
+
- Each context uses its own encryption key (`contextKey`)
|
|
779
|
+
- Shared workspaces typically use `contextKey: null` (unencrypted) since they're shared between users
|
|
780
|
+
- Writes/updates only go to the context marked as `default: true`, using its `contextKey`
|
|
781
|
+
- Deduplication: if a file exists in multiple contexts (same hash), the first context takes precedence
|
|
782
|
+
- Files from non-default contexts bypass `inCollection` filtering (all files accessible)
|
|
783
|
+
- The default context is used for all write operations (uploads, updates, deletions)
|
|
784
|
+
|
|
785
|
+
**`agentContext` Security Note**:
|
|
786
|
+
- `agentContext` allows reading files from multiple contexts, including files that bypass `inCollection` filtering
|
|
787
|
+
- **Important**: `agentContext` should be treated as a privileged, server-derived value
|
|
788
|
+
- Server-side authorization MUST verify that any contexts in `agentContext` are restricted to trusted, same-tenant contexts (e.g., derived from workspace membership) before use
|
|
789
|
+
- Never accept `agentContext` directly from untrusted client inputs without validation
|
|
790
|
+
- Only the default context should be used for write operations - non-default contexts are read-only
|
|
791
|
+
|
|
758
792
|
### 2. Permanent Files (`permanent` flag)
|
|
759
793
|
|
|
760
794
|
**Purpose**: Indicate files that should be kept indefinitely
|
|
@@ -938,15 +972,37 @@ Marks request as completed for cleanup.
|
|
|
938
972
|
Loads file collection from Redis hash map.
|
|
939
973
|
- **Parameters**:
|
|
940
974
|
- `contextId`: Context ID (required)
|
|
941
|
-
- `contextKey`: Optional encryption key
|
|
975
|
+
- `contextKey`: Optional encryption key
|
|
942
976
|
- `useCache`: Whether to use cache (default: true)
|
|
943
977
|
- **Returns**: Array of file entries (sorted by lastAccessed, most recent first)
|
|
944
978
|
- **Process**:
|
|
945
979
|
1. Checks in-memory cache (5-second TTL)
|
|
946
980
|
2. Loads from Redis hash map `FileStoreMap:ctx:<contextId>`
|
|
947
|
-
3.
|
|
948
|
-
4.
|
|
949
|
-
|
|
981
|
+
3. Filters by `inCollection` (only returns global files or chat-specific files)
|
|
982
|
+
4. Converts hash map entries to array format
|
|
983
|
+
5. Updates cache
|
|
984
|
+
- **Used by**: Primary file collection operations
|
|
985
|
+
|
|
986
|
+
#### `loadFileCollectionAll(contextId, contextKey)`
|
|
987
|
+
Loads ALL files from a context, bypassing `inCollection` filtering.
|
|
988
|
+
- **Parameters**:
|
|
989
|
+
- `contextId`: Context ID (required)
|
|
990
|
+
- `contextKey`: Optional encryption key
|
|
991
|
+
- **Returns**: Array of all file entries (no filtering)
|
|
992
|
+
- **Used by**: `loadMergedFileCollection` when loading files from all contexts
|
|
993
|
+
|
|
994
|
+
#### `loadMergedFileCollection(agentContext)`
|
|
995
|
+
Loads merged file collection from one or more contexts.
|
|
996
|
+
- **Parameters**:
|
|
997
|
+
- `agentContext`: Array of context objects, each with `{ contextId, contextKey, default }` (required)
|
|
998
|
+
- **Returns**: Array of file entries from all contexts (deduplicated by hash/url/gcs)
|
|
999
|
+
- **Process**:
|
|
1000
|
+
1. Loads first context collection via `loadFileCollectionAll()` with its `contextKey`
|
|
1001
|
+
2. Tags each file with `_contextId` (internal, stripped before returning to callers)
|
|
1002
|
+
3. For each additional context, loads collection via `loadFileCollectionAll()` with its `contextKey`
|
|
1003
|
+
4. Deduplicates: earlier contexts take precedence if same file exists in multiple
|
|
1004
|
+
5. Returns merged collection (with `_contextId` stripped before returning)
|
|
1005
|
+
- **Used by**: `syncAndStripFilesFromChatHistory`, `getAvailableFiles`, `resolveFileParameter`, file tools
|
|
950
1006
|
|
|
951
1007
|
#### `saveFileCollection(contextId, contextKey, collection)`
|
|
952
1008
|
Saves file collection to Redis hash map (optimized - only updates changed entries).
|
|
@@ -999,29 +1055,31 @@ Adds file to collection via atomic operation.
|
|
|
999
1055
|
5. Merges with existing CFH data if hash already exists
|
|
1000
1056
|
- **Used by**: WriteFile, Image tools, FileCollection tool
|
|
1001
1057
|
|
|
1002
|
-
#### `
|
|
1003
|
-
|
|
1058
|
+
#### `syncAndStripFilesFromChatHistory(chatHistory, agentContext)`
|
|
1059
|
+
Processes chat history files based on collection membership.
|
|
1004
1060
|
- **Parameters**:
|
|
1005
|
-
- `chatHistory`: Chat history array to
|
|
1006
|
-
- `
|
|
1007
|
-
|
|
1008
|
-
- **Returns**: Updated file collection array
|
|
1061
|
+
- `chatHistory`: Chat history array to process
|
|
1062
|
+
- `agentContext`: Array of context objects, each with `{ contextId, contextKey, default }` (required)
|
|
1063
|
+
- **Returns**: `{ chatHistory, availableFiles }` - processed chat history and formatted file list
|
|
1009
1064
|
- **Process**:
|
|
1010
|
-
1.
|
|
1011
|
-
2.
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1065
|
+
1. Loads merged file collection from all contexts in `agentContext`
|
|
1066
|
+
2. For each file in chat history:
|
|
1067
|
+
- If in collection: strip from message, update lastAccessed and inCollection in owning context (using that context's key)
|
|
1068
|
+
- If not in collection: leave in message as-is
|
|
1069
|
+
3. Returns processed history and available files string
|
|
1070
|
+
4. Uses atomic operations per file, updating the context that owns each file (identified by `_contextId` tag)
|
|
1071
|
+
- **Used by**: `sys_entity_agent` to process incoming chat history
|
|
1015
1072
|
|
|
1016
1073
|
### File Resolution
|
|
1017
1074
|
|
|
1018
|
-
#### `resolveFileParameter(fileParam,
|
|
1075
|
+
#### `resolveFileParameter(fileParam, agentContext, options)`
|
|
1019
1076
|
Resolves file parameter to file URL.
|
|
1020
1077
|
- **Parameters**:
|
|
1021
1078
|
- `fileParam`: File ID, filename, URL, or hash
|
|
1022
|
-
- `
|
|
1023
|
-
- `
|
|
1024
|
-
|
|
1079
|
+
- `agentContext`: Array of context objects, each with `{ contextId, contextKey, default }` (required)
|
|
1080
|
+
- `options`: Optional options object:
|
|
1081
|
+
- `preferGcs`: Boolean - prefer GCS URL over Azure URL
|
|
1082
|
+
- `useCache`: Boolean - use cache (default: true)
|
|
1025
1083
|
- **Returns**: File URL string (Azure or GCS) or `null` if not found
|
|
1026
1084
|
- **Matching** (via `findFileInCollection()`):
|
|
1027
1085
|
- Exact ID match
|
|
@@ -1029,6 +1087,10 @@ Resolves file parameter to file URL.
|
|
|
1029
1087
|
- Exact URL match (Azure or GCS)
|
|
1030
1088
|
- Exact filename match (case-insensitive, basename comparison)
|
|
1031
1089
|
- Fuzzy filename match (contains, minimum 4 characters)
|
|
1090
|
+
- **Process**:
|
|
1091
|
+
1. Loads merged file collection from all contexts in `agentContext`
|
|
1092
|
+
2. Searches merged collection for matching file
|
|
1093
|
+
3. Returns file URL if found
|
|
1032
1094
|
- **Used by**: ReadFile, EditFile, and other tools that need file URLs
|
|
1033
1095
|
|
|
1034
1096
|
#### `findFileInCollection(fileParam, collection)`
|
|
@@ -1039,17 +1101,17 @@ Finds file in collection array.
|
|
|
1039
1101
|
- **Returns**: File entry or `null`
|
|
1040
1102
|
- **Used by**: `resolveFileParameter`
|
|
1041
1103
|
|
|
1042
|
-
#### `generateFileMessageContent(fileParam,
|
|
1104
|
+
#### `generateFileMessageContent(fileParam, agentContext)`
|
|
1043
1105
|
Generates file content for LLM messages.
|
|
1044
1106
|
- **Parameters**:
|
|
1045
1107
|
- `fileParam`: File identifier (ID, filename, URL, or hash)
|
|
1046
|
-
- `
|
|
1047
|
-
- `contextKey`: Optional encryption key
|
|
1108
|
+
- `agentContext`: Array of context objects, each with `{ contextId, contextKey, default }` (required)
|
|
1048
1109
|
- **Returns**: File content object with `type`, `url`, `gcs`, `hash` or `null`
|
|
1049
1110
|
- **Process**:
|
|
1050
|
-
1.
|
|
1051
|
-
2.
|
|
1052
|
-
3.
|
|
1111
|
+
1. Loads merged file collection from all contexts in `agentContext`
|
|
1112
|
+
2. Finds file in merged collection via `findFileInCollection()`
|
|
1113
|
+
3. Resolves to short-lived URL via `ensureShortLivedUrl()` using default context
|
|
1114
|
+
4. Returns OpenAI-compatible format: `{type: 'image_url', url, gcs, hash}`
|
|
1053
1115
|
- **Used by**: AnalyzeFile tool to inject files into chat history
|
|
1054
1116
|
|
|
1055
1117
|
#### `extractFilesFromChatHistory(chatHistory)`
|
|
@@ -1061,23 +1123,29 @@ Extracts file metadata from chat history messages.
|
|
|
1061
1123
|
1. Scans all messages for file content objects
|
|
1062
1124
|
2. Extracts from `image_url`, `file`, or direct URL objects
|
|
1063
1125
|
3. Returns normalized format
|
|
1064
|
-
- **Used by**:
|
|
1126
|
+
- **Used by**: File extraction utilities
|
|
1065
1127
|
|
|
1066
|
-
#### `getAvailableFiles(chatHistory,
|
|
1067
|
-
Gets formatted list of available files
|
|
1128
|
+
#### `getAvailableFiles(chatHistory, agentContext)`
|
|
1129
|
+
Gets formatted list of available files from collection.
|
|
1068
1130
|
- **Parameters**:
|
|
1069
|
-
- `chatHistory`:
|
|
1070
|
-
- `
|
|
1071
|
-
- `contextKey`: Optional encryption key
|
|
1131
|
+
- `chatHistory`: Unused (kept for API compatibility)
|
|
1132
|
+
- `agentContext`: Array of context objects, each with `{ contextId, contextKey, default }` (required)
|
|
1072
1133
|
- **Returns**: Formatted string of available files (the 10 most recent)
|
|
1073
1134
|
- **Process**:
|
|
1074
|
-
1.
|
|
1135
|
+
1. Loads merged file collection from all contexts in `agentContext`
|
|
1075
1136
|
2. Formats files via `formatFilesForTemplate()`
|
|
1076
1137
|
3. Returns compact one-line format per file
|
|
1077
1138
|
- **Used by**: Template rendering to show available files
|
|
1078
1139
|
|
|
1079
1140
|
### Utility Functions
|
|
1080
1141
|
|
|
1142
|
+
#### `getDefaultContext(agentContext)`
|
|
1143
|
+
Helper function to extract the default context from an agentContext array.
|
|
1144
|
+
- **Parameters**:
|
|
1145
|
+
- `agentContext`: Array of context objects, each with `{ contextId, contextKey, default }`
|
|
1146
|
+
- **Returns**: Context object with `default: true`, or first context if none marked as default, or `null` if array is empty
|
|
1147
|
+
- **Used by**: Functions that need to determine which context to use for write operations
|
|
1148
|
+
|
|
1081
1149
|
#### `computeFileHash(filePath)`
|
|
1082
1150
|
Computes xxhash64 hash of file.
|
|
1083
1151
|
- **Returns**: Hash string (hex)
|
package/config.js
CHANGED
|
@@ -374,6 +374,15 @@ var config = convict({
|
|
|
374
374
|
"Content-Type": "application/json"
|
|
375
375
|
},
|
|
376
376
|
},
|
|
377
|
+
"replicate-seedance-1.5-pro": {
|
|
378
|
+
"type": "REPLICATE-API",
|
|
379
|
+
"url": "https://api.replicate.com/v1/models/bytedance/seedance-1.5-pro/predictions",
|
|
380
|
+
"headers": {
|
|
381
|
+
"Prefer": "wait",
|
|
382
|
+
"Authorization": "Token {{REPLICATE_API_KEY}}",
|
|
383
|
+
"Content-Type": "application/json"
|
|
384
|
+
},
|
|
385
|
+
},
|
|
377
386
|
"replicate-flux-11-pro": {
|
|
378
387
|
"type": "REPLICATE-API",
|
|
379
388
|
"url": "https://api.replicate.com/v1/models/black-forest-labs/flux-1.1-pro/predictions",
|