@aj-archipelago/cortex 1.4.6 → 1.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-file-handler/package-lock.json +2 -2
- package/helper-apps/cortex-file-handler/package.json +1 -1
- package/helper-apps/cortex-file-handler/src/index.js +27 -4
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +74 -10
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +23 -2
- package/helper-apps/cortex-file-handler/src/start.js +2 -0
- package/helper-apps/cortex-file-handler/tests/deleteOperations.test.js +287 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +1 -1
- package/lib/entityConstants.js +1 -1
- package/lib/fileUtils.js +1481 -0
- package/lib/pathwayTools.js +7 -1
- package/lib/util.js +2 -313
- package/package.json +4 -3
- package/pathways/image_qwen.js +1 -1
- package/pathways/system/entity/memory/sys_read_memory.js +17 -3
- package/pathways/system/entity/memory/sys_save_memory.js +22 -6
- package/pathways/system/entity/sys_entity_agent.js +21 -4
- package/pathways/system/entity/tools/sys_tool_analyzefile.js +171 -0
- package/pathways/system/entity/tools/sys_tool_codingagent.js +38 -4
- package/pathways/system/entity/tools/sys_tool_editfile.js +403 -0
- package/pathways/system/entity/tools/sys_tool_file_collection.js +433 -0
- package/pathways/system/entity/tools/sys_tool_image.js +172 -10
- package/pathways/system/entity/tools/sys_tool_image_gemini.js +123 -10
- package/pathways/system/entity/tools/sys_tool_readfile.js +217 -124
- package/pathways/system/entity/tools/sys_tool_validate_url.js +137 -0
- package/pathways/system/entity/tools/sys_tool_writefile.js +211 -0
- package/pathways/system/workspaces/run_workspace_prompt.js +4 -3
- package/pathways/transcribe_gemini.js +2 -1
- package/server/executeWorkspace.js +1 -1
- package/server/plugins/neuralSpacePlugin.js +2 -6
- package/server/plugins/openAiWhisperPlugin.js +2 -1
- package/server/plugins/replicateApiPlugin.js +4 -14
- package/server/typeDef.js +10 -1
- package/tests/integration/features/tools/fileCollection.test.js +858 -0
- package/tests/integration/features/tools/fileOperations.test.js +851 -0
- package/tests/integration/features/tools/writefile.test.js +350 -0
- package/tests/unit/core/fileCollection.test.js +259 -0
- package/tests/unit/core/util.test.js +320 -1
package/lib/fileUtils.js
ADDED
|
@@ -0,0 +1,1481 @@
|
|
|
1
|
+
import logger from "./logger.js";
|
|
2
|
+
import stream from 'stream';
|
|
3
|
+
import os from 'os';
|
|
4
|
+
import http from 'http';
|
|
5
|
+
import https from 'https';
|
|
6
|
+
import { URL } from 'url';
|
|
7
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
8
|
+
import { promisify } from 'util';
|
|
9
|
+
import { axios } from './requestExecutor.js';
|
|
10
|
+
import { config } from '../config.js';
|
|
11
|
+
import fs from 'fs';
|
|
12
|
+
import path from 'path';
|
|
13
|
+
import FormData from 'form-data';
|
|
14
|
+
import xxhash from 'xxhash-wasm';
|
|
15
|
+
import mime from 'mime-types';
|
|
16
|
+
|
|
17
|
+
const pipeline = promisify(stream.pipeline);
|
|
18
|
+
const MEDIA_API_URL = config.get('whisperMediaApiUrl');
|
|
19
|
+
|
|
20
|
+
// Cache xxhash instance for reuse
|
|
21
|
+
let xxhashInstance = null;
|
|
22
|
+
let xxhashInitPromise = null;
|
|
23
|
+
|
|
24
|
+
/**
 * Lazily initialize and return the shared xxhash-wasm instance.
 * The compiled instance is cached at module scope so the wasm module is
 * built only once; concurrent first callers piggyback on a single
 * in-flight promise, and a failed initialization clears that promise so
 * the next caller can retry.
 * @returns {Promise<Object>} resolved xxhash instance
 */
async function getXXHashInstance() {
    // Fast path: instance already built.
    if (xxhashInstance) {
        return xxhashInstance;
    }

    // Another caller is already initializing - share its promise.
    if (xxhashInitPromise) {
        return await xxhashInitPromise;
    }

    // First caller: start initialization (exactly one of these runs).
    xxhashInitPromise = (async () => {
        try {
            const built = await xxhash();
            xxhashInstance = built;
            return built;
        } finally {
            // Always drop the in-flight promise so a failed init can be retried.
            xxhashInitPromise = null;
        }
    })();

    return await xxhashInitPromise;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Compute the xxhash64 hash of a file by streaming its contents
 * (super fast hash used for file deduplication).
 * Uses xxhash64 to match the hash format used in labeeb and the cortex
 * file handler.
 * @param {string} filePath - Path to the file
 * @returns {Promise<string>} xxhash64 hash in hex format
 */
async function computeFileHash(filePath) {
    const hasher = await getXXHashInstance();

    return new Promise((resolve, reject) => {
        // Fresh xxh64 state per call so concurrent hashes don't interleave.
        const xxh64 = hasher.create64();
        // Named readStream to avoid shadowing the module-level `stream` import.
        const readStream = fs.createReadStream(filePath);

        readStream.on('data', (data) => xxh64.update(data));
        readStream.on('end', () => resolve(xxh64.digest().toString(16)));
        readStream.on('error', (error) => reject(error));
    });
}
|
|
74
|
+
|
|
75
|
+
/**
 * Compute the xxhash64 hash of an in-memory buffer.
 * @param {Buffer} buffer - Buffer to hash
 * @returns {Promise<string>} xxhash64 hash in hex format
 */
async function computeBufferHash(buffer) {
    const hasher = await getXXHashInstance();
    const state = hasher.create64();
    state.update(buffer);
    const digest = state.digest();
    return digest.toString(16);
}
|
|
86
|
+
|
|
87
|
+
/**
 * Best-effort deletion of a temporary file or directory.
 * Missing or falsy paths are logged and ignored; directories are removed
 * recursively. Errors are logged, never thrown.
 * @param {string} tempPath - Path to the temporary file or directory
 */
async function deleteTempPath(tempPath) {
    // Parameter renamed from `path` to avoid shadowing the `path` module import.
    try {
        if (!tempPath) {
            logger.warn('Temporary path is not defined.');
            return;
        }
        if (!fs.existsSync(tempPath)) {
            logger.warn(`Temporary path ${tempPath} does not exist.`);
            return;
        }
        const stats = fs.statSync(tempPath);
        if (stats.isFile()) {
            fs.unlinkSync(tempPath);
            logger.info(`Temporary file ${tempPath} deleted successfully.`);
        } else if (stats.isDirectory()) {
            fs.rmSync(tempPath, { recursive: true });
            logger.info(`Temporary folder ${tempPath} and its contents deleted successfully.`);
        }
    } catch (err) {
        // Deliberately swallow: temp cleanup must never break the caller.
        logger.error(`Error occurred while deleting the temporary path: ${err}`);
    }
}
|
|
109
|
+
|
|
110
|
+
/**
 * Build a collision-resistant filename from a UUID plus the given extension.
 * @param {string} extension - File extension without the leading dot
 * @returns {string} e.g. "4f2c…-a1.mp3"
 */
function generateUniqueFilename(extension) {
    const base = uuidv4();
    return [base, extension].join('.');
}
|
|
113
|
+
|
|
114
|
+
/**
 * Download a file over http/https to a uniquely-named path in the OS
 * temp directory.
 * Fixes vs. previous version: removes the async-promise-executor
 * anti-pattern (plain async function now), parses the URL once, and
 * drains non-200 responses so the socket is released.
 * On failure the partial download is removed (best effort) and the
 * error is rethrown.
 * @param {string} fileUrl - http(s) URL to download
 * @returns {Promise<string>} local path of the downloaded file
 */
const downloadFile = async (fileUrl) => {
    const urlObj = new URL(fileUrl);
    const pathname = urlObj.pathname;
    // NOTE: if the pathname has no '.', this yields the whole pathname as
    // the "extension" (pre-existing behavior, preserved).
    const fileExtension = pathname.substring(pathname.lastIndexOf('.') + 1);
    const uniqueFilename = generateUniqueFilename(fileExtension);
    const tempDir = os.tmpdir();
    const localFilePath = `${tempDir}/${uniqueFilename}`;

    try {
        const protocol = urlObj.protocol === 'https:' ? https : http;

        const response = await new Promise((resolve, reject) => {
            protocol.get(urlObj, (res) => {
                if (res.statusCode === 200) {
                    resolve(res);
                } else {
                    // Drain the body so the underlying socket is freed.
                    res.resume();
                    reject(new Error(`HTTP request failed with status code ${res.statusCode}`));
                }
            }).on('error', reject);
        });

        await pipeline(response, fs.createWriteStream(localFilePath));
        logger.info(`Downloaded file to ${localFilePath}`);
        return localFilePath;
    } catch (error) {
        // Best-effort cleanup of any partial download, then propagate.
        await new Promise((resolve) => fs.unlink(localFilePath, resolve));
        throw error;
    }
};
|
|
149
|
+
|
|
150
|
+
/**
 * Resolve a media file into processable chunk URIs.
 * When the media helper API is configured it is asked to split the file;
 * otherwise the file itself is returned as the only chunk.
 * @param {string} file - Media file URI
 * @param {string} requestId - Request correlation id passed to the helper API
 * @returns {Promise<Array>} list of chunk URIs
 * @throws rethrows any helper-API error after logging it
 */
async function getMediaChunks(file, requestId) {
    try {
        if (!MEDIA_API_URL) {
            logger.info(`No API_URL set, returning file as chunk`);
            return [file];
        }
        //call helper api and get list of file uris
        const res = await axios.get(MEDIA_API_URL, { params: { uri: file, requestId } });
        return res.data;
    } catch (err) {
        logger.error(`Error getting media chunks list from api: ${err}`);
        throw err;
    }
}
|
|
165
|
+
|
|
166
|
+
/**
 * Tell the media helper API that processing for a request is finished so
 * its temporary artifacts can be cleaned up. No-op when the API is not
 * configured; errors are logged and swallowed (cleanup is best effort).
 * @param {string} requestId - Request correlation id
 * @returns {Promise<*|undefined>} helper-API response data, if any
 */
async function markCompletedForCleanUp(requestId) {
    if (!MEDIA_API_URL) {
        return;
    }
    try {
        //call helper api to mark processing as completed
        const res = await axios.delete(MEDIA_API_URL, { params: { requestId } });
        logger.info(`Marked request ${requestId} as completed: ${JSON.stringify(res.data)}`);
        return res.data;
    } catch (err) {
        logger.error(`Error marking request ${requestId} as completed: ${err}`);
    }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Delete a file from cloud storage by hash via the file-handler API.
 * Returns false (never throws) on any failure: deletion is best effort
 * and must not block the calling modification.
 * @param {string} hash - File hash to delete
 * @param {pathwayResolver} pathwayResolver - Optional pathway resolver for logging
 * @returns {Promise<boolean>} True if file was deleted, false if not found or error
 */
async function deleteFileByHash(hash, pathwayResolver = null) {
    if (typeof hash !== 'string' || !hash) {
        logger.warn('deleteFileByHash: hash is required and must be a string');
        return false;
    }

    const fileHandlerUrl = MEDIA_API_URL;
    if (!fileHandlerUrl) {
        logger.warn('deleteFileByHash: WHISPER_MEDIA_API_URL is not set, cannot delete file');
        return false;
    }

    try {
        // The configured URL may already carry a query string.
        const separator = fileHandlerUrl.includes('?') ? '&' : '?';
        const deleteUrl = `${fileHandlerUrl}${separator}hash=${encodeURIComponent(hash)}`;

        const response = await axios.delete(deleteUrl, {
            validateStatus: (status) => status >= 200 && status < 500, // Accept 200-499 as valid responses
            timeout: 30000
        });

        switch (response.status) {
            case 200:
                logger.info(`Successfully deleted file with hash ${hash}`);
                return true;
            case 404:
                // Not an error - the file simply doesn't exist (anymore).
                logger.info(`File with hash ${hash} not found (may have already been deleted)`);
                return false;
            default:
                logger.warn(`Unexpected status ${response.status} when deleting file with hash ${hash}`);
                return false;
        }
    } catch (error) {
        // A 404 surfacing as an exception is still the benign "already gone" case.
        if (error?.response?.status === 404) {
            logger.info(`File with hash ${hash} not found during deletion (may have already been deleted)`);
            return false;
        }

        // Log other errors but don't throw - deletion failure shouldn't block modification
        const errorMsg = error?.message || String(error);
        logger.warn(`Error deleting file with hash ${hash}: ${errorMsg}`);
        return false;
    }
}
|
|
229
|
+
|
|
230
|
+
// Helper function to extract file metadata from a single content object.
// Returns a normalized array (0 or 1 entries) with url and gcs fields
// suitable for file-collection storage. Recognized shapes:
//   { type: 'image_url', image_url: { url } }, { type: 'file', url },
//   and bare { url } objects with type 'image_url' or no type at all.
function extractFileMetadataFromContent(contentObj) {
    // All shapes share the same metadata fields; only url/type differ.
    const buildEntry = (url, type) => ({
        url,
        gcs: contentObj.gcs || null,
        filename: contentObj.originalFilename || contentObj.name || contentObj.filename || null,
        hash: contentObj.hash || null,
        type
    });

    if (contentObj.type === 'image_url' && contentObj.image_url?.url) {
        return [buildEntry(contentObj.image_url.url, 'image_url')];
    }
    if (contentObj.type === 'file' && contentObj.url) {
        return [buildEntry(contentObj.url, 'file')];
    }
    if (contentObj.url && (contentObj.type === 'image_url' || !contentObj.type)) {
        // Handle direct URL objects
        return [buildEntry(contentObj.url, contentObj.type || 'file')];
    }

    return [];
}
|
|
264
|
+
|
|
265
|
+
// Module-level cache for file collections during a request lifecycle.
// Keyed by getCollectionCacheKey(); each entry holds
// { collection, version, timestamp } and is considered stale after CACHE_TTL.
const fileCollectionCache = new Map();
const CACHE_TTL = 5000; // cache entry lifetime in milliseconds (5 seconds)
|
|
268
|
+
|
|
269
|
+
/**
 * Build the cache key for a context's file collection.
 * Keys are scoped to the memoryFiles section; a missing/empty contextKey
 * falls back to 'default'.
 * @param {string} contextId - Context ID for the file collection
 * @param {string} [contextKey] - Optional context key
 * @returns {string} cache key
 */
function getCollectionCacheKey(contextId, contextKey) {
    const keyPart = contextKey || 'default';
    return [contextId, 'memoryFiles', keyPart].join('-');
}
|
|
276
|
+
|
|
277
|
+
/**
 * Scan a chat history for file references and return their metadata.
 * Message content may be an array of parts, a JSON string, or a plain
 * object; strings that are not valid JSON are silently skipped.
 * @param {Array} chatHistory - Chat history to scan
 * @returns {Array} Array of file metadata objects
 */
function extractFilesFromChatHistory(chatHistory) {
    if (!chatHistory || !Array.isArray(chatHistory)) {
        return [];
    }

    const results = [];

    // Parse (if needed) and extract from one content part; non-JSON
    // strings and extraction failures are ignored.
    const collectFrom = (content) => {
        try {
            const obj = typeof content === 'string' ? JSON.parse(content) : content;
            results.push(...extractFileMetadataFromContent(obj));
        } catch (e) {
            // Not JSON or couldn't be parsed - nothing to collect.
        }
    };

    for (const message of chatHistory) {
        const content = message?.content;
        if (!content) {
            continue;
        }

        if (Array.isArray(content)) {
            content.forEach(collectFrom);
        } else if (typeof content === 'string') {
            collectFrom(content);
        } else if (typeof content === 'object') {
            results.push(...extractFileMetadataFromContent(content));
        }
    }

    return results;
}
|
|
323
|
+
|
|
324
|
+
/**
 * Load a file collection from the memory system (or the short-lived cache),
 * returning both the files and a version string for optimistic locking.
 * Supports the new { version, files } format and migrates the legacy
 * bare-array format by assigning a fresh version. Any read/parse failure
 * yields an empty collection.
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @param {boolean} useCache - Whether to check cache first (default: true)
 * @returns {Promise<{files: Array, version: string}>} File collection with version
 */
async function loadFileCollectionWithVersion(contextId, contextKey = null, useCache = true) {
    const freshVersion = () => new Date().toISOString();

    if (!contextId) {
        return { files: [], version: freshVersion() };
    }

    const cacheKey = getCollectionCacheKey(contextId, contextKey);

    // Serve from cache when the entry is still within its TTL.
    if (useCache) {
        const cached = fileCollectionCache.get(cacheKey);
        if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
            return { files: cached.collection, version: cached.version || freshVersion() };
        }
    }

    // Cache miss (or bypass): read from the memory system.
    const { callPathway } = await import('./pathwayTools.js');
    let files = [];
    let version = freshVersion();

    try {
        const memoryContent = await callPathway('sys_read_memory', {
            contextId,
            section: 'memoryFiles',
            contextKey
        });
        if (memoryContent) {
            const parsed = JSON.parse(memoryContent);

            if (Array.isArray(parsed)) {
                // Legacy format: bare array - migrate by assigning a new version.
                files = parsed;
            } else if (parsed && typeof parsed === 'object' && parsed.files) {
                // Current format: { version, files }
                files = Array.isArray(parsed.files) ? parsed.files : [];
                version = parsed.version || freshVersion();
            }
            // Any other shape is invalid; keep the empty defaults.
        }
    } catch (e) {
        // Collection doesn't exist yet (or couldn't be parsed) - start empty.
        files = [];
        version = freshVersion();
    }

    fileCollectionCache.set(cacheKey, {
        collection: files,
        version: version,
        timestamp: Date.now()
    });

    return { files, version };
}
|
|
392
|
+
|
|
393
|
+
/**
 * Load a file collection from the memory system or cache.
 * Convenience wrapper around loadFileCollectionWithVersion that discards
 * the version string.
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @param {boolean} useCache - Whether to check cache first (default: true)
 * @returns {Promise<Array>} File collection array
 */
async function loadFileCollection(contextId, contextKey = null, useCache = true) {
    const result = await loadFileCollectionWithVersion(contextId, contextKey, useCache);
    return result.files;
}
|
|
404
|
+
|
|
405
|
+
/**
 * Save a file collection to the memory system with optional optimistic
 * version checking.
 * When expectedVersion is provided, the stored collection is re-read
 * (bypassing the cache) immediately before the save; a mismatching
 * version aborts the save so the caller can retry. Empty/legacy-format
 * content never blocks the save. Errors are logged and reported as
 * `false` - persistence here is best effort.
 * Fix vs. previous version: the catch block used a redundant dynamic
 * `import('./logger.js')` that shadowed the module-level logger import;
 * it now uses the top-level logger directly.
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @param {Array} collection - File collection array
 * @param {string} expectedVersion - Expected version for optimistic locking (if provided)
 * @returns {Promise<boolean>} True if save succeeded, false if version mismatch or error
 */
async function saveFileCollectionWithVersion(contextId, contextKey, collection, expectedVersion = null) {
    const cacheKey = getCollectionCacheKey(contextId, contextKey);
    const newVersion = new Date().toISOString();

    try {
        const { callPathway } = await import('./pathwayTools.js');

        if (expectedVersion !== null) {
            // Read directly from memory (bypass cache) right before saving to
            // minimize the optimistic-locking race window.
            const memoryContent = await callPathway('sys_read_memory', {
                contextId,
                section: 'memoryFiles',
                contextKey
            });

            let currentVersion = null;
            let collectionExists = false;
            let isOldFormat = false;

            if (memoryContent && memoryContent.trim() !== '' && memoryContent.trim() !== '[]') {
                collectionExists = true;
                try {
                    const parsed = JSON.parse(memoryContent);
                    if (parsed && typeof parsed === 'object' && !Array.isArray(parsed) && parsed.version) {
                        // Current format: { version, files }
                        currentVersion = parsed.version;
                    } else if (Array.isArray(parsed)) {
                        // Legacy bare-array format has no version; allow the save
                        // so it migrates to the versioned format.
                        isOldFormat = true;
                    }
                } catch (e) {
                    // Unparseable content - currentVersion stays null and is
                    // treated as a mismatch below.
                }
            }

            // Only an existing, versioned collection with a different version
            // blocks the save; empty or legacy content has nothing to conflict with.
            if (collectionExists && !isOldFormat && currentVersion !== expectedVersion) {
                return false;
            }
        }

        // Save with version (minimal window between check and save).
        const collectionData = {
            version: newVersion,
            files: collection
        };

        await callPathway('sys_save_memory', {
            contextId,
            section: 'memoryFiles',
            aiMemory: JSON.stringify(collectionData),
            contextKey
        });

        fileCollectionCache.set(cacheKey, {
            collection,
            version: newVersion,
            timestamp: Date.now()
        });

        return true;
    } catch (e) {
        // Log but don't fail - collection update is best effort.
        logger.warn(`Failed to save file collection: ${e.message}`);
        return false;
    }
}
|
|
493
|
+
|
|
494
|
+
/**
 * Save a file collection to the memory system without version checking.
 * Thin wrapper over saveFileCollectionWithVersion with no expected version,
 * so the save is unconditional (last writer wins).
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @param {Array} collection - File collection array
 */
async function saveFileCollection(contextId, contextKey, collection) {
    const noVersionCheck = null;
    await saveFileCollectionWithVersion(contextId, contextKey, collection, noVersionCheck);
}
|
|
503
|
+
|
|
504
|
+
/**
 * Modify a file collection under optimistic locking with automatic retries.
 * This is the entry point all modify operations should use for concurrency
 * safety: it loads the latest collection (bypassing the cache), applies the
 * caller's modifier to a copy, and saves with a version check; on version
 * mismatch it backs off exponentially (capped at 100ms) and retries.
 * Fixes vs. previous version: removed the dead `lastError` variable
 * (assigned but never read) and deduplicated the backoff logic.
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @param {Function} modifierCallback - Receives the collection array copy and
 *   must return the modified collection array (may be async)
 * @param {number} maxRetries - Maximum number of retry attempts (default: 5)
 * @returns {Promise<Array>} The final modified collection array
 * @throws if the callback misbehaves, a non-recoverable error persists
 *   through all retries, or every attempt loses the version race
 */
async function modifyFileCollectionWithLock(contextId, contextKey, modifierCallback, maxRetries = 5) {
    if (!contextId) {
        throw new Error("contextId is required");
    }

    if (typeof modifierCallback !== 'function') {
        throw new Error("modifierCallback must be a function");
    }

    // Exponential backoff (10ms, 20ms, 40ms, ... capped at 100ms) to reduce contention.
    const backoff = (attempt) =>
        new Promise(resolve => setTimeout(resolve, Math.min(10 * Math.pow(2, attempt), 100)));

    for (let attempt = 0; attempt < maxRetries; attempt++) {
        try {
            // Skip the cache so we see the latest persisted version.
            const { files, version } = await loadFileCollectionWithVersion(contextId, contextKey, false);

            // Work on a copy so a failed save doesn't leave mutated shared state.
            const collectionCopy = [...files];

            const modifiedCollection = await modifierCallback(collectionCopy);

            if (!Array.isArray(modifiedCollection)) {
                throw new Error("modifierCallback must return an array");
            }

            const saved = await saveFileCollectionWithVersion(contextId, contextKey, modifiedCollection, version);

            if (saved) {
                return modifiedCollection;
            }

            // Version mismatch - back off before the next attempt.
            if (attempt < maxRetries - 1) {
                await backoff(attempt);
            }
        } catch (error) {
            // Non-version errors: retry a few times, then propagate.
            if (attempt === maxRetries - 1) {
                throw error;
            }
            await backoff(attempt);
        }
    }

    // All retries lost the version race.
    throw new Error(`Failed to modify file collection after ${maxRetries} attempts due to concurrent modifications`);
}
|
|
571
|
+
|
|
572
|
+
/**
 * Add a file to the file collection.
 * If fileUrl is provided and url is not already a known cloud URL, the
 * file is uploaded first (uploadFileToCloud downloads it, computes the
 * hash, dedupes, and uploads the local stream). The entry is then pushed
 * into the collection under optimistic locking.
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @param {string} url - Cloud storage URL (Azure URL) - may be null when fileUrl is given
 * @param {string} gcs - Optional Google Cloud Storage URL
 * @param {string} filename - Filename or title for the file
 * @param {Array<string>} tags - Optional array of tags
 * @param {string} notes - Optional notes or description
 * @param {string} hash - Optional file hash
 * @param {string} fileUrl - Optional: URL of file to upload (if not already in cloud storage)
 * @param {pathwayResolver} pathwayResolver - Optional pathway resolver for logging
 * @returns {Promise<Object>} File entry object with id
 * @throws when contextId/filename are missing or no usable URL results
 */
async function addFileToCollection(contextId, contextKey, url, gcs, filename, tags = [], notes = '', hash = null, fileUrl = null, pathwayResolver = null) {
    if (!contextId || !filename) {
        throw new Error("contextId and filename are required");
    }

    let finalUrl = url;
    let finalGcs = gcs;
    let finalHash = hash;

    // A url pointing at Azure blob storage or GCS is already "in the cloud".
    const alreadyInCloud = Boolean(
        url && (url.includes('blob.core.windows.net') || url.includes('storage.googleapis.com'))
    );

    if (fileUrl && !alreadyInCloud) {
        // Upload from the source URL; uploadFileToCloud handles download,
        // hashing, dedupe, and streaming the local file (avoids remoteFile fetch).
        const uploadResult = await uploadFileToCloud(fileUrl, null, filename, pathwayResolver);
        finalUrl = uploadResult.url;
        finalGcs = uploadResult.gcs;
        finalHash = uploadResult.hash || hash;
    }

    if (!finalUrl) {
        throw new Error("url or fileUrl is required");
    }

    const now = new Date().toISOString();

    // Build the entry up front so a lock retry doesn't mint a new id.
    const fileEntry = {
        id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
        url: finalUrl,
        gcs: finalGcs || null,
        filename: filename,
        tags: Array.isArray(tags) ? tags : [],
        notes: notes || '',
        hash: finalHash || null,
        addedDate: now,
        lastAccessed: now
    };

    // Append under optimistic locking.
    await modifyFileCollectionWithLock(contextId, contextKey, (collection) => {
        collection.push(fileEntry);
        return collection;
    });

    return fileEntry;
}
|
|
632
|
+
|
|
633
|
+
/**
 * Sync files from chat history into the persisted file collection.
 *
 * Scans the chat history for file references and merges any new ones into
 * the collection under an optimistic lock (modifyFileCollectionWithLock) so
 * concurrent writers don't clobber each other. Entries already present are
 * refreshed (lastAccessed) and enriched with any url/gcs/hash/filename the
 * history provides that the stored entry was missing.
 *
 * @param {Array} chatHistory - Chat history to scan
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @returns {Promise<Array>} The updated file collection ([] on invalid input)
 */
async function syncFilesToCollection(chatHistory, contextId, contextKey = null) {
    if (!chatHistory || !Array.isArray(chatHistory) || !contextId) {
        return [];
    }

    // Extract all files from chat history
    const extractedFiles = extractFilesFromChatHistory(chatHistory);

    if (extractedFiles.length === 0) {
        // No new files to add, return existing collection
        return await loadFileCollection(contextId, contextKey, true);
    }

    // Use optimistic locking to sync files
    const collection = await modifyFileCollectionWithLock(contextId, contextKey, (collection) => {
        // Create a map of existing files by URL and hash for fast lookup.
        // Keys are raw url / gcs strings plus a "hash:"-prefixed hash to avoid
        // collisions between hashes and URLs.
        const existingFilesMap = new Map();
        collection.forEach(file => {
            if (file.url) {
                existingFilesMap.set(file.url, file);
            }
            if (file.gcs) {
                existingFilesMap.set(file.gcs, file);
            }
            if (file.hash) {
                existingFilesMap.set(`hash:${file.hash}`, file);
            }
        });

        // Add new files that aren't already in the collection
        for (const file of extractedFiles) {
            // Check if file already exists by URL or hash
            const existsByUrl = file.url && existingFilesMap.has(file.url);
            const existsByGcs = file.gcs && existingFilesMap.has(file.gcs);
            const existsByHash = file.hash && existingFilesMap.has(`hash:${file.hash}`);

            if (!existsByUrl && !existsByGcs && !existsByHash) {
                // New file - add to collection
                const fileEntry = {
                    id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
                    url: file.url,
                    gcs: file.gcs || null,
                    filename: file.filename || (file.url ? file.url.split('/').pop().split('?')[0] : 'unknown'),
                    hash: file.hash || null,
                    type: file.type || 'file',
                    addedDate: new Date().toISOString(),
                    lastAccessed: new Date().toISOString()
                };

                collection.push(fileEntry);
                // Fix: guard on file.url before indexing, consistent with the
                // seeding loop above. Previously a url-less file registered the
                // entry under the key `undefined`, polluting the lookup map.
                if (file.url) {
                    existingFilesMap.set(file.url, fileEntry);
                }
                if (file.gcs) {
                    existingFilesMap.set(file.gcs, fileEntry);
                }
                if (file.hash) {
                    existingFilesMap.set(`hash:${file.hash}`, fileEntry);
                }
            } else {
                // File exists - update lastAccessed and merge URLs if needed
                const existingFile = existsByUrl ? existingFilesMap.get(file.url) :
                                     existsByGcs ? existingFilesMap.get(file.gcs) :
                                     existingFilesMap.get(`hash:${file.hash}`);

                if (existingFile) {
                    existingFile.lastAccessed = new Date().toISOString();

                    // Merge URLs if we have new ones (never overwrite existing values)
                    if (file.url && !existingFile.url) {
                        existingFile.url = file.url;
                    }
                    if (file.gcs && !existingFile.gcs) {
                        existingFile.gcs = file.gcs;
                    }
                    if (file.hash && !existingFile.hash) {
                        existingFile.hash = file.hash;
                    }
                    if (file.filename && !existingFile.filename) {
                        existingFile.filename = file.filename;
                    }
                }
            }
        }

        return collection;
    });

    return collection;
}
|
|
728
|
+
|
|
729
|
+
/**
 * Load the file collection for a context and render it for template display.
 *
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @returns {Promise<string>} Human-readable file listing, or 'No files available.'
 */
async function getAvailableFilesFromCollection(contextId, contextKey = null) {
    if (!contextId) {
        return 'No files available.';
    }

    const files = await loadFileCollection(contextId, contextKey, true);
    return formatFilesForTemplate(files);
}
|
|
743
|
+
|
|
744
|
+
/**
 * Format a file collection for template display.
 *
 * Shows the 10 most recently used files (by lastAccessed, falling back to
 * addedDate) in a compact one-line-per-file table:
 *   Hash | Filename | URL | Date Added | Notes
 *
 * @param {Array} collection - File collection array
 * @returns {string} Formatted listing, or 'No files available.' when empty
 */
function formatFilesForTemplate(collection) {
    if (!collection || collection.length === 0) {
        return 'No files available.';
    }

    // Sort by lastAccessed (most recent first), fallback to addedDate
    const sorted = [...collection].sort((a, b) => {
        const aDate = a.lastAccessed || a.addedDate || '';
        const bDate = b.lastAccessed || b.addedDate || '';
        return new Date(bDate) - new Date(aDate);
    });

    // Take only the last 10 most recently used files
    const recentFiles = sorted.slice(0, 10);
    const totalFiles = collection.length;
    const hasMore = totalFiles > 10;

    // Format as one line per file: hash | filename | url | date added | notes
    const header = 'Hash | Filename | URL | Date Added | Notes';
    const separator = '-'.repeat(Math.max(header.length, 80));

    const fileList = recentFiles.map((file) => {
        const hash = file.hash || '';
        const filename = file.filename || 'Unnamed file';
        const url = file.url || '';
        const dateAdded = file.addedDate
            ? new Date(file.addedDate).toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric' })
            : '';
        const notes = file.notes || '';
        // Fix: the row template printed the literal text "$(unknown)" where the
        // filename belongs; `filename` was computed but never interpolated.
        return `${hash} | ${filename} | ${url} | ${dateAdded} | ${notes}`;
    }).join('\n');

    let result = `${header}\n${separator}\n${fileList}`;

    if (hasMore) {
        result += `\n\nNote: Showing the last 10 most recently used files. ${totalFiles - 10} more file(s) are available in your collection. Use ListFileCollection or SearchFileCollection to see all files.`;
    }

    return result;
}
|
|
790
|
+
|
|
791
|
+
/**
 * Get the available-files listing for an entity, backed by the file collection.
 *
 * With a contextId: syncs files found in chat history into the collection,
 * then returns the formatted collection listing. Without one: falls back to
 * a plain newline-joined list of URLs extracted from the chat history.
 *
 * @param {Array} chatHistory - Chat history to scan
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @returns {Promise<string>} Formatted string of available files
 */
async function getAvailableFiles(chatHistory, contextId, contextKey = null) {
    if (!contextId) {
        // Legacy path: no collection available, list raw URLs from history.
        const urls = extractFilesFromChatHistory(chatHistory)
            .map((file) => file.url)
            .filter(Boolean);
        return urls.join('\n') || 'No files available.';
    }

    // Sync files from chat history to collection, then render it.
    await syncFilesToCollection(chatHistory, contextId, contextKey);
    return await getAvailableFilesFromCollection(contextId, contextKey);
}
|
|
811
|
+
|
|
812
|
+
/**
 * Find a file in the collection by ID, URL, hash, or filename (with fuzzy matching).
 *
 * Resolution order:
 *   1. Exact, case-sensitive match on id, Azure url, gcs url, or hash.
 *   2. Case-insensitive exact basename match (immediate winner).
 *   3. Scored fuzzy filename candidates; the highest score wins
 *      (1.0 full-name, 0.9 stem, 0.7 suffix, 0.6 stem-contains,
 *       0.5 contains, 0.2 extension-only).
 *
 * @param {string} fileParam - File ID, URL (Azure or GCS), hash, or filename
 * @param {Array} collection - File collection array
 * @returns {Object|null} File entry from collection, or null if not found
 */
function findFileInCollection(fileParam, collection) {
    if (!fileParam || typeof fileParam !== 'string' || !Array.isArray(collection)) {
        return null;
    }

    // Pass 1: exact matches (case-sensitive, untrimmed).
    const exact = collection.find(
        (entry) =>
            entry.id === fileParam ||
            entry.url === fileParam ||
            entry.gcs === fileParam ||
            entry.hash === fileParam
    );
    if (exact) {
        return exact;
    }

    // Pass 2: fuzzy filename matching (normalized to trimmed lowercase).
    const query = fileParam.trim().toLowerCase();

    // Split a lowercase name into basename / stem (no extension) / extension.
    const splitName = (name) => {
        const base = name.split('/').pop().split('\\').pop();
        return {
            base,
            stem: base.replace(/\.[^.]*$/, ''),
            ext: base.includes('.') ? base.split('.').pop() : ''
        };
    };
    const q = splitName(query);

    const candidates = [];
    for (const entry of collection) {
        if (!entry.filename) continue;

        const lowerName = entry.filename.toLowerCase();
        const f = splitName(lowerName);

        // Exact basename match (case-insensitive) short-circuits the search.
        if (f.base === q.base) {
            return entry;
        }

        if (f.base === query) {
            // e.g. "document.pdf" matches "path/to/document.pdf"
            candidates.push({ entry, score: 1.0 });
        } else if (f.stem === q.stem && q.stem.length > 0) {
            // Same name ignoring extension
            candidates.push({ entry, score: 0.9 });
        } else if (f.base.endsWith(query) && f.base.length > query.length) {
            // e.g. "my-document.pdf" ends with "document.pdf"
            candidates.push({ entry, score: 0.7 });
        } else if (f.stem.includes(q.stem) && q.stem.length > 2) {
            // e.g. "document" matches "my-document.pdf"
            candidates.push({ entry, score: 0.6 });
        } else if (lowerName.includes(query)) {
            candidates.push({ entry, score: 0.5 });
        } else if (q.ext && f.ext === q.ext) {
            // Extension-only match, last resort
            candidates.push({ entry, score: 0.2 });
        }
    }

    if (candidates.length === 0) {
        return null;
    }

    // Highest score wins; stable sort keeps collection order on ties.
    candidates.sort((a, b) => b.score - a.score);
    return candidates[0].entry;
}
|
|
921
|
+
|
|
922
|
+
/**
 * Resolve a file parameter to a URL by looking it up in the file collection.
 *
 * The parameter may be a file ID, an Azure or GCS URL, a hash, or a filename;
 * matching is delegated to findFileInCollection. Without a contextId there is
 * no collection to consult, so resolution fails with null.
 *
 * @param {string} fileParam - File ID, URL (Azure or GCS), hash, or filename from collection
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @param {Object} options - Optional configuration
 * @param {boolean} options.preferGcs - If true, prefer GCS URL over Azure URL when available
 * @returns {Promise<string|null>} Resolved file URL, or null if not found
 */
export async function resolveFileParameter(fileParam, contextId, contextKey = null, options = {}) {
    if (!fileParam || typeof fileParam !== 'string') {
        return null;
    }

    const trimmed = fileParam.trim();
    const { preferGcs = false } = options;

    if (!contextId) {
        // No collection to look the file up in.
        return null;
    }

    try {
        const collection = await loadFileCollection(contextId, contextKey, true);
        const match = findFileInCollection(trimmed, collection);

        if (!match) {
            return null;
        }
        if (preferGcs && match.gcs) {
            return match.gcs;
        }
        // Default to the regular (Azure) URL when present.
        return match.url || null;
    } catch (error) {
        // Resolution is best-effort: log and report "not found".
        logger.warn(`Failed to resolve file parameter "${trimmed}": ${error.message}`);
        return null;
    }
}
|
|
970
|
+
|
|
971
|
+
/**
 * Generate a chat-history content object for a file looked up in the collection.
 *
 * @param {string} fileParam - File URL (Azure or GCS), file ID from collection, or file hash
 * @param {string} contextId - Context ID for the file collection
 * @param {string} contextKey - Optional context key for encryption
 * @returns {Promise<Object|null>} Content object for chat history, or null if not found
 */
async function generateFileMessageContent(fileParam, contextId, contextKey = null) {
    if (!fileParam || typeof fileParam !== 'string') {
        return null;
    }

    if (!contextId) {
        // No collection to consult - wrap the raw parameter as a bare file object.
        return {
            type: 'file',
            url: fileParam
        };
    }

    const collection = await loadFileCollection(contextId, contextKey, true);
    const match = findFileInCollection(fileParam, collection);
    if (!match) {
        return null;
    }

    // Classify: stored type wins, otherwise infer image vs file from extension.
    const name = match.filename || '';
    const ext = name.split('.').pop()?.toLowerCase() || '';
    const imageExtensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp', 'svg'];
    const contentType = match.type || (imageExtensions.includes(ext) ? 'image_url' : 'file');

    // Both shapes carry url and gcs so downstream plugins can pick either.
    if (contentType === 'image_url') {
        return {
            type: 'image_url',
            image_url: { url: match.url },
            url: match.url,
            gcs: match.gcs || null,
            originalFilename: match.filename || null,
            hash: match.hash || null
        };
    }
    return {
        type: 'file',
        url: match.url,
        gcs: match.gcs || null,
        originalFilename: match.filename || null,
        hash: match.hash || null
    };
}
|
|
1029
|
+
|
|
1030
|
+
/**
 * Inject a file content object into chat history as a new user message.
 *
 * No-op (returns the history unchanged) when the same file - matched by url,
 * gcs url, or hash - is already referenced anywhere in the history.
 *
 * @param {Array} chatHistory - Chat history array (not mutated)
 * @param {Object} fileContent - Content object from generateFileMessageContent
 * @returns {Array} Chat history with the file appended, or unchanged if duplicate
 */
function injectFileIntoChatHistory(chatHistory, fileContent) {
    if (!chatHistory || !Array.isArray(chatHistory)) {
        // No usable history: start a fresh one containing just the file.
        return [{ role: 'user', content: [fileContent] }];
    }
    if (!fileContent) {
        return chatHistory;
    }

    // Identifiers used for duplicate detection.
    const url = fileContent.url || fileContent.image_url?.url;
    const gcs = fileContent.gcs;
    const hash = fileContent.hash;

    const isDuplicate = extractFilesFromChatHistory(chatHistory).some((existing) =>
        (url && existing.url === url) ||
        (gcs && existing.gcs === gcs) ||
        (hash && existing.hash === hash)
    );
    if (isDuplicate) {
        return chatHistory;
    }

    // Append a new user message in OpenAI-compatible content-array format
    // (content is an array of objects, not JSON strings).
    return [...chatHistory, { role: 'user', content: [fileContent] }];
}
|
|
1084
|
+
|
|
1085
|
+
/**
 * Ask the file handler whether a file with this hash already exists.
 *
 * Used as a pre-upload dedupe check; failures are logged (when a resolver is
 * available) and reported as "not found" rather than thrown, since this is
 * only an optimization.
 *
 * @param {string} hash - File hash to check
 * @param {string} fileHandlerUrl - File handler service URL
 * @param {pathwayResolver} pathwayResolver - Optional pathway resolver for logging
 * @returns {Promise<Object|null>} {url, gcs, hash} if file exists, null otherwise
 */
async function checkHashExists(hash, fileHandlerUrl, pathwayResolver = null) {
    if (!hash || !fileHandlerUrl) {
        return null;
    }

    try {
        const separator = fileHandlerUrl.includes('?') ? '&' : '?';
        const checkHashUrl = `${fileHandlerUrl}${separator}hash=${hash}&checkHash=true`;

        const response = await axios.get(checkHashUrl, {
            timeout: 10000,
            // Treat 4xx as a normal "not found" response, not an exception.
            validateStatus: (status) => status >= 200 && status < 500
        });

        const data = response.data;
        if (response.status === 200 && data && data.url) {
            // Prefer converted URLs when present (e.g. XLSX->CSV, DOCX->TXT),
            // otherwise fall back to the original upload URLs.
            return {
                url: data.converted?.url || data.url,
                gcs: data.converted?.gcs || data.gcs || null,
                hash: data.hash || hash
            };
        }

        return null;
    } catch (checkError) {
        // Best-effort optimization: swallow after extracting a loggable message.
        let errorMsg;
        if (checkError?.message) {
            errorMsg = checkError.message;
        } else if (checkError?.errors && Array.isArray(checkError.errors)) {
            // Handle AggregateError
            errorMsg = checkError.errors.map(e => e?.message || String(e)).join('; ');
        } else {
            errorMsg = String(checkError);
        }
        if (pathwayResolver && pathwayResolver.logWarning) {
            pathwayResolver.logWarning(`checkHash failed: ${errorMsg}`);
        }
        return null;
    }
}
|
|
1139
|
+
|
|
1140
|
+
/**
 * Generic function to upload a file to cloud storage via the file handler.
 *
 * Handles three input forms: an http(s) URL (downloaded to a temp file first),
 * a base64 string, or a Buffer. In every case the bytes are hashed and checked
 * against the file handler (checkHashExists) before uploading, so duplicate
 * content short-circuits to the existing URLs. Temp files are always removed
 * in the finally block, on success or failure.
 *
 * @param {string|Buffer} fileInput - URL to download from, or base64 string, or Buffer
 * @param {string} mimeType - MIME type of the file (optional for URLs)
 * @param {string} filename - Optional filename (will be inferred if not provided)
 * @param {pathwayResolver} pathwayResolver - Optional pathway resolver for logging
 * @returns {Promise<Object>} {url, gcs, hash}
 * @throws {Error} When MEDIA_API_URL is unset, the input type is unsupported,
 *                 the download/upload fails, or the handler returns no URL.
 */
async function uploadFileToCloud(fileInput, mimeType = null, filename = null, pathwayResolver = null) {
    let tempFilePath = null;
    let tempDir = null;
    let fileBuffer = null;
    let fileHash = null;

    try {
        const fileHandlerUrl = MEDIA_API_URL;
        if (!fileHandlerUrl) {
            throw new Error('WHISPER_MEDIA_API_URL is not set');
        }

        // Handle different input types
        if (typeof fileInput === 'string') {
            // Check if it's a URL or base64 data
            if (fileInput.startsWith('http://') || fileInput.startsWith('https://')) {
                // It's a URL (could be remote or cloud) - download it directly so we can compute the hash
                // Even if it's a cloud URL, we need to download it to compute hash and check if it exists
                // We'll upload the local file stream, not the URL, to avoid triggering remoteFile fetch
                // Download the file to a temporary location
                tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'file-upload-'));

                // Determine file extension from URL or filename
                let extension = 'bin';
                if (filename) {
                    const extMatch = filename.match(/\.([^.]+)$/);
                    if (extMatch) extension = extMatch[1];
                } else {
                    try {
                        const urlObj = new URL(fileInput);
                        const pathname = urlObj.pathname;
                        const extMatch = pathname.match(/\.([^.]+)$/);
                        if (extMatch) extension = extMatch[1];
                    } catch (e) {
                        // URL parsing failed, use default
                    }
                }

                const downloadFilename = filename || `download_${Date.now()}.${extension}`;
                tempFilePath = path.join(tempDir, downloadFilename);

                // Download the file directly using axios so we can compute hash
                const downloadResponse = await axios.get(fileInput, {
                    responseType: 'stream',
                    timeout: 60000,
                    // Accept redirects (3xx) without throwing; rejected below if not 200
                    validateStatus: (status) => status >= 200 && status < 400
                });

                if (downloadResponse.status !== 200) {
                    throw new Error(`Failed to download file: ${downloadResponse.status}`);
                }

                // Stream to disk, then re-read for hashing.
                const writeStream = fs.createWriteStream(tempFilePath);
                await pipeline(downloadResponse.data, writeStream);

                // Read the downloaded file into buffer to compute hash
                fileBuffer = fs.readFileSync(tempFilePath);
            } else {
                // It's base64 data
                fileBuffer = Buffer.from(fileInput, 'base64');
            }
        } else if (Buffer.isBuffer(fileInput)) {
            fileBuffer = fileInput;
        } else {
            throw new Error('fileInput must be a URL string, base64 string, or Buffer');
        }

        // For buffer data, compute hash and check if file exists
        if (fileBuffer) {
            fileHash = await computeBufferHash(fileBuffer);

            // Check if file already exists using checkHash
            const existingFile = await checkHashExists(fileHash, fileHandlerUrl, pathwayResolver);
            if (existingFile) {
                // Dedupe hit: reuse the handler's existing URLs, skip the upload.
                return existingFile;
            }

            // File doesn't exist or checkHash failed - proceed with upload
            // If we don't already have a tempFilePath (from URL download), create one
            if (!tempFilePath) {
                // Determine file extension from mime type or filename
                let extension = 'bin';
                if (mimeType) {
                    extension = mimeType.split('/')[1] || 'bin';
                } else if (filename) {
                    const extMatch = filename.match(/\.([^.]+)$/);
                    if (extMatch) extension = extMatch[1];
                }

                const uploadFilename = filename || `upload_${Date.now()}.${extension}`;

                // Create temporary file
                if (!tempDir) {
                    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'file-upload-'));
                }
                tempFilePath = path.join(tempDir, uploadFilename);

                // Write buffer to temp file
                fs.writeFileSync(tempFilePath, fileBuffer);
            }
            // If tempFilePath already exists (from URL download), we can use it directly
        }

        // Upload the file (only if we have buffer data and created tempFilePath)
        if (!tempFilePath) {
            throw new Error('No file to upload - tempFilePath not created');
        }

        const requestId = uuidv4();
        const formData = new FormData();
        formData.append('file', fs.createReadStream(tempFilePath), {
            filename: path.basename(tempFilePath),
            contentType: mimeType || 'application/octet-stream'
        });
        // Add hash for deduplication if we computed it
        if (fileHash) {
            formData.append('hash', fileHash);
        }

        // Append requestId parameter
        const separator = fileHandlerUrl.includes('?') ? '&' : '?';
        const uploadUrl = `${fileHandlerUrl}${separator}requestId=${requestId}`;

        // Upload file
        const uploadResponse = await axios.post(uploadUrl, formData, {
            headers: {
                ...formData.getHeaders()
            },
            timeout: 30000
        });

        if (uploadResponse.data && uploadResponse.data.url) {
            // Return both url and gcs if available
            return {
                url: uploadResponse.data.url,
                gcs: uploadResponse.data.gcs || null,
                hash: uploadResponse.data.hash || fileHash
            };
        } else {
            throw new Error('No URL returned from file handler');
        }

    } catch (error) {
        // Build a message that also covers AggregateError (no .message of its own).
        let errorMsg;
        if (error?.message) {
            errorMsg = error.message;
        } else if (error?.errors && Array.isArray(error.errors)) {
            // Handle AggregateError
            errorMsg = error.errors.map(e => e?.message || String(e)).join('; ');
        } else {
            errorMsg = String(error);
        }
        const errorMessage = `Failed to upload file: ${errorMsg}`;
        if (pathwayResolver && pathwayResolver.logError) {
            pathwayResolver.logError(errorMessage);
        } else {
            logger.error(errorMessage);
        }
        // Rethrow the original error so callers see the real cause.
        throw error;
    } finally {
        // Clean up temp files - always runs regardless of success or failure
        if (tempDir && fs.existsSync(tempDir)) {
            try {
                fs.rmSync(tempDir, { recursive: true, force: true });
            } catch (cleanupError) {
                const warningMessage = `Failed to clean up temp directory: ${cleanupError.message}`;
                if (pathwayResolver && pathwayResolver.logWarning) {
                    pathwayResolver.logWarning(warningMessage);
                } else {
                    logger.warn(warningMessage);
                }
            }
        } else if (tempFilePath && fs.existsSync(tempFilePath)) {
            // Fallback: if tempDir doesn't exist but tempFilePath does, delete just the file
            try {
                fs.unlinkSync(tempFilePath);
            } catch (cleanupError) {
                const warningMessage = `Failed to clean up temp file: ${cleanupError.message}`;
                if (pathwayResolver && pathwayResolver.logWarning) {
                    pathwayResolver.logWarning(warningMessage);
                } else {
                    logger.warn(warningMessage);
                }
            }
        }
    }
}
|
|
1337
|
+
|
|
1338
|
+
/**
 * Upload base64 image data to cloud storage.
 *
 * Thin wrapper around the generic uploadFileToCloud with no explicit filename.
 *
 * @param {string} base64Data - Base64-encoded image bytes
 * @param {string} mimeType - Image MIME type
 * @param {pathwayResolver} pathwayResolver - Optional pathway resolver for logging
 * @returns {Promise<Object>} {url, gcs, hash}
 */
const uploadImageToCloud = async (base64Data, mimeType, pathwayResolver = null) =>
    uploadFileToCloud(base64Data, mimeType, null, pathwayResolver);
|
|
1343
|
+
|
|
1344
|
+
/**
 * Convert file hashes to content format suitable for LLM processing.
 *
 * Each hash is resolved independently (in parallel) against the file handler.
 * Per-hash results are stringified JSON objects of one of three shapes:
 *   - type "image_url"  : resolved file with url/image_url/gcs/filename/hash
 *   - type "file_hash"  : unresolved placeholder (_cortex_needs_resolution)
 *   - type "file_error" : resolution threw; carries the error message
 * Individual failures never reject the whole batch.
 *
 * @param {Array<string>} fileHashes - Array of file hashes to resolve
 * @param {Object} config - Configuration object with file service endpoints
 * @returns {Promise<Array<string>>} Array of stringified file content objects
 */
async function resolveFileHashesToContent(fileHashes, config) {
    if (!fileHashes || fileHashes.length === 0) return [];

    const fileContentPromises = fileHashes.map(async (hash) => {
        try {
            // Use the existing file handler (cortex-file-handler) to resolve file hashes
            const fileHandlerUrl = config?.get?.('whisperMediaApiUrl');

            // 'null' string guard: config may store the literal text "null"
            if (fileHandlerUrl && fileHandlerUrl !== 'null') {
                // Use shared checkHashExists function
                const existingFile = await checkHashExists(hash, fileHandlerUrl);
                if (existingFile) {
                    const fileData = existingFile;
                    const fileUrl = fileData.url;
                    // NOTE(review): checkHashExists returns a flattened {url, gcs, hash},
                    // so fileData.converted and fileData.filename look like they are
                    // always undefined here - confirm intended shape with checkHashExists.
                    const convertedUrl = fileData.converted?.url;
                    const convertedGcsUrl = fileData.converted?.gcs;

                    return JSON.stringify({
                        type: "image_url",
                        url: convertedUrl || fileUrl,
                        image_url: { url: convertedUrl || fileUrl },
                        gcs: convertedGcsUrl || fileData.gcs, // Add GCS URL for Gemini models
                        originalFilename: fileData.filename,
                        hash: hash
                    });
                }

                // Fallback: try direct axios call for backward compatibility (in case checkHashExists doesn't work)
                const response = await axios.get(fileHandlerUrl, {
                    params: { hash: hash, checkHash: true }
                });
                if (response.status === 200) {
                    const fileData = response.data;
                    const fileUrl = fileData.shortLivedUrl || fileData.url;
                    const convertedUrl = fileData.converted?.url;
                    const convertedGcsUrl = fileData.converted?.gcs;

                    return JSON.stringify({
                        type: "image_url",
                        url: convertedUrl || fileUrl,
                        image_url: { url: convertedUrl || fileUrl },
                        gcs: convertedGcsUrl || fileData.gcs, // Add GCS URL for Gemini models
                        originalFilename: fileData.filename,
                        hash: hash
                    });
                }
            }

            // Fallback: create a placeholder that indicates file resolution is needed
            return JSON.stringify({
                type: "file_hash",
                hash: hash,
                _cortex_needs_resolution: true
            });
        } catch (error) {
            // Return error indicator
            return JSON.stringify({
                type: "file_error",
                hash: hash,
                error: error.message
            });
        }
    });

    return Promise.all(fileContentPromises);
}
|
|
1416
|
+
|
|
1417
|
+
/**
 * Detect the MIME type for a filename or full file path.
 * Delegates to the mime-types package, which accepts both bare filenames
 * and paths and returns false for unknown extensions.
 * @param {string} filenameOrPath - Filename or full file path
 * @param {string} defaultMimeType - Fallback MIME type when detection fails (default: 'application/octet-stream')
 * @returns {string} Detected MIME type, or the fallback
 */
function getMimeTypeFromFilename(filenameOrPath, defaultMimeType = 'application/octet-stream') {
    // Skip the lookup entirely for empty/null input; otherwise fall back
    // to the default when the lookup reports no match.
    const detected = filenameOrPath ? mime.lookup(filenameOrPath) : false;
    return detected || defaultMimeType;
}
|
|
1433
|
+
|
|
1434
|
+
/**
 * Detect the MIME type for a bare file extension.
 * @param {string} extension - File extension, with or without the leading dot (e.g. '.txt' or 'txt')
 * @param {string} defaultMimeType - Fallback MIME type when detection fails (default: 'application/octet-stream')
 * @returns {string} Detected MIME type, or the fallback
 */
function getMimeTypeFromExtension(extension, defaultMimeType = 'application/octet-stream') {
    if (!extension) {
        return defaultMimeType;
    }

    // mime.lookup expects a dotted extension — prepend the dot only when missing.
    const dotted = extension.startsWith('.') ? extension : '.' + extension;
    const detected = mime.lookup(dotted);
    return detected === false ? defaultMimeType : detected;
}
|
|
1450
|
+
|
|
1451
|
+
export {
    // Hash computation
    computeFileHash,
    computeBufferHash,
    // File deletion and temp-path cleanup
    deleteTempPath,
    deleteFileByHash,
    // Download and media helpers
    downloadFile,
    generateUniqueFilename,
    getMediaChunks,
    markCompletedForCleanUp,
    // Extracting file info from chat history / message content
    extractFileMetadataFromContent,
    extractFilesFromChatHistory,
    // File-collection management
    syncFilesToCollection,
    getAvailableFilesFromCollection,
    formatFilesForTemplate,
    getAvailableFiles,
    findFileInCollection,
    // resolveFileParameter is exported inline above
    generateFileMessageContent,
    injectFileIntoChatHistory,
    addFileToCollection,
    loadFileCollection,
    saveFileCollection,
    modifyFileCollectionWithLock,
    // Remote file handler / cloud upload
    checkHashExists,
    uploadFileToCloud,
    uploadImageToCloud,
    resolveFileHashesToContent,
    // MIME type detection (mime-types backed)
    getMimeTypeFromFilename,
    getMimeTypeFromExtension
};
|
|
1481
|
+
|