@aj-archipelago/cortex 1.3.49 → 1.3.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-browser/Dockerfile +19 -31
- package/helper-apps/cortex-browser/function_app.py +708 -181
- package/helper-apps/cortex-browser/requirements.txt +4 -4
- package/helper-apps/cortex-file-handler/blobHandler.js +850 -429
- package/helper-apps/cortex-file-handler/constants.js +64 -48
- package/helper-apps/cortex-file-handler/docHelper.js +7 -114
- package/helper-apps/cortex-file-handler/fileChunker.js +96 -51
- package/helper-apps/cortex-file-handler/function.json +2 -6
- package/helper-apps/cortex-file-handler/helper.js +34 -25
- package/helper-apps/cortex-file-handler/index.js +324 -136
- package/helper-apps/cortex-file-handler/localFileHandler.js +56 -57
- package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
- package/helper-apps/cortex-file-handler/package.json +8 -4
- package/helper-apps/cortex-file-handler/redis.js +23 -17
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +1 -1
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
- package/helper-apps/cortex-file-handler/services/ConversionService.js +288 -0
- package/helper-apps/cortex-file-handler/services/FileConversionService.js +53 -0
- package/helper-apps/cortex-file-handler/start.js +63 -38
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +88 -64
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +114 -91
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +351 -0
- package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
- package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +943 -642
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +31 -0
- package/helper-apps/cortex-markitdown/.funcignore +1 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
- package/helper-apps/cortex-markitdown/README.md +94 -0
- package/helper-apps/cortex-markitdown/host.json +15 -0
- package/helper-apps/cortex-markitdown/requirements.txt +2 -0
- package/lib/requestExecutor.js +44 -36
- package/package.json +1 -1
- package/pathways/system/entity/tools/sys_tool_cognitive_search.js +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
- package/server/plugins/openAiWhisperPlugin.js +59 -87
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
import test from 'ava';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { fileURLToPath } from 'url';
|
|
5
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
6
|
+
import axios from 'axios';
|
|
7
|
+
import FormData from 'form-data';
|
|
8
|
+
import { port } from '../start.js';
|
|
9
|
+
import { gcs } from '../blobHandler.js';
|
|
10
|
+
import { cleanupHashAndFile } from './testUtils.helper.js';
|
|
11
|
+
|
|
12
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
13
|
+
const __dirname = path.dirname(__filename);
|
|
14
|
+
const baseUrl = `http://localhost:${port}/api/CortexFileHandler`;
|
|
15
|
+
|
|
16
|
+
// Helper function to determine if GCS is configured
|
|
17
|
+
/**
 * Reports whether Google Cloud Storage credentials are present in the
 * environment.
 *
 * @returns {boolean} `true` when either `GCP_SERVICE_ACCOUNT_KEY_BASE64` or
 *   `GCP_SERVICE_ACCOUNT_KEY` is set to a non-empty value, otherwise `false`.
 */
function isGCSConfigured() {
  // Coerce explicitly: without this the helper hands back the raw credential
  // string (or undefined), which is easy to leak into logs or assertions.
  return Boolean(
    process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 ||
      process.env.GCP_SERVICE_ACCOUNT_KEY,
  );
}
|
|
23
|
+
|
|
24
|
+
// Helper function to create test files
|
|
25
|
+
/**
 * Writes `content` to a uniquely named file inside the `test-files` directory
 * that sits next to this test module.
 *
 * @param {string|Buffer} content - Data to write to the new file.
 * @param {string} extension - File extension without the leading dot.
 * @returns {Promise<string>} Path of the file that was created.
 */
async function createTestFile(content, extension) {
  const testDir = path.join(__dirname, 'test-files');
  // `recursive: true` makes mkdir succeed when the directory already exists,
  // so no separate existence check (and no check-then-create race) is needed.
  await fs.promises.mkdir(testDir, { recursive: true });

  const filename = path.join(testDir, `${uuidv4()}.${extension}`);
  // Non-blocking write: this helper is async and runs alongside a live server.
  await fs.promises.writeFile(filename, content);
  return filename;
}
|
|
34
|
+
|
|
35
|
+
// Helper function to upload file
|
|
36
|
+
/**
 * Uploads a local file to the file handler endpoint as multipart form data.
 *
 * @param {string} filePath - Path of the file to stream into the `file` field.
 * @param {string|null} [requestId=null] - Sent as the `requestId` field when truthy.
 * @param {string|null} [hash=null] - Sent as the `hash` field when truthy.
 * @returns {Promise<object>} The axios response. Every HTTP status resolves
 *   (nothing is thrown for 4xx/5xx), so callers must inspect `status`.
 */
async function uploadFile(filePath, requestId = null, hash = null) {
  const form = new FormData();
  form.append('file', fs.createReadStream(filePath));
  if (requestId) form.append('requestId', requestId);
  if (hash) form.append('hash', hash);

  const response = await axios.post(baseUrl, form, {
    // getHeaders() already yields "multipart/form-data; boundary=...".
    // Adding a bare 'Content-Type': 'multipart/form-data' on top of it can
    // replace that value and strip the boundary the server needs to parse
    // the body, so the form's own headers are passed through untouched.
    headers: form.getHeaders(),
    // Resolve on every status so tests can assert on error responses.
    validateStatus: () => true,
    timeout: 5000,
    maxContentLength: Infinity,
    maxBodyLength: Infinity,
  });

  return response;
}
|
|
55
|
+
|
|
56
|
+
// Helper function to verify GCS file
|
|
57
|
+
/**
 * Checks whether the object behind a GCS URL exists.
 *
 * The URL is split on `/`: segment 2 is used as the bucket name and the
 * remaining segments, re-joined, as the object name.
 *
 * @param {string} gcsUrl - URL of the object to look up.
 * @returns {Promise<boolean>} `true` when GCS is not configured or the client
 *   is unavailable (nothing to verify), the existence flag otherwise, and
 *   `false` if the lookup throws.
 */
async function verifyGCSFile(gcsUrl) {
  const canVerify = isGCSConfigured() && gcs;
  if (!canVerify) {
    return true;
  }

  try {
    const [, , bucketName, ...objectSegments] = gcsUrl.split('/');
    const objectName = objectSegments.join('/');
    const [found] = await gcs.bucket(bucketName).file(objectName).exists();
    return found;
  } catch (error) {
    console.error('Error verifying GCS file:', error);
    return false;
  }
}
|
|
70
|
+
|
|
71
|
+
// Helper function to fetch file content from a URL
|
|
72
|
+
/**
 * Downloads a URL and returns its body as a Buffer.
 *
 * @param {string} url - Location to fetch.
 * @returns {Promise<Buffer>} The response body bytes.
 */
async function fetchFileContent(url) {
  const { data } = await axios.get(url, { responseType: 'arraybuffer' });
  return Buffer.from(data);
}
|
|
76
|
+
|
|
77
|
+
// Setup: Create test directory
|
|
78
|
+
// Suite setup: make sure the scratch directory exists and expose its path
// to the other hooks through the AVA context.
test.before(async (t) => {
  const scratchDir = path.join(__dirname, 'test-files');
  await fs.promises.mkdir(scratchDir, { recursive: true });
  t.context = { testDir: scratchDir };
});
|
|
83
|
+
|
|
84
|
+
// Cleanup
|
|
85
|
+
// Suite teardown: remove the scratch directory and anything left inside it.
test.after.always(async (t) => {
  // This hook also runs when setup failed, in which case the context may
  // never have been populated. Fall back to the known directory path so the
  // cleanup does not throw and hide the original failure.
  const testDir = t.context?.testDir ?? path.join(__dirname, 'test-files');
  await fs.promises.rm(testDir, { recursive: true, force: true });
});
|
|
88
|
+
|
|
89
|
+
// Basic File Upload Tests
|
|
90
|
+
// Round-trips a small text file through the handler and checks that the
// stored bytes equal what was sent.
test.serial('should handle basic file upload', async (t) => {
  const expectedText = 'test content';
  const localPath = await createTestFile(expectedText, 'txt');
  const requestId = uuidv4();

  try {
    const { status, data } = await uploadFile(localPath, requestId);

    t.is(status, 200);
    t.truthy(data.url);
    t.truthy(data.filename);

    // Download what the handler stored and compare it byte-for-byte.
    const storedBytes = await fetchFileContent(data.url);
    t.deepEqual(
      storedBytes,
      Buffer.from(expectedText),
      'Uploaded file content should match',
    );
  } finally {
    // The local fixture is removed whether or not the assertions passed.
    fs.unlinkSync(localPath);
  }
});
|
|
109
|
+
|
|
110
|
+
// Uploads a file together with a hash, then confirms the hash can be looked
// up through the handler and that the stored bytes match the original.
test.serial('should handle file upload with hash', async (t) => {
  const fileContent = 'test content';
  const filePath = await createTestFile(fileContent, 'txt');
  const requestId = uuidv4();
  const hash = `test-hash-${uuidv4()}`;
  let uploadedUrl;
  let convertedUrl;

  try {
    // First upload the file
    const response = await uploadFile(filePath, requestId, hash);
    t.is(response.status, 200);
    t.truthy(response.data.url);
    uploadedUrl = response.data.url;
    if (response.data.converted && response.data.converted.url) {
      convertedUrl = response.data.converted.url;
    }
    console.log('Upload hash response.data', response.data);

    // Wait for Redis operations to complete and verify storage
    await new Promise((resolve) => setTimeout(resolve, 2000));

    const checkResponse = await axios.get(baseUrl, {
      params: {
        hash,
        checkHash: true,
      },
      // Resolve on every status so a failed lookup is reported via assertions.
      validateStatus: () => true,
    });
    // Log only the status and body. Printing the whole Axios response dumps
    // the request config and connection internals and buries the useful part.
    console.log('Upload hash checkResponse', {
      status: checkResponse.status,
      data: checkResponse.data,
    });
    if (checkResponse.status !== 200) {
      // Only log if not 200
      console.error('Hash check failed:', {
        status: checkResponse.status,
        data: checkResponse.data,
      });
    }
    // Hash should exist since we just uploaded it
    t.is(checkResponse.status, 200);
    t.truthy(checkResponse.data.hash);

    // Verify file exists and content matches
    const fileResponse = await axios.get(response.data.url, {
      responseType: 'arraybuffer',
    });
    t.is(fileResponse.status, 200);
    t.deepEqual(
      Buffer.from(fileResponse.data),
      Buffer.from(fileContent),
      'Uploaded file content should match',
    );
  } finally {
    fs.unlinkSync(filePath);
    await cleanupHashAndFile(hash, uploadedUrl, baseUrl);
    if (convertedUrl) {
      // The converted artefact is cleaned up under the `<hash>_converted` key.
      await cleanupHashAndFile(`${hash}_converted`, convertedUrl, baseUrl);
    }
  }
});
|
|
162
|
+
|
|
163
|
+
// Document Processing Tests
|
|
164
|
+
// Uploads a minimal PDF-looking file, checks the stored original, and, when
// the handler reports a converted variant, checks that it is downloadable
// and non-empty.
test.serial('should handle PDF document upload and conversion', async (t) => {
  // Just a PDF header line plus text; enough for the upload path.
  const pdfSource = '%PDF-1.4\nTest PDF content';
  const pdfPath = await createTestFile(pdfSource, 'pdf');
  const requestId = uuidv4();

  try {
    const { status, data } = await uploadFile(pdfPath, requestId);
    t.is(status, 200);
    t.truthy(data.url);

    // The stored original must be byte-identical to what was sent.
    const storedBytes = await fetchFileContent(data.url);
    t.deepEqual(
      storedBytes,
      Buffer.from(pdfSource),
      'Uploaded PDF content should match',
    );

    // Conversion is optional; only validate it when the response includes one.
    const { converted } = data;
    if (converted) {
      t.truthy(converted.url);
      const convertedResponse = await axios.get(converted.url, {
        responseType: 'arraybuffer',
      });
      t.is(convertedResponse.status, 200);
      // The converted output is not predictable here, so only require content.
      const convertedSize = Buffer.from(convertedResponse.data).length;
      t.true(convertedSize > 0, 'Converted file should not be empty');
    }
  } finally {
    fs.unlinkSync(pdfPath);
  }
});
|
|
191
|
+
|
|
192
|
+
// Media Chunking Tests
|
|
193
|
+
// Uploads a 1 MiB file with a media extension and validates whichever shape
// the handler answers with: an array of chunks or a single-file object.
test.serial('should handle media file chunking', async (t) => {
  // Create a large test file to trigger chunking
  const chunkContent = 'x'.repeat(1024 * 1024);
  const filePath = await createTestFile(chunkContent, 'mp4');
  const requestId = uuidv4();
  // Built once: every chunk comparison slices from the same source bytes.
  const originalBytes = Buffer.from(chunkContent);

  try {
    const response = await uploadFile(filePath, requestId);
    t.is(response.status, 200);
    t.truthy(response.data);

    // For media files, we expect either an array of chunks or a single URL
    if (Array.isArray(response.data)) {
      t.true(response.data.length > 0);

      // Verify each chunk
      for (const chunk of response.data) {
        t.truthy(chunk.uri);
        // An offset of 0 is valid for the first chunk, so test for presence
        // rather than truthiness.
        t.true(
          chunk.offset !== undefined && chunk.offset !== null,
          'Chunk should report an offset',
        );

        // Verify chunk exists and content matches
        const chunkResponse = await axios.get(chunk.uri, {
          responseType: 'arraybuffer',
        });
        t.is(chunkResponse.status, 200);

        // Each chunk should be a slice of the original content. When the
        // chunk carries no length, compare against everything from the
        // offset to the end of the file.
        const end =
          chunk.length == null ? undefined : chunk.offset + chunk.length;
        const expectedChunk = originalBytes.subarray(chunk.offset, end);
        t.deepEqual(
          Buffer.from(chunkResponse.data),
          expectedChunk,
          'Chunk content should match original',
        );

        // If GCS is configured, verify backup
        if (isGCSConfigured() && chunk.gcs) {
          const exists = await verifyGCSFile(chunk.gcs);
          t.true(exists, 'GCS chunk should exist');
        }
      }
    } else {
      // Single file response
      t.truthy(response.data.url);
      const fileResponse = await axios.get(response.data.url, {
        responseType: 'arraybuffer',
      });
      t.is(fileResponse.status, 200);
      t.deepEqual(
        Buffer.from(fileResponse.data),
        originalBytes,
        'Uploaded file content should match',
      );
    }
  } finally {
    fs.unlinkSync(filePath);
  }
});
|
|
237
|
+
|
|
238
|
+
// Error Handling Tests
|
|
239
|
+
// Sends a multipart request whose file part has no name and no content and
// expects the handler to reject it with a 400 and a specific message.
test.serial('should handle invalid file upload', async (t) => {
  const requestId = uuidv4();
  const form = new FormData();
  // Send a file with no name and no content
  form.append('file', Buffer.from(''), { filename: '' });
  form.append('requestId', requestId);

  const response = await axios.post(baseUrl, form, {
    // Use the form's own headers as-is: they carry the multipart boundary.
    // A bare 'Content-Type': 'multipart/form-data' override can strip it, and
    // the request would then fail for the wrong reason.
    headers: form.getHeaders(),
    // Resolve on every status so the 400 can be asserted instead of thrown.
    validateStatus: () => true,
    timeout: 5000,
  });

  // Log the response for debugging
  console.log('Invalid file upload response:', {
    status: response.status,
    data: response.data,
  });

  t.is(response.status, 400, 'Should reject invalid file with 400 status');
  t.is(
    response.data,
    'Invalid file: missing filename',
    'Should return correct error message',
  );
});
|
|
264
|
+
|
|
265
|
+
// Cleanup Tests
|
|
266
|
+
// Uploads a file, deletes it through the handler, and confirms that the
// public URL (and the GCS copy, when one was reported) is gone afterwards.
test.serial('should handle file deletion', async (t) => {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const localPath = await createTestFile('test content', 'txt');
  const requestId = uuidv4();

  try {
    const uploadResponse = await uploadFile(localPath, requestId);
    t.is(uploadResponse.status, 200);

    // Give the upload a moment to be fully written.
    await pause(1000);

    // The delete call is keyed by the part of the stored file name before
    // the first underscore.
    // NOTE(review): presumably that prefix is the id the handler stores the
    // file under; confirm against the handler's naming scheme.
    const { pathname } = new URL(uploadResponse.data.url);
    const storedName = pathname.split('/').pop();
    const [fileIdentifier] = storedName.split('_');
    console.log('File identifier for deletion:', fileIdentifier);

    const deleteUrl = `${baseUrl}?operation=delete&requestId=${fileIdentifier}`;
    console.log('Deleting file with URL:', deleteUrl);
    const deleteResponse = await axios.delete(deleteUrl);
    t.is(deleteResponse.status, 200);

    // Give the deletion a moment to complete.
    await pause(1000);

    // The original URL must no longer resolve.
    const verifyResponse = await axios.get(uploadResponse.data.url, {
      validateStatus: (status) => true,
    });
    t.is(verifyResponse.status, 404, 'File should be deleted');

    // When a GCS copy was reported, it must be gone as well.
    if (isGCSConfigured() && uploadResponse.data.gcs) {
      const stillThere = await verifyGCSFile(uploadResponse.data.gcs);
      t.false(stillThere, 'GCS file should be deleted');
    }
  } finally {
    fs.unlinkSync(localPath);
  }
});
|
|
307
|
+
|
|
308
|
+
// Save Option Test
|
|
309
|
+
// Uploads a document to obtain a reachable URL, then asks the handler to
// process that URL with `save: true` and checks the shape of the reply.
test.serial('should handle document upload with save option', async (t) => {
  const docText = 'Sample DOCX content';
  const docPath = await createTestFile(docText, 'docx');

  const initialRequestId = uuidv4();
  const saveRequestId = uuidv4();

  // Best-effort delete used during teardown: failures are ignored on purpose
  // so cleanup problems never change the test result.
  const deleteQuietly = (id) =>
    axios.delete(`${baseUrl}?operation=delete&requestId=${id}`).catch(() => {});

  let uploadedUrl;

  try {
    // Step 1: upload so there is a URL the handler can fetch from.
    const uploadResponse = await uploadFile(docPath, initialRequestId);
    t.is(uploadResponse.status, 200);
    t.truthy(uploadResponse.data.url, 'Upload should return a URL');

    uploadedUrl = uploadResponse.data.url;

    // Step 2: call the handler again, this time with the save flag.
    const saveQuery = {
      uri: uploadedUrl,
      requestId: saveRequestId,
      save: true,
    };
    const saveResponse = await axios.get(baseUrl, {
      params: saveQuery,
      validateStatus: (status) => true,
    });

    // The body is only required to be an array; it may be empty.
    t.is(saveResponse.status, 200, 'Save request should succeed');
    t.true(Array.isArray(saveResponse.data), 'Response body should be an array');
  } finally {
    fs.unlinkSync(docPath);

    // Remove the initially uploaded file, keyed by the stored file name's
    // prefix before the first underscore.
    if (uploadedUrl) {
      const storedName = new URL(uploadedUrl).pathname.split('/').pop();
      const [identifier] = storedName.split('_');
      await deleteQuietly(identifier);
    }

    // Remove anything the save request created.
    await deleteQuietly(saveRequestId);
  }
});
|
|
Binary file
|
|
Binary file
|