@aj-archipelago/cortex 1.3.49 → 1.3.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/config.js +1 -1
  2. package/helper-apps/cortex-browser/Dockerfile +19 -31
  3. package/helper-apps/cortex-browser/function_app.py +708 -181
  4. package/helper-apps/cortex-browser/requirements.txt +4 -4
  5. package/helper-apps/cortex-file-handler/blobHandler.js +850 -429
  6. package/helper-apps/cortex-file-handler/constants.js +64 -48
  7. package/helper-apps/cortex-file-handler/docHelper.js +7 -114
  8. package/helper-apps/cortex-file-handler/fileChunker.js +96 -51
  9. package/helper-apps/cortex-file-handler/function.json +2 -6
  10. package/helper-apps/cortex-file-handler/helper.js +34 -25
  11. package/helper-apps/cortex-file-handler/index.js +324 -136
  12. package/helper-apps/cortex-file-handler/localFileHandler.js +56 -57
  13. package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
  14. package/helper-apps/cortex-file-handler/package.json +8 -4
  15. package/helper-apps/cortex-file-handler/redis.js +23 -17
  16. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
  17. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
  18. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +1 -1
  19. package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
  20. package/helper-apps/cortex-file-handler/services/ConversionService.js +288 -0
  21. package/helper-apps/cortex-file-handler/services/FileConversionService.js +53 -0
  22. package/helper-apps/cortex-file-handler/start.js +63 -38
  23. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
  24. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +88 -64
  25. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +114 -91
  26. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +351 -0
  27. package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
  28. package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
  29. package/helper-apps/cortex-file-handler/tests/start.test.js +943 -642
  30. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +31 -0
  31. package/helper-apps/cortex-markitdown/.funcignore +1 -0
  32. package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
  33. package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
  34. package/helper-apps/cortex-markitdown/README.md +94 -0
  35. package/helper-apps/cortex-markitdown/host.json +15 -0
  36. package/helper-apps/cortex-markitdown/requirements.txt +2 -0
  37. package/lib/requestExecutor.js +44 -36
  38. package/package.json +1 -1
  39. package/pathways/system/entity/tools/sys_tool_cognitive_search.js +1 -1
  40. package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
  41. package/server/plugins/openAiWhisperPlugin.js +59 -87
  42. package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
@@ -0,0 +1,351 @@
1
+ import test from 'ava';
2
+ import fs from 'fs';
3
+ import path from 'path';
4
+ import { fileURLToPath } from 'url';
5
+ import { v4 as uuidv4 } from 'uuid';
6
+ import axios from 'axios';
7
+ import FormData from 'form-data';
8
+ import { port } from '../start.js';
9
+ import { gcs } from '../blobHandler.js';
10
+ import { cleanupHashAndFile } from './testUtils.helper.js';
11
+
12
+ const __filename = fileURLToPath(import.meta.url);
13
+ const __dirname = path.dirname(__filename);
14
+ const baseUrl = `http://localhost:${port}/api/CortexFileHandler`;
15
+
16
// Helper function to determine whether GCS is configured.
// Coerces with Boolean() so callers get a strict true/false rather than
// a truthy env-var string or undefined (the `is*` name promises a boolean).
function isGCSConfigured() {
  return Boolean(
    process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 ||
      process.env.GCP_SERVICE_ACCOUNT_KEY,
  );
}
23
+
24
// Helper function to create a uniquely-named test file on disk.
// Previously declared async but performed only blocking sync I/O; now uses
// the fs.promises API so it does not stall the event loop.
// @param {string|Buffer} content - bytes to write into the file
// @param {string} extension - file extension without the leading dot
// @returns {Promise<string>} absolute path of the created file
async function createTestFile(content, extension) {
  const testDir = path.join(__dirname, 'test-files');
  // recursive: true makes this a no-op when the directory already exists
  await fs.promises.mkdir(testDir, { recursive: true });
  const filename = path.join(testDir, `${uuidv4()}.${extension}`);
  await fs.promises.writeFile(filename, content);
  return filename;
}
34
+
35
// Helper function to POST a file (plus optional requestId/hash fields) to
// the CortexFileHandler endpoint.
// BUGFIX: form.getHeaders() already supplies the correct
// "multipart/form-data; boundary=..." Content-Type. The previous version
// spread those headers and then overrode them with a bare
// "multipart/form-data", stripping the boundary and risking multipart
// parse failures on the server — the override is removed.
// @param {string} filePath - local path of the file to upload
// @param {?string} requestId - optional request correlation id
// @param {?string} hash - optional content hash to register
// @returns {Promise<import('axios').AxiosResponse>} raw axios response
//   (validateStatus accepts every status so callers assert on it)
async function uploadFile(filePath, requestId = null, hash = null) {
  const form = new FormData();
  form.append('file', fs.createReadStream(filePath));
  if (requestId) form.append('requestId', requestId);
  if (hash) form.append('hash', hash);

  const response = await axios.post(baseUrl, form, {
    headers: form.getHeaders(),
    validateStatus: (status) => true,
    timeout: 5000,
    maxContentLength: Infinity,
    maxBodyLength: Infinity,
  });

  return response;
}
55
+
56
// Helper function to verify that a file exists in GCS.
// When GCS is not configured (or no client is available) it reports
// success so the suite can run without cloud credentials.
// @param {string} gcsUrl - gs-style URL ("gs://bucket/path/to/object")
// @returns {Promise<boolean>} whether the object exists
async function verifyGCSFile(gcsUrl) {
  if (!isGCSConfigured() || !gcs) return true;

  try {
    const segments = gcsUrl.split('/');
    const bucketName = segments[2];
    const objectName = segments.slice(3).join('/');
    const [exists] = await gcs.bucket(bucketName).file(objectName).exists();
    return exists;
  } catch (error) {
    console.error('Error verifying GCS file:', error);
    return false;
  }
}
70
+
71
// Helper function to download a URL and return its body as a Buffer.
// @param {string} url - location to fetch
// @returns {Promise<Buffer>} raw response bytes
async function fetchFileContent(url) {
  const { data } = await axios.get(url, { responseType: 'arraybuffer' });
  return Buffer.from(data);
}
76
+
77
// Setup: ensure the shared test-files directory exists and expose it on
// the AVA context so the teardown hook can remove it.
test.before(async (t) => {
  const dir = path.join(__dirname, 'test-files');
  await fs.promises.mkdir(dir, { recursive: true });
  t.context = { testDir: dir };
});
83
+
84
// Cleanup: remove the shared test-files directory and everything in it,
// even when tests failed (after.always).
test.after.always(async (t) => {
  const { testDir } = t.context;
  await fs.promises.rm(testDir, { recursive: true, force: true });
});
88
+
89
// Basic File Upload Tests
test.serial('should handle basic file upload', async (t) => {
  const body = 'test content';
  const filePath = await createTestFile(body, 'txt');
  const requestId = uuidv4();

  try {
    const res = await uploadFile(filePath, requestId);

    t.is(res.status, 200);
    t.truthy(res.data.url);
    t.truthy(res.data.filename);

    // The stored file must round-trip byte-for-byte.
    const stored = await fetchFileContent(res.data.url);
    t.deepEqual(stored, Buffer.from(body), 'Uploaded file content should match');
  } finally {
    fs.unlinkSync(filePath);
  }
});
109
+
110
// Verifies that uploading with an explicit hash registers the hash for
// later lookup (checkHash) and that the stored bytes round-trip.
test.serial('should handle file upload with hash', async (t) => {
  const fileContent = 'test content';
  const filePath = await createTestFile(fileContent, 'txt');
  const requestId = uuidv4();
  const hash = 'test-hash-' + uuidv4();
  let uploadedUrl;
  let convertedUrl;
  try {
    // First upload the file
    const response = await uploadFile(filePath, requestId, hash);
    t.is(response.status, 200);
    t.truthy(response.data.url);
    uploadedUrl = response.data.url;
    if (response.data.converted && response.data.converted.url) {
      convertedUrl = response.data.converted.url;
    }
    console.log('Upload hash response.data', response.data)

    // Wait for Redis operations to complete and verify storage
    await new Promise(resolve => setTimeout(resolve, 2000));

    const checkResponse = await axios.get(baseUrl, {
      params: {
        hash,
        checkHash: true,
      },
      validateStatus: (status) => true,
    });
    // BUGFIX: log only the response body — dumping the whole axios response
    // object (config, request, socket, ...) floods the test output.
    console.log('Upload hash checkResponse', checkResponse.data)
    if (checkResponse.status !== 200) {
      // Only log if not 200
      console.error('Hash check failed:', {
        status: checkResponse.status,
        data: checkResponse.data
      });
    }
    // Hash should exist since we just uploaded it
    t.is(checkResponse.status, 200);
    t.truthy(checkResponse.data.hash);

    // Verify file exists and content matches
    const fileResponse = await axios.get(response.data.url, { responseType: 'arraybuffer' });
    t.is(fileResponse.status, 200);
    t.deepEqual(Buffer.from(fileResponse.data), Buffer.from(fileContent), 'Uploaded file content should match');
  } finally {
    fs.unlinkSync(filePath);
    // Always release the hash and both stored blobs, even on assertion failure.
    await cleanupHashAndFile(hash, uploadedUrl, baseUrl);
    if (convertedUrl) {
      await cleanupHashAndFile(`${hash}_converted`, convertedUrl, baseUrl);
    }
  }
});
162
+
163
// Document Processing Tests
test.serial('should handle PDF document upload and conversion', async (t) => {
  // Minimal payload that looks like a PDF (magic header plus text).
  const pdfBody = '%PDF-1.4\nTest PDF content';
  const filePath = await createTestFile(pdfBody, 'pdf');
  const requestId = uuidv4();

  try {
    const res = await uploadFile(filePath, requestId);
    t.is(res.status, 200);
    t.truthy(res.data.url);

    // Original bytes must survive the round trip.
    const stored = await fetchFileContent(res.data.url);
    t.deepEqual(stored, Buffer.from(pdfBody), 'Uploaded PDF content should match');

    // A converted rendition is optional; when present it must be
    // reachable and non-empty.
    if (res.data.converted) {
      t.truthy(res.data.converted.url);
      const convertedRes = await axios.get(res.data.converted.url, { responseType: 'arraybuffer' });
      t.is(convertedRes.status, 200);
      t.true(Buffer.from(convertedRes.data).length > 0, 'Converted file should not be empty');
    }
  } finally {
    fs.unlinkSync(filePath);
  }
});
191
+
192
// Media Chunking Tests
test.serial('should handle media file chunking', async (t) => {
  // Create a large test file (1 MiB) to trigger chunking
  const chunkContent = 'x'.repeat(1024 * 1024);
  const filePath = await createTestFile(chunkContent, 'mp4');
  const requestId = uuidv4();

  try {
    const response = await uploadFile(filePath, requestId);
    t.is(response.status, 200);
    t.truthy(response.data);

    // For media files, we expect either an array of chunks or a single URL
    if (Array.isArray(response.data)) {
      t.true(response.data.length > 0);

      // Verify each chunk
      for (const chunk of response.data) {
        t.truthy(chunk.uri);
        // BUGFIX: the first chunk legitimately starts at offset 0, which is
        // falsy — t.truthy(chunk.offset) would wrongly fail it. Assert the
        // type instead.
        t.is(typeof chunk.offset, 'number');

        // Verify chunk exists and content matches
        const chunkResponse = await axios.get(chunk.uri, { responseType: 'arraybuffer' });
        t.is(chunkResponse.status, 200);
        // Each chunk should be a slice of the original content. When the
        // server omits chunk.length, slice to the end of the buffer.
        // (The old `chunk.offset + chunk.length || undefined` only worked
        // because NaN is falsy — make the intent explicit.)
        const end = typeof chunk.length === 'number'
          ? chunk.offset + chunk.length
          : undefined;
        const expectedChunk = Buffer.from(chunkContent).slice(chunk.offset, end);
        t.deepEqual(Buffer.from(chunkResponse.data), expectedChunk, 'Chunk content should match original');

        // If GCS is configured, verify backup
        if (isGCSConfigured() && chunk.gcs) {
          const exists = await verifyGCSFile(chunk.gcs);
          t.true(exists, 'GCS chunk should exist');
        }
      }
    } else {
      // Single file response
      t.truthy(response.data.url);
      const fileResponse = await axios.get(response.data.url, { responseType: 'arraybuffer' });
      t.is(fileResponse.status, 200);
      t.deepEqual(Buffer.from(fileResponse.data), Buffer.from(chunkContent), 'Uploaded file content should match');
    }
  } finally {
    fs.unlinkSync(filePath);
  }
});
237
+
238
// Error Handling Tests
test.serial('should handle invalid file upload', async (t) => {
  const requestId = uuidv4();

  // Build a multipart body whose file part has no name and no content.
  const form = new FormData();
  form.append('file', Buffer.from(''), { filename: '' });
  form.append('requestId', requestId);

  const response = await axios.post(baseUrl, form, {
    headers: {
      ...form.getHeaders(),
      'Content-Type': 'multipart/form-data',
    },
    validateStatus: (status) => true,
    timeout: 5000,
  });

  // Log the response for debugging
  const { status, data } = response;
  console.log('Invalid file upload response:', { status, data });

  t.is(status, 400, 'Should reject invalid file with 400 status');
  t.is(data, 'Invalid file: missing filename', 'Should return correct error message');
});
264
+
265
// Cleanup Tests
test.serial('should handle file deletion', async (t) => {
  const filePath = await createTestFile('test content', 'txt');
  const requestId = uuidv4();
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  try {
    // Upload file
    const uploadResponse = await uploadFile(filePath, requestId);
    t.is(uploadResponse.status, 200);

    // Wait a moment for file to be fully written
    await pause(1000);

    // Extract the file identifier from the URL
    const fileUrl = new URL(uploadResponse.data.url);
    const fileIdentifier = fileUrl.pathname.split('/').pop().split('_')[0];
    console.log('File identifier for deletion:', fileIdentifier);

    // Delete file using the correct identifier
    const deleteUrl = `${baseUrl}?operation=delete&requestId=${fileIdentifier}`;
    console.log('Deleting file with URL:', deleteUrl);
    const deleteResponse = await axios.delete(deleteUrl);
    t.is(deleteResponse.status, 200);

    // Wait a moment for deletion to complete
    await pause(1000);

    // Verify file is gone
    const verifyResponse = await axios.get(uploadResponse.data.url, {
      validateStatus: () => true,
    });
    t.is(verifyResponse.status, 404, 'File should be deleted');

    // If GCS is configured, verify backup is gone
    if (isGCSConfigured() && uploadResponse.data.gcs) {
      const exists = await verifyGCSFile(uploadResponse.data.gcs);
      t.false(exists, 'GCS file should be deleted');
    }
  } finally {
    fs.unlinkSync(filePath);
  }
});
307
+
308
// Save Option Test
test.serial('should handle document upload with save option', async (t) => {
  const docBody = 'Sample DOCX content';
  const filePath = await createTestFile(docBody, 'docx');

  const initialRequestId = uuidv4();
  const saveRequestId = uuidv4();

  let uploadedUrl;

  try {
    // Step 1: upload the document so we have a publicly reachable URL.
    const uploadResponse = await uploadFile(filePath, initialRequestId);
    t.is(uploadResponse.status, 200);
    t.truthy(uploadResponse.data.url, 'Upload should return a URL');
    uploadedUrl = uploadResponse.data.url;

    // Step 2: call the handler again with the save flag.
    const saveResponse = await axios.get(baseUrl, {
      params: { uri: uploadedUrl, requestId: saveRequestId, save: true },
      validateStatus: () => true,
    });

    // The current implementation returns an empty array but should still be 200
    t.is(saveResponse.status, 200, 'Save request should succeed');
    t.true(Array.isArray(saveResponse.data), 'Response body should be an array');
  } finally {
    fs.unlinkSync(filePath);

    // Cleanup the initially uploaded file
    if (uploadedUrl) {
      const identifier = new URL(uploadedUrl).pathname.split('/').pop().split('_')[0];
      await axios.delete(`${baseUrl}?operation=delete&requestId=${identifier}`).catch(() => {});
    }

    // Cleanup files created by save request
    await axios.delete(`${baseUrl}?operation=delete&requestId=${saveRequestId}`).catch(() => {});
  }
});