@aj-archipelago/cortex 1.3.49 → 1.3.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/config.js +1 -1
  2. package/helper-apps/cortex-browser/Dockerfile +19 -31
  3. package/helper-apps/cortex-browser/function_app.py +708 -181
  4. package/helper-apps/cortex-browser/requirements.txt +4 -4
  5. package/helper-apps/cortex-file-handler/blobHandler.js +850 -429
  6. package/helper-apps/cortex-file-handler/constants.js +64 -48
  7. package/helper-apps/cortex-file-handler/docHelper.js +7 -114
  8. package/helper-apps/cortex-file-handler/fileChunker.js +96 -51
  9. package/helper-apps/cortex-file-handler/function.json +2 -6
  10. package/helper-apps/cortex-file-handler/helper.js +34 -25
  11. package/helper-apps/cortex-file-handler/index.js +324 -136
  12. package/helper-apps/cortex-file-handler/localFileHandler.js +56 -57
  13. package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
  14. package/helper-apps/cortex-file-handler/package.json +8 -4
  15. package/helper-apps/cortex-file-handler/redis.js +23 -17
  16. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
  17. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
  18. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +1 -1
  19. package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
  20. package/helper-apps/cortex-file-handler/services/ConversionService.js +288 -0
  21. package/helper-apps/cortex-file-handler/services/FileConversionService.js +53 -0
  22. package/helper-apps/cortex-file-handler/start.js +63 -38
  23. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
  24. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +88 -64
  25. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +114 -91
  26. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +351 -0
  27. package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
  28. package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
  29. package/helper-apps/cortex-file-handler/tests/start.test.js +943 -642
  30. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +31 -0
  31. package/helper-apps/cortex-markitdown/.funcignore +1 -0
  32. package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
  33. package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
  34. package/helper-apps/cortex-markitdown/README.md +94 -0
  35. package/helper-apps/cortex-markitdown/host.json +15 -0
  36. package/helper-apps/cortex-markitdown/requirements.txt +2 -0
  37. package/lib/requestExecutor.js +44 -36
  38. package/package.json +1 -1
  39. package/pathways/system/entity/tools/sys_tool_cognitive_search.js +1 -1
  40. package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
  41. package/server/plugins/openAiWhisperPlugin.js +59 -87
  42. package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
@@ -1,148 +0,0 @@
1
- import test from 'ava';
2
- import { fileURLToPath } from 'url';
3
- import { dirname, join } from 'path';
4
- import fs from 'fs/promises';
5
- import { documentToText, easyChunker } from '../docHelper.js';
6
-
7
- const __dirname = dirname(fileURLToPath(import.meta.url));
8
-
9
- // Setup: Create test documents
10
- test.before(async t => {
11
- const testDir = join(__dirname, 'test-docs');
12
- await fs.mkdir(testDir, { recursive: true });
13
-
14
- // Create various test files
15
- const textFile = join(testDir, 'test.txt');
16
- const largeTextFile = join(testDir, 'large.txt');
17
- const unicodeFile = join(testDir, 'unicode.txt');
18
- const jsonFile = join(testDir, 'test.json');
19
- const emptyFile = join(testDir, 'empty.txt');
20
-
21
- // Regular text content
22
- await fs.writeFile(textFile, 'This is a test document content.\nIt has multiple lines.\nThird line here.');
23
-
24
- // Large text content (>100KB)
25
- const largeContent = 'Lorem ipsum '.repeat(10000);
26
- await fs.writeFile(largeTextFile, largeContent);
27
-
28
- // Unicode content
29
- const unicodeContent = '这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test';
30
- await fs.writeFile(unicodeFile, unicodeContent);
31
-
32
- // JSON content
33
- await fs.writeFile(jsonFile, JSON.stringify({ test: 'content' }));
34
-
35
- // Empty file
36
- await fs.writeFile(emptyFile, '');
37
-
38
- t.context = {
39
- testDir,
40
- textFile,
41
- largeTextFile,
42
- unicodeFile,
43
- jsonFile,
44
- emptyFile
45
- };
46
- });
47
-
48
- // Cleanup
49
- test.after.always(async t => {
50
- await fs.rm(t.context.testDir, { recursive: true, force: true });
51
- });
52
-
53
- // Test basic text file processing
54
- test('processes text files correctly', async t => {
55
- const result = await documentToText(t.context.textFile, 'text/plain');
56
- t.true(typeof result === 'string', 'Result should be a string');
57
- t.true(result.includes('test document content'), 'Result should contain file content');
58
- t.true(result.includes('multiple lines'), 'Result should preserve multiple lines');
59
- });
60
-
61
- // Test large file handling
62
- test('handles large text files', async t => {
63
- const result = await documentToText(t.context.largeTextFile, 'text/plain');
64
- t.true(result.length > 50000, 'Should handle large files');
65
- t.true(result.includes('Lorem ipsum'), 'Should contain expected content');
66
- });
67
-
68
- // Test Unicode handling
69
- test('handles Unicode content correctly', async t => {
70
- const result = await documentToText(t.context.unicodeFile, 'text/plain');
71
- t.true(result.includes('这是中文内容'), 'Should preserve Chinese characters');
72
- t.true(result.includes('これは日本語です'), 'Should preserve Japanese characters');
73
- t.true(result.includes('Это русский текст'), 'Should preserve Cyrillic characters');
74
- t.true(result.includes('🌟'), 'Should preserve emoji');
75
- });
76
-
77
- // Test JSON file handling
78
- test('rejects JSON files appropriately', async t => {
79
- await t.throwsAsync(
80
- async () => documentToText(t.context.jsonFile, 'application/json'),
81
- { message: 'Unsupported file type: json' }
82
- );
83
- });
84
-
85
- // Test empty file handling
86
- test('handles empty files appropriately', async t => {
87
- const result = await documentToText(t.context.emptyFile, 'text/plain');
88
- t.is(result, '', 'Empty file should return empty string');
89
- });
90
-
91
- // Test unsupported file types
92
- test('rejects unsupported file types', async t => {
93
- const unsupportedFile = join(t.context.testDir, 'unsupported.xyz');
94
- await fs.writeFile(unsupportedFile, 'test content');
95
- await t.throwsAsync(
96
- async () => documentToText(unsupportedFile, 'unsupported/type'),
97
- { message: 'Unsupported file type: xyz' }
98
- );
99
- });
100
-
101
- // Test text chunking functionality
102
- test('chunks text correctly with default settings', t => {
103
- const text = 'This is a test.\nSecond line.\nThird line.\nFourth line.';
104
- const chunks = easyChunker(text);
105
-
106
- t.true(Array.isArray(chunks), 'Should return an array of chunks');
107
- t.true(chunks.length > 0, 'Should create at least one chunk');
108
- t.true(chunks.every(chunk => typeof chunk === 'string'), 'All chunks should be strings');
109
- });
110
-
111
- // Test chunking with very long text
112
- test('handles chunking of long text', t => {
113
- const longText = 'Test sentence. '.repeat(1000);
114
- const chunks = easyChunker(longText);
115
-
116
- t.true(chunks.length > 1, 'Should split long text into multiple chunks');
117
- t.true(chunks.every(chunk => chunk.length <= 10000), 'Each chunk should not exceed max length');
118
- });
119
-
120
- // Test chunking with various delimiters
121
- test('respects sentence boundaries in chunking', t => {
122
- const text = 'First sentence. Second sentence! Third sentence? Fourth sentence.';
123
- const chunks = easyChunker(text);
124
-
125
- t.true(chunks.every(chunk =>
126
- chunk.match(/[.!?](\s|$)/) || chunk === chunks[chunks.length - 1]
127
- ), 'Chunks should end with sentence delimiters when possible');
128
- });
129
-
130
- // Test chunking with newlines
131
- test('handles newlines in chunking', t => {
132
- const text = 'Line 1\nLine 2\nLine 3\nLine 4';
133
- const chunks = easyChunker(text);
134
-
135
- t.true(chunks.some(chunk => chunk.includes('\n')), 'Should preserve newlines');
136
- });
137
-
138
- // Test chunking edge cases
139
- test('handles chunking edge cases', t => {
140
- // Empty string
141
- t.deepEqual(easyChunker(''), [''], 'Should handle empty string');
142
-
143
- // Single character
144
- t.deepEqual(easyChunker('a'), ['a'], 'Should handle single character');
145
-
146
- // Only whitespace
147
- t.deepEqual(easyChunker(' \n '), [' \n '], 'Should handle whitespace');
148
- });