@aj-archipelago/cortex 1.3.49 → 1.3.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-browser/Dockerfile +19 -31
- package/helper-apps/cortex-browser/function_app.py +708 -181
- package/helper-apps/cortex-browser/requirements.txt +4 -4
- package/helper-apps/cortex-file-handler/blobHandler.js +850 -429
- package/helper-apps/cortex-file-handler/constants.js +64 -48
- package/helper-apps/cortex-file-handler/docHelper.js +7 -114
- package/helper-apps/cortex-file-handler/fileChunker.js +96 -51
- package/helper-apps/cortex-file-handler/function.json +2 -6
- package/helper-apps/cortex-file-handler/helper.js +34 -25
- package/helper-apps/cortex-file-handler/index.js +324 -136
- package/helper-apps/cortex-file-handler/localFileHandler.js +56 -57
- package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
- package/helper-apps/cortex-file-handler/package.json +8 -4
- package/helper-apps/cortex-file-handler/redis.js +23 -17
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +1 -1
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
- package/helper-apps/cortex-file-handler/services/ConversionService.js +288 -0
- package/helper-apps/cortex-file-handler/services/FileConversionService.js +53 -0
- package/helper-apps/cortex-file-handler/start.js +63 -38
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +88 -64
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +114 -91
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +351 -0
- package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
- package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +943 -642
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +31 -0
- package/helper-apps/cortex-markitdown/.funcignore +1 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
- package/helper-apps/cortex-markitdown/README.md +94 -0
- package/helper-apps/cortex-markitdown/host.json +15 -0
- package/helper-apps/cortex-markitdown/requirements.txt +2 -0
- package/lib/requestExecutor.js +44 -36
- package/package.json +1 -1
- package/pathways/system/entity/tools/sys_tool_cognitive_search.js +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
- package/server/plugins/openAiWhisperPlugin.js +59 -87
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
import test from 'ava';
|
|
2
|
-
import { fileURLToPath } from 'url';
|
|
3
|
-
import { dirname, join } from 'path';
|
|
4
|
-
import fs from 'fs/promises';
|
|
5
|
-
import { documentToText, easyChunker } from '../docHelper.js';
|
|
6
|
-
|
|
7
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
8
|
-
|
|
9
|
-
// Setup: Create test documents
|
|
10
|
-
test.before(async t => {
|
|
11
|
-
const testDir = join(__dirname, 'test-docs');
|
|
12
|
-
await fs.mkdir(testDir, { recursive: true });
|
|
13
|
-
|
|
14
|
-
// Create various test files
|
|
15
|
-
const textFile = join(testDir, 'test.txt');
|
|
16
|
-
const largeTextFile = join(testDir, 'large.txt');
|
|
17
|
-
const unicodeFile = join(testDir, 'unicode.txt');
|
|
18
|
-
const jsonFile = join(testDir, 'test.json');
|
|
19
|
-
const emptyFile = join(testDir, 'empty.txt');
|
|
20
|
-
|
|
21
|
-
// Regular text content
|
|
22
|
-
await fs.writeFile(textFile, 'This is a test document content.\nIt has multiple lines.\nThird line here.');
|
|
23
|
-
|
|
24
|
-
// Large text content (>100KB)
|
|
25
|
-
const largeContent = 'Lorem ipsum '.repeat(10000);
|
|
26
|
-
await fs.writeFile(largeTextFile, largeContent);
|
|
27
|
-
|
|
28
|
-
// Unicode content
|
|
29
|
-
const unicodeContent = '这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test';
|
|
30
|
-
await fs.writeFile(unicodeFile, unicodeContent);
|
|
31
|
-
|
|
32
|
-
// JSON content
|
|
33
|
-
await fs.writeFile(jsonFile, JSON.stringify({ test: 'content' }));
|
|
34
|
-
|
|
35
|
-
// Empty file
|
|
36
|
-
await fs.writeFile(emptyFile, '');
|
|
37
|
-
|
|
38
|
-
t.context = {
|
|
39
|
-
testDir,
|
|
40
|
-
textFile,
|
|
41
|
-
largeTextFile,
|
|
42
|
-
unicodeFile,
|
|
43
|
-
jsonFile,
|
|
44
|
-
emptyFile
|
|
45
|
-
};
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
// Cleanup
|
|
49
|
-
test.after.always(async t => {
|
|
50
|
-
await fs.rm(t.context.testDir, { recursive: true, force: true });
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
// Test basic text file processing
|
|
54
|
-
test('processes text files correctly', async t => {
|
|
55
|
-
const result = await documentToText(t.context.textFile, 'text/plain');
|
|
56
|
-
t.true(typeof result === 'string', 'Result should be a string');
|
|
57
|
-
t.true(result.includes('test document content'), 'Result should contain file content');
|
|
58
|
-
t.true(result.includes('multiple lines'), 'Result should preserve multiple lines');
|
|
59
|
-
});
|
|
60
|
-
|
|
61
|
-
// Test large file handling
|
|
62
|
-
test('handles large text files', async t => {
|
|
63
|
-
const result = await documentToText(t.context.largeTextFile, 'text/plain');
|
|
64
|
-
t.true(result.length > 50000, 'Should handle large files');
|
|
65
|
-
t.true(result.includes('Lorem ipsum'), 'Should contain expected content');
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
// Test Unicode handling
|
|
69
|
-
test('handles Unicode content correctly', async t => {
|
|
70
|
-
const result = await documentToText(t.context.unicodeFile, 'text/plain');
|
|
71
|
-
t.true(result.includes('这是中文内容'), 'Should preserve Chinese characters');
|
|
72
|
-
t.true(result.includes('これは日本語です'), 'Should preserve Japanese characters');
|
|
73
|
-
t.true(result.includes('Это русский текст'), 'Should preserve Cyrillic characters');
|
|
74
|
-
t.true(result.includes('🌟'), 'Should preserve emoji');
|
|
75
|
-
});
|
|
76
|
-
|
|
77
|
-
// Test JSON file handling
|
|
78
|
-
test('rejects JSON files appropriately', async t => {
|
|
79
|
-
await t.throwsAsync(
|
|
80
|
-
async () => documentToText(t.context.jsonFile, 'application/json'),
|
|
81
|
-
{ message: 'Unsupported file type: json' }
|
|
82
|
-
);
|
|
83
|
-
});
|
|
84
|
-
|
|
85
|
-
// Test empty file handling
|
|
86
|
-
test('handles empty files appropriately', async t => {
|
|
87
|
-
const result = await documentToText(t.context.emptyFile, 'text/plain');
|
|
88
|
-
t.is(result, '', 'Empty file should return empty string');
|
|
89
|
-
});
|
|
90
|
-
|
|
91
|
-
// Test unsupported file types
|
|
92
|
-
test('rejects unsupported file types', async t => {
|
|
93
|
-
const unsupportedFile = join(t.context.testDir, 'unsupported.xyz');
|
|
94
|
-
await fs.writeFile(unsupportedFile, 'test content');
|
|
95
|
-
await t.throwsAsync(
|
|
96
|
-
async () => documentToText(unsupportedFile, 'unsupported/type'),
|
|
97
|
-
{ message: 'Unsupported file type: xyz' }
|
|
98
|
-
);
|
|
99
|
-
});
|
|
100
|
-
|
|
101
|
-
// Test text chunking functionality
|
|
102
|
-
test('chunks text correctly with default settings', t => {
|
|
103
|
-
const text = 'This is a test.\nSecond line.\nThird line.\nFourth line.';
|
|
104
|
-
const chunks = easyChunker(text);
|
|
105
|
-
|
|
106
|
-
t.true(Array.isArray(chunks), 'Should return an array of chunks');
|
|
107
|
-
t.true(chunks.length > 0, 'Should create at least one chunk');
|
|
108
|
-
t.true(chunks.every(chunk => typeof chunk === 'string'), 'All chunks should be strings');
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
// Test chunking with very long text
|
|
112
|
-
test('handles chunking of long text', t => {
|
|
113
|
-
const longText = 'Test sentence. '.repeat(1000);
|
|
114
|
-
const chunks = easyChunker(longText);
|
|
115
|
-
|
|
116
|
-
t.true(chunks.length > 1, 'Should split long text into multiple chunks');
|
|
117
|
-
t.true(chunks.every(chunk => chunk.length <= 10000), 'Each chunk should not exceed max length');
|
|
118
|
-
});
|
|
119
|
-
|
|
120
|
-
// Test chunking with various delimiters
|
|
121
|
-
test('respects sentence boundaries in chunking', t => {
|
|
122
|
-
const text = 'First sentence. Second sentence! Third sentence? Fourth sentence.';
|
|
123
|
-
const chunks = easyChunker(text);
|
|
124
|
-
|
|
125
|
-
t.true(chunks.every(chunk =>
|
|
126
|
-
chunk.match(/[.!?](\s|$)/) || chunk === chunks[chunks.length - 1]
|
|
127
|
-
), 'Chunks should end with sentence delimiters when possible');
|
|
128
|
-
});
|
|
129
|
-
|
|
130
|
-
// Test chunking with newlines
|
|
131
|
-
test('handles newlines in chunking', t => {
|
|
132
|
-
const text = 'Line 1\nLine 2\nLine 3\nLine 4';
|
|
133
|
-
const chunks = easyChunker(text);
|
|
134
|
-
|
|
135
|
-
t.true(chunks.some(chunk => chunk.includes('\n')), 'Should preserve newlines');
|
|
136
|
-
});
|
|
137
|
-
|
|
138
|
-
// Test chunking edge cases
|
|
139
|
-
test('handles chunking edge cases', t => {
|
|
140
|
-
// Empty string
|
|
141
|
-
t.deepEqual(easyChunker(''), [''], 'Should handle empty string');
|
|
142
|
-
|
|
143
|
-
// Single character
|
|
144
|
-
t.deepEqual(easyChunker('a'), ['a'], 'Should handle single character');
|
|
145
|
-
|
|
146
|
-
// Only whitespace
|
|
147
|
-
t.deepEqual(easyChunker(' \n '), [' \n '], 'Should handle whitespace');
|
|
148
|
-
});
|