@aj-archipelago/cortex 1.3.11 → 1.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/helper-apps/cortex-file-handler/.env.test +7 -0
  2. package/helper-apps/cortex-file-handler/.env.test.azure +6 -0
  3. package/helper-apps/cortex-file-handler/.env.test.gcs +9 -0
  4. package/helper-apps/cortex-file-handler/blobHandler.js +313 -204
  5. package/helper-apps/cortex-file-handler/constants.js +107 -0
  6. package/helper-apps/cortex-file-handler/docHelper.js +4 -1
  7. package/helper-apps/cortex-file-handler/fileChunker.js +170 -109
  8. package/helper-apps/cortex-file-handler/helper.js +82 -16
  9. package/helper-apps/cortex-file-handler/index.js +226 -146
  10. package/helper-apps/cortex-file-handler/localFileHandler.js +21 -3
  11. package/helper-apps/cortex-file-handler/package-lock.json +2622 -51
  12. package/helper-apps/cortex-file-handler/package.json +25 -4
  13. package/helper-apps/cortex-file-handler/redis.js +9 -18
  14. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +22 -0
  15. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +49 -0
  16. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +34 -0
  17. package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +49 -0
  18. package/helper-apps/cortex-file-handler/start.js +39 -4
  19. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +292 -0
  20. package/helper-apps/cortex-file-handler/tests/docHelper.test.js +148 -0
  21. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +311 -0
  22. package/helper-apps/cortex-file-handler/tests/start.test.js +930 -0
  23. package/package.json +1 -1
  24. package/pathways/system/entity/sys_entity_continue.js +1 -1
  25. package/pathways/system/entity/sys_entity_start.js +1 -0
  26. package/pathways/system/entity/sys_generator_video_vision.js +2 -1
  27. package/pathways/system/entity/sys_router_tool.js +6 -4
  28. package/server/plugins/openAiWhisperPlugin.js +9 -13
  29. package/server/plugins/replicateApiPlugin.js +54 -2
@@ -0,0 +1,148 @@
1
+ import test from 'ava';
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname, join } from 'path';
4
+ import fs from 'fs/promises';
5
+ import { documentToText, easyChunker } from '../docHelper.js';
6
+
7
+ const __dirname = dirname(fileURLToPath(import.meta.url));
8
+
9
+ // Setup: Create test documents
10
+ test.before(async t => {
11
+ const testDir = join(__dirname, 'test-docs');
12
+ await fs.mkdir(testDir, { recursive: true });
13
+
14
+ // Create various test files
15
+ const textFile = join(testDir, 'test.txt');
16
+ const largeTextFile = join(testDir, 'large.txt');
17
+ const unicodeFile = join(testDir, 'unicode.txt');
18
+ const jsonFile = join(testDir, 'test.json');
19
+ const emptyFile = join(testDir, 'empty.txt');
20
+
21
+ // Regular text content
22
+ await fs.writeFile(textFile, 'This is a test document content.\nIt has multiple lines.\nThird line here.');
23
+
24
+ // Large text content (>100KB)
25
+ const largeContent = 'Lorem ipsum '.repeat(10000);
26
+ await fs.writeFile(largeTextFile, largeContent);
27
+
28
+ // Unicode content
29
+ const unicodeContent = '这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test';
30
+ await fs.writeFile(unicodeFile, unicodeContent);
31
+
32
+ // JSON content
33
+ await fs.writeFile(jsonFile, JSON.stringify({ test: 'content' }));
34
+
35
+ // Empty file
36
+ await fs.writeFile(emptyFile, '');
37
+
38
+ t.context = {
39
+ testDir,
40
+ textFile,
41
+ largeTextFile,
42
+ unicodeFile,
43
+ jsonFile,
44
+ emptyFile
45
+ };
46
+ });
47
+
48
+ // Cleanup
49
+ test.after.always(async t => {
50
+ await fs.rm(t.context.testDir, { recursive: true, force: true });
51
+ });
52
+
53
+ // Test basic text file processing
54
+ test('processes text files correctly', async t => {
55
+ const result = await documentToText(t.context.textFile, 'text/plain');
56
+ t.true(typeof result === 'string', 'Result should be a string');
57
+ t.true(result.includes('test document content'), 'Result should contain file content');
58
+ t.true(result.includes('multiple lines'), 'Result should preserve multiple lines');
59
+ });
60
+
61
+ // Test large file handling
62
+ test('handles large text files', async t => {
63
+ const result = await documentToText(t.context.largeTextFile, 'text/plain');
64
+ t.true(result.length > 50000, 'Should handle large files');
65
+ t.true(result.includes('Lorem ipsum'), 'Should contain expected content');
66
+ });
67
+
68
+ // Test Unicode handling
69
+ test('handles Unicode content correctly', async t => {
70
+ const result = await documentToText(t.context.unicodeFile, 'text/plain');
71
+ t.true(result.includes('这是中文内容'), 'Should preserve Chinese characters');
72
+ t.true(result.includes('これは日本語です'), 'Should preserve Japanese characters');
73
+ t.true(result.includes('Это русский текст'), 'Should preserve Cyrillic characters');
74
+ t.true(result.includes('🌟'), 'Should preserve emoji');
75
+ });
76
+
77
+ // Test JSON file handling
78
+ test('rejects JSON files appropriately', async t => {
79
+ await t.throwsAsync(
80
+ async () => documentToText(t.context.jsonFile, 'application/json'),
81
+ { message: 'Unsupported file type: json' }
82
+ );
83
+ });
84
+
85
+ // Test empty file handling
86
+ test('handles empty files appropriately', async t => {
87
+ const result = await documentToText(t.context.emptyFile, 'text/plain');
88
+ t.is(result, '', 'Empty file should return empty string');
89
+ });
90
+
91
+ // Test unsupported file types
92
+ test('rejects unsupported file types', async t => {
93
+ const unsupportedFile = join(t.context.testDir, 'unsupported.xyz');
94
+ await fs.writeFile(unsupportedFile, 'test content');
95
+ await t.throwsAsync(
96
+ async () => documentToText(unsupportedFile, 'unsupported/type'),
97
+ { message: 'Unsupported file type: xyz' }
98
+ );
99
+ });
100
+
101
+ // Test text chunking functionality
102
+ test('chunks text correctly with default settings', t => {
103
+ const text = 'This is a test.\nSecond line.\nThird line.\nFourth line.';
104
+ const chunks = easyChunker(text);
105
+
106
+ t.true(Array.isArray(chunks), 'Should return an array of chunks');
107
+ t.true(chunks.length > 0, 'Should create at least one chunk');
108
+ t.true(chunks.every(chunk => typeof chunk === 'string'), 'All chunks should be strings');
109
+ });
110
+
111
+ // Test chunking with very long text
112
+ test('handles chunking of long text', t => {
113
+ const longText = 'Test sentence. '.repeat(1000);
114
+ const chunks = easyChunker(longText);
115
+
116
+ t.true(chunks.length > 1, 'Should split long text into multiple chunks');
117
+ t.true(chunks.every(chunk => chunk.length <= 10000), 'Each chunk should not exceed max length');
118
+ });
119
+
120
+ // Test chunking with various delimiters
121
+ test('respects sentence boundaries in chunking', t => {
122
+ const text = 'First sentence. Second sentence! Third sentence? Fourth sentence.';
123
+ const chunks = easyChunker(text);
124
+
125
+ t.true(chunks.every(chunk =>
126
+ chunk.match(/[.!?](\s|$)/) || chunk === chunks[chunks.length - 1]
127
+ ), 'Chunks should end with sentence delimiters when possible');
128
+ });
129
+
130
+ // Test chunking with newlines
131
+ test('handles newlines in chunking', t => {
132
+ const text = 'Line 1\nLine 2\nLine 3\nLine 4';
133
+ const chunks = easyChunker(text);
134
+
135
+ t.true(chunks.some(chunk => chunk.includes('\n')), 'Should preserve newlines');
136
+ });
137
+
138
+ // Test chunking edge cases
139
+ test('handles chunking edge cases', t => {
140
+ // Empty string
141
+ t.deepEqual(easyChunker(''), [''], 'Should handle empty string');
142
+
143
+ // Single character
144
+ t.deepEqual(easyChunker('a'), ['a'], 'Should handle single character');
145
+
146
+ // Only whitespace
147
+ t.deepEqual(easyChunker(' \n '), [' \n '], 'Should handle whitespace');
148
+ });
@@ -0,0 +1,311 @@
1
+ import test from 'ava';
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname, join } from 'path';
4
+ import fs from 'fs/promises';
5
+ import { existsSync } from 'fs';
6
+ import { splitMediaFile, downloadFile } from '../fileChunker.js';
7
+ import nock from 'nock';
8
+ import os from 'os';
9
+ import { execSync } from 'child_process';
10
+ import { performance } from 'perf_hooks';
11
+
12
+ const __dirname = dirname(fileURLToPath(import.meta.url));
13
+
14
+ // Helper function to create a test media file of specified duration using ffmpeg
15
+ async function createTestMediaFile(filepath, durationSeconds = 10) {
16
+ try {
17
+ console.log(`Creating test file: ${filepath} (${durationSeconds}s)`);
18
+ // Generate silence using ffmpeg
19
+ execSync(`ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t ${durationSeconds} -q:a 9 -acodec libmp3lame "${filepath}"`, {
20
+ stdio: ['ignore', 'pipe', 'pipe'] // Capture stdout and stderr
21
+ });
22
+
23
+ // Verify the file was created and has content
24
+ const stats = await fs.stat(filepath);
25
+ if (stats.size === 0) {
26
+ throw new Error('Generated file is empty');
27
+ }
28
+ console.log(`Successfully created ${filepath} (${(stats.size / 1024 / 1024).toFixed(2)}MB)`);
29
+ } catch (error) {
30
+ console.error(`Error creating test file ${filepath}:`, error.message);
31
+ if (error.stderr) console.error('ffmpeg error:', error.stderr.toString());
32
+ throw error;
33
+ }
34
+ }
35
+
36
+ // Setup: Create test files and mock external services
37
+ test.before(async t => {
38
+ // Check if ffmpeg is available
39
+ try {
40
+ execSync('ffmpeg -version', { stdio: 'ignore' });
41
+ } catch (error) {
42
+ console.error('ffmpeg is not installed. Please install it to run these tests.');
43
+ process.exit(1);
44
+ }
45
+
46
+ const testDir = join(__dirname, 'test-files');
47
+ await fs.mkdir(testDir, { recursive: true });
48
+
49
+ try {
50
+ // Create test files of different durations
51
+ const testFile1s = join(testDir, 'test-1s.mp3');
52
+ const testFile10s = join(testDir, 'test-10s.mp3');
53
+ const testFile600s = join(testDir, 'test-600s.mp3');
54
+
55
+ await createTestMediaFile(testFile1s, 1);
56
+ await createTestMediaFile(testFile10s, 10);
57
+ await createTestMediaFile(testFile600s, 600);
58
+
59
+ // Create large test files
60
+ const testFile1h = join(testDir, 'test-1h.mp3');
61
+ const testFile4h = join(testDir, 'test-4h.mp3');
62
+
63
+ console.log('\nCreating large test files (this may take a while)...');
64
+ await createTestMediaFile(testFile1h, 3600);
65
+ await createTestMediaFile(testFile4h, 14400);
66
+
67
+ t.context = {
68
+ testDir,
69
+ testFile1s,
70
+ testFile10s,
71
+ testFile600s,
72
+ testFile1h,
73
+ testFile4h
74
+ };
75
+
76
+ // Setup nock for URL tests with proper headers
77
+ nock('https://example.com')
78
+ .get('/media/test.mp3')
79
+ .replyWithFile(200, testFile10s, {
80
+ 'Content-Type': 'audio/mpeg',
81
+ 'Content-Length': (await fs.stat(testFile10s)).size.toString()
82
+ })
83
+ .persist();
84
+ } catch (error) {
85
+ console.error('Error during test setup:', error);
86
+ // Clean up any partially created files
87
+ try {
88
+ await fs.rm(testDir, { recursive: true, force: true });
89
+ } catch (cleanupError) {
90
+ console.error('Error during cleanup:', cleanupError);
91
+ }
92
+ throw error;
93
+ }
94
+ });
95
+
96
+ // Cleanup: Remove test files
97
+ test.after.always(async t => {
98
+ // Clean up test files
99
+ if (t.context.testDir) {
100
+ try {
101
+ await fs.rm(t.context.testDir, { recursive: true, force: true });
102
+ console.log('Test files cleaned up successfully');
103
+ } catch (error) {
104
+ console.error('Error cleaning up test files:', error);
105
+ }
106
+ }
107
+
108
+ // Clean up nock
109
+ nock.cleanAll();
110
+ });
111
+
112
+ // Test successful chunking of a short file
113
+ test('successfully chunks short media file', async t => {
114
+ const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(t.context.testFile1s);
115
+
116
+ t.true(Array.isArray(chunkPromises), 'Should return array of promises');
117
+ t.true(Array.isArray(chunkOffsets), 'Should return array of offsets');
118
+ t.true(typeof uniqueOutputPath === 'string', 'Should return output path');
119
+
120
+ // Should only create one chunk for 1s file
121
+ t.is(chunkPromises.length, 1, 'Should create single chunk for short file');
122
+
123
+ // Wait for chunks to process
124
+ const chunkPaths = await Promise.all(chunkPromises);
125
+
126
+ // Verify chunk exists
127
+ t.true(existsSync(chunkPaths[0]), 'Chunk file should exist');
128
+
129
+ // Cleanup
130
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
131
+ });
132
+
133
+ // Test chunking of a longer file
134
+ test('correctly chunks longer media file', async t => {
135
+ const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(t.context.testFile600s);
136
+
137
+ // For 600s file with 500s chunks, should create 2 chunks
138
+ t.is(chunkPromises.length, 2, 'Should create correct number of chunks');
139
+ t.is(chunkOffsets.length, 2, 'Should create correct number of offsets');
140
+
141
+ // Verify offsets
142
+ t.is(chunkOffsets[0], 0, 'First chunk should start at 0');
143
+ t.is(chunkOffsets[1], 500, 'Second chunk should start at 500s');
144
+
145
+ // Wait for chunks to process
146
+ const chunkPaths = await Promise.all(chunkPromises);
147
+
148
+ // Verify all chunks exist
149
+ for (const chunkPath of chunkPaths) {
150
+ t.true(existsSync(chunkPath), 'Each chunk file should exist');
151
+ }
152
+
153
+ // Cleanup
154
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
155
+ });
156
+
157
+ // Test custom chunk duration
158
+ test('respects custom chunk duration', async t => {
159
+ const customDuration = 5; // 5 seconds
160
+ const { chunkPromises, chunkOffsets } = await splitMediaFile(t.context.testFile10s, customDuration);
161
+
162
+ // For 10s file with 5s chunks, should create 2 chunks
163
+ t.is(chunkPromises.length, 2, 'Should create correct number of chunks for custom duration');
164
+ t.deepEqual(chunkOffsets, [0, 5], 'Should have correct offset points');
165
+ });
166
+
167
+ // Test URL-based file processing
168
+ test('processes media file from URL', async t => {
169
+ const url = 'https://example.com/media/test.mp3';
170
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(url);
171
+
172
+ // Wait for chunks to process
173
+ const chunkPaths = await Promise.all(chunkPromises);
174
+
175
+ // Verify chunks were created
176
+ for (const chunkPath of chunkPaths) {
177
+ t.true(existsSync(chunkPath), 'Chunk files should exist for URL-based media');
178
+ }
179
+
180
+ // Cleanup
181
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
182
+ });
183
+
184
+ // Test error handling for invalid files
185
+ test('handles invalid media files gracefully', async t => {
186
+ const invalidFile = join(t.context.testDir, 'invalid.mp3');
187
+ await fs.writeFile(invalidFile, 'not a valid mp3 file');
188
+
189
+ await t.throwsAsync(
190
+ async () => splitMediaFile(invalidFile),
191
+ { message: /Error processing media file/ }
192
+ );
193
+ });
194
+
195
+ // Test error handling for non-existent files
196
+ test('handles non-existent files gracefully', async t => {
197
+ const nonExistentFile = join(t.context.testDir, 'non-existent.mp3');
198
+
199
+ await t.throwsAsync(
200
+ async () => splitMediaFile(nonExistentFile),
201
+ { message: /Error processing media file/ }
202
+ );
203
+ });
204
+
205
+ // Test file download functionality
206
+ test('successfully downloads file from URL', async t => {
207
+ const url = 'https://example.com/media/test.mp3';
208
+ const outputPath = join(os.tmpdir(), 'downloaded-test.mp3');
209
+
210
+ await downloadFile(url, outputPath);
211
+ t.true(existsSync(outputPath), 'Downloaded file should exist');
212
+
213
+ // Cleanup
214
+ await fs.unlink(outputPath);
215
+ });
216
+
217
+ // Test error handling for invalid URLs in download
218
+ test('handles invalid URLs in download gracefully', async t => {
219
+ const invalidUrl = 'https://invalid-url-that-does-not-exist.com/test.mp3';
220
+ const outputPath = join(os.tmpdir(), 'should-not-exist.mp3');
221
+
222
+ await t.throwsAsync(
223
+ async () => downloadFile(invalidUrl, outputPath)
224
+ );
225
+ });
226
+
227
+ // Helper to format duration nicely
228
+ function formatDuration(ms) {
229
+ if (ms < 1000) return `${ms}ms`;
230
+ const seconds = ms / 1000;
231
+ if (seconds < 60) return `${seconds.toFixed(2)}s`;
232
+ const minutes = seconds / 60;
233
+ if (minutes < 60) return `${minutes.toFixed(2)}m`;
234
+ const hours = minutes / 60;
235
+ return `${hours.toFixed(2)}h`;
236
+ }
237
+
238
+ // Test performance with 1-hour file
239
+ test('performance test - 1 hour file', async t => {
240
+ const start = performance.now();
241
+
242
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile1h);
243
+
244
+ // Wait for all chunks to complete
245
+ const chunkPaths = await Promise.all(chunkPromises);
246
+ const end = performance.now();
247
+ const duration = end - start;
248
+
249
+ console.log(`\n1 hour file processing stats:
250
+ - Total time: ${formatDuration(duration)}
251
+ - Chunks created: ${chunkPaths.length}
252
+ - Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
253
+ - Processing speed: ${((3600 / (duration / 1000))).toFixed(2)}x realtime`);
254
+
255
+ t.true(chunkPaths.length > 0, 'Should create chunks');
256
+ t.true(duration > 0, 'Should measure time');
257
+
258
+ // Cleanup
259
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
260
+ });
261
+
262
+ // Test performance with 4-hour file
263
+ test('performance test - 4 hour file', async t => {
264
+ const start = performance.now();
265
+
266
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile4h);
267
+
268
+ // Wait for all chunks to complete
269
+ const chunkPaths = await Promise.all(chunkPromises);
270
+ const end = performance.now();
271
+ const duration = end - start;
272
+
273
+ console.log(`\n4 hour file processing stats:
274
+ - Total time: ${formatDuration(duration)}
275
+ - Chunks created: ${chunkPaths.length}
276
+ - Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
277
+ - Processing speed: ${((14400 / (duration / 1000))).toFixed(2)}x realtime`);
278
+
279
+ t.true(chunkPaths.length > 0, 'Should create chunks');
280
+ t.true(duration > 0, 'Should measure time');
281
+
282
+ // Cleanup
283
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
284
+ });
285
+
286
+ // Test memory usage during large file processing
287
+ test('memory usage during large file processing', async t => {
288
+ const initialMemory = process.memoryUsage().heapUsed;
289
+ let peakMemory = initialMemory;
290
+
291
+ const interval = setInterval(() => {
292
+ const used = process.memoryUsage().heapUsed;
293
+ peakMemory = Math.max(peakMemory, used);
294
+ }, 100);
295
+
296
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile4h);
297
+ await Promise.all(chunkPromises);
298
+
299
+ clearInterval(interval);
300
+
301
+ const memoryIncrease = (peakMemory - initialMemory) / 1024 / 1024; // Convert to MB
302
+ console.log(`\nMemory usage stats:
303
+ - Initial memory: ${(initialMemory / 1024 / 1024).toFixed(2)}MB
304
+ - Peak memory: ${(peakMemory / 1024 / 1024).toFixed(2)}MB
305
+ - Memory increase: ${memoryIncrease.toFixed(2)}MB`);
306
+
307
+ t.true(memoryIncrease >= 0, 'Should track memory usage');
308
+
309
+ // Cleanup
310
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
311
+ });