@aj-archipelago/cortex 1.3.11 → 1.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/helper-apps/cortex-file-handler/.env.test +7 -0
  2. package/helper-apps/cortex-file-handler/.env.test.azure +6 -0
  3. package/helper-apps/cortex-file-handler/.env.test.gcs +9 -0
  4. package/helper-apps/cortex-file-handler/blobHandler.js +313 -204
  5. package/helper-apps/cortex-file-handler/constants.js +107 -0
  6. package/helper-apps/cortex-file-handler/docHelper.js +4 -1
  7. package/helper-apps/cortex-file-handler/fileChunker.js +170 -109
  8. package/helper-apps/cortex-file-handler/helper.js +82 -16
  9. package/helper-apps/cortex-file-handler/index.js +226 -146
  10. package/helper-apps/cortex-file-handler/localFileHandler.js +21 -3
  11. package/helper-apps/cortex-file-handler/package-lock.json +2622 -51
  12. package/helper-apps/cortex-file-handler/package.json +25 -4
  13. package/helper-apps/cortex-file-handler/redis.js +9 -18
  14. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +22 -0
  15. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +49 -0
  16. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +34 -0
  17. package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +49 -0
  18. package/helper-apps/cortex-file-handler/start.js +39 -4
  19. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +292 -0
  20. package/helper-apps/cortex-file-handler/tests/docHelper.test.js +148 -0
  21. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +311 -0
  22. package/helper-apps/cortex-file-handler/tests/start.test.js +930 -0
  23. package/package.json +1 -1
  24. package/pathways/system/entity/sys_entity_continue.js +1 -1
  25. package/pathways/system/entity/sys_entity_start.js +1 -0
  26. package/pathways/system/entity/sys_generator_video_vision.js +2 -1
  27. package/pathways/system/entity/sys_router_tool.js +6 -4
  28. package/server/plugins/openAiWhisperPlugin.js +9 -13
  29. package/server/plugins/replicateApiPlugin.js +54 -2
@@ -0,0 +1,148 @@
1
+ import test from 'ava';
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname, join } from 'path';
4
+ import fs from 'fs/promises';
5
+ import { documentToText, easyChunker } from '../docHelper.js';
6
+
7
+ const __dirname = dirname(fileURLToPath(import.meta.url));
8
+
9
+ // Setup: Create test documents
10
+ test.before(async t => {
11
+ const testDir = join(__dirname, 'test-docs');
12
+ await fs.mkdir(testDir, { recursive: true });
13
+
14
+ // Create various test files
15
+ const textFile = join(testDir, 'test.txt');
16
+ const largeTextFile = join(testDir, 'large.txt');
17
+ const unicodeFile = join(testDir, 'unicode.txt');
18
+ const jsonFile = join(testDir, 'test.json');
19
+ const emptyFile = join(testDir, 'empty.txt');
20
+
21
+ // Regular text content
22
+ await fs.writeFile(textFile, 'This is a test document content.\nIt has multiple lines.\nThird line here.');
23
+
24
+ // Large text content (>100KB)
25
+ const largeContent = 'Lorem ipsum '.repeat(10000);
26
+ await fs.writeFile(largeTextFile, largeContent);
27
+
28
+ // Unicode content
29
+ const unicodeContent = '这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test';
30
+ await fs.writeFile(unicodeFile, unicodeContent);
31
+
32
+ // JSON content
33
+ await fs.writeFile(jsonFile, JSON.stringify({ test: 'content' }));
34
+
35
+ // Empty file
36
+ await fs.writeFile(emptyFile, '');
37
+
38
+ t.context = {
39
+ testDir,
40
+ textFile,
41
+ largeTextFile,
42
+ unicodeFile,
43
+ jsonFile,
44
+ emptyFile
45
+ };
46
+ });
47
+
48
+ // Cleanup
49
+ test.after.always(async t => {
50
+ await fs.rm(t.context.testDir, { recursive: true, force: true });
51
+ });
52
+
53
+ // Test basic text file processing
54
+ test('processes text files correctly', async t => {
55
+ const result = await documentToText(t.context.textFile, 'text/plain');
56
+ t.true(typeof result === 'string', 'Result should be a string');
57
+ t.true(result.includes('test document content'), 'Result should contain file content');
58
+ t.true(result.includes('multiple lines'), 'Result should preserve multiple lines');
59
+ });
60
+
61
+ // Test large file handling
62
+ test('handles large text files', async t => {
63
+ const result = await documentToText(t.context.largeTextFile, 'text/plain');
64
+ t.true(result.length > 50000, 'Should handle large files');
65
+ t.true(result.includes('Lorem ipsum'), 'Should contain expected content');
66
+ });
67
+
68
+ // Test Unicode handling
69
+ test('handles Unicode content correctly', async t => {
70
+ const result = await documentToText(t.context.unicodeFile, 'text/plain');
71
+ t.true(result.includes('这是中文内容'), 'Should preserve Chinese characters');
72
+ t.true(result.includes('これは日本語です'), 'Should preserve Japanese characters');
73
+ t.true(result.includes('Это русский текст'), 'Should preserve Cyrillic characters');
74
+ t.true(result.includes('🌟'), 'Should preserve emoji');
75
+ });
76
+
77
+ // Test JSON file handling
78
+ test('rejects JSON files appropriately', async t => {
79
+ await t.throwsAsync(
80
+ async () => documentToText(t.context.jsonFile, 'application/json'),
81
+ { message: 'Unsupported file type: json' }
82
+ );
83
+ });
84
+
85
+ // Test empty file handling
86
+ test('handles empty files appropriately', async t => {
87
+ const result = await documentToText(t.context.emptyFile, 'text/plain');
88
+ t.is(result, '', 'Empty file should return empty string');
89
+ });
90
+
91
+ // Test unsupported file types
92
+ test('rejects unsupported file types', async t => {
93
+ const unsupportedFile = join(t.context.testDir, 'unsupported.xyz');
94
+ await fs.writeFile(unsupportedFile, 'test content');
95
+ await t.throwsAsync(
96
+ async () => documentToText(unsupportedFile, 'unsupported/type'),
97
+ { message: 'Unsupported file type: xyz' }
98
+ );
99
+ });
100
+
101
+ // Test text chunking functionality
102
+ test('chunks text correctly with default settings', t => {
103
+ const text = 'This is a test.\nSecond line.\nThird line.\nFourth line.';
104
+ const chunks = easyChunker(text);
105
+
106
+ t.true(Array.isArray(chunks), 'Should return an array of chunks');
107
+ t.true(chunks.length > 0, 'Should create at least one chunk');
108
+ t.true(chunks.every(chunk => typeof chunk === 'string'), 'All chunks should be strings');
109
+ });
110
+
111
+ // Test chunking with very long text
112
+ test('handles chunking of long text', t => {
113
+ const longText = 'Test sentence. '.repeat(1000);
114
+ const chunks = easyChunker(longText);
115
+
116
+ t.true(chunks.length > 1, 'Should split long text into multiple chunks');
117
+ t.true(chunks.every(chunk => chunk.length <= 10000), 'Each chunk should not exceed max length');
118
+ });
119
+
120
+ // Test chunking with various delimiters
121
+ test('respects sentence boundaries in chunking', t => {
122
+ const text = 'First sentence. Second sentence! Third sentence? Fourth sentence.';
123
+ const chunks = easyChunker(text);
124
+
125
+ t.true(chunks.every(chunk =>
126
+ chunk.match(/[.!?](\s|$)/) || chunk === chunks[chunks.length - 1]
127
+ ), 'Chunks should end with sentence delimiters when possible');
128
+ });
129
+
130
+ // Test chunking with newlines
131
+ test('handles newlines in chunking', t => {
132
+ const text = 'Line 1\nLine 2\nLine 3\nLine 4';
133
+ const chunks = easyChunker(text);
134
+
135
+ t.true(chunks.some(chunk => chunk.includes('\n')), 'Should preserve newlines');
136
+ });
137
+
138
+ // Test chunking edge cases
139
+ test('handles chunking edge cases', t => {
140
+ // Empty string
141
+ t.deepEqual(easyChunker(''), [''], 'Should handle empty string');
142
+
143
+ // Single character
144
+ t.deepEqual(easyChunker('a'), ['a'], 'Should handle single character');
145
+
146
+ // Only whitespace
147
+ t.deepEqual(easyChunker(' \n '), [' \n '], 'Should handle whitespace');
148
+ });
@@ -0,0 +1,311 @@
1
+ import test from 'ava';
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname, join } from 'path';
4
+ import fs from 'fs/promises';
5
+ import { existsSync } from 'fs';
6
+ import { splitMediaFile, downloadFile } from '../fileChunker.js';
7
+ import nock from 'nock';
8
+ import os from 'os';
9
+ import { execSync } from 'child_process';
10
+ import { performance } from 'perf_hooks';
11
+
12
+ const __dirname = dirname(fileURLToPath(import.meta.url));
13
+
14
+ // Helper function to create a test media file of specified duration using ffmpeg
15
+ async function createTestMediaFile(filepath, durationSeconds = 10) {
16
+ try {
17
+ console.log(`Creating test file: ${filepath} (${durationSeconds}s)`);
18
+ // Generate silence using ffmpeg
19
+ execSync(`ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t ${durationSeconds} -q:a 9 -acodec libmp3lame "${filepath}"`, {
20
+ stdio: ['ignore', 'pipe', 'pipe'] // Capture stdout and stderr
21
+ });
22
+
23
+ // Verify the file was created and has content
24
+ const stats = await fs.stat(filepath);
25
+ if (stats.size === 0) {
26
+ throw new Error('Generated file is empty');
27
+ }
28
+ console.log(`Successfully created ${filepath} (${(stats.size / 1024 / 1024).toFixed(2)}MB)`);
29
+ } catch (error) {
30
+ console.error(`Error creating test file ${filepath}:`, error.message);
31
+ if (error.stderr) console.error('ffmpeg error:', error.stderr.toString());
32
+ throw error;
33
+ }
34
+ }
35
+
36
+ // Setup: Create test files and mock external services
37
+ test.before(async t => {
38
+ // Check if ffmpeg is available
39
+ try {
40
+ execSync('ffmpeg -version', { stdio: 'ignore' });
41
+ } catch (error) {
42
+ console.error('ffmpeg is not installed. Please install it to run these tests.');
43
+ process.exit(1);
44
+ }
45
+
46
+ const testDir = join(__dirname, 'test-files');
47
+ await fs.mkdir(testDir, { recursive: true });
48
+
49
+ try {
50
+ // Create test files of different durations
51
+ const testFile1s = join(testDir, 'test-1s.mp3');
52
+ const testFile10s = join(testDir, 'test-10s.mp3');
53
+ const testFile600s = join(testDir, 'test-600s.mp3');
54
+
55
+ await createTestMediaFile(testFile1s, 1);
56
+ await createTestMediaFile(testFile10s, 10);
57
+ await createTestMediaFile(testFile600s, 600);
58
+
59
+ // Create large test files
60
+ const testFile1h = join(testDir, 'test-1h.mp3');
61
+ const testFile4h = join(testDir, 'test-4h.mp3');
62
+
63
+ console.log('\nCreating large test files (this may take a while)...');
64
+ await createTestMediaFile(testFile1h, 3600);
65
+ await createTestMediaFile(testFile4h, 14400);
66
+
67
+ t.context = {
68
+ testDir,
69
+ testFile1s,
70
+ testFile10s,
71
+ testFile600s,
72
+ testFile1h,
73
+ testFile4h
74
+ };
75
+
76
+ // Setup nock for URL tests with proper headers
77
+ nock('https://example.com')
78
+ .get('/media/test.mp3')
79
+ .replyWithFile(200, testFile10s, {
80
+ 'Content-Type': 'audio/mpeg',
81
+ 'Content-Length': (await fs.stat(testFile10s)).size.toString()
82
+ })
83
+ .persist();
84
+ } catch (error) {
85
+ console.error('Error during test setup:', error);
86
+ // Clean up any partially created files
87
+ try {
88
+ await fs.rm(testDir, { recursive: true, force: true });
89
+ } catch (cleanupError) {
90
+ console.error('Error during cleanup:', cleanupError);
91
+ }
92
+ throw error;
93
+ }
94
+ });
95
+
96
+ // Cleanup: Remove test files
97
+ test.after.always(async t => {
98
+ // Clean up test files
99
+ if (t.context.testDir) {
100
+ try {
101
+ await fs.rm(t.context.testDir, { recursive: true, force: true });
102
+ console.log('Test files cleaned up successfully');
103
+ } catch (error) {
104
+ console.error('Error cleaning up test files:', error);
105
+ }
106
+ }
107
+
108
+ // Clean up nock
109
+ nock.cleanAll();
110
+ });
111
+
112
+ // Test successful chunking of a short file
113
+ test('successfully chunks short media file', async t => {
114
+ const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(t.context.testFile1s);
115
+
116
+ t.true(Array.isArray(chunkPromises), 'Should return array of promises');
117
+ t.true(Array.isArray(chunkOffsets), 'Should return array of offsets');
118
+ t.true(typeof uniqueOutputPath === 'string', 'Should return output path');
119
+
120
+ // Should only create one chunk for 1s file
121
+ t.is(chunkPromises.length, 1, 'Should create single chunk for short file');
122
+
123
+ // Wait for chunks to process
124
+ const chunkPaths = await Promise.all(chunkPromises);
125
+
126
+ // Verify chunk exists
127
+ t.true(existsSync(chunkPaths[0]), 'Chunk file should exist');
128
+
129
+ // Cleanup
130
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
131
+ });
132
+
133
+ // Test chunking of a longer file
134
+ test('correctly chunks longer media file', async t => {
135
+ const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(t.context.testFile600s);
136
+
137
+ // For 600s file with 500s chunks, should create 2 chunks
138
+ t.is(chunkPromises.length, 2, 'Should create correct number of chunks');
139
+ t.is(chunkOffsets.length, 2, 'Should create correct number of offsets');
140
+
141
+ // Verify offsets
142
+ t.is(chunkOffsets[0], 0, 'First chunk should start at 0');
143
+ t.is(chunkOffsets[1], 500, 'Second chunk should start at 500s');
144
+
145
+ // Wait for chunks to process
146
+ const chunkPaths = await Promise.all(chunkPromises);
147
+
148
+ // Verify all chunks exist
149
+ for (const chunkPath of chunkPaths) {
150
+ t.true(existsSync(chunkPath), 'Each chunk file should exist');
151
+ }
152
+
153
+ // Cleanup
154
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
155
+ });
156
+
157
+ // Test custom chunk duration
158
+ test('respects custom chunk duration', async t => {
159
+ const customDuration = 5; // 5 seconds
160
+ const { chunkPromises, chunkOffsets } = await splitMediaFile(t.context.testFile10s, customDuration);
161
+
162
+ // For 10s file with 5s chunks, should create 2 chunks
163
+ t.is(chunkPromises.length, 2, 'Should create correct number of chunks for custom duration');
164
+ t.deepEqual(chunkOffsets, [0, 5], 'Should have correct offset points');
165
+ });
166
+
167
+ // Test URL-based file processing
168
+ test('processes media file from URL', async t => {
169
+ const url = 'https://example.com/media/test.mp3';
170
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(url);
171
+
172
+ // Wait for chunks to process
173
+ const chunkPaths = await Promise.all(chunkPromises);
174
+
175
+ // Verify chunks were created
176
+ for (const chunkPath of chunkPaths) {
177
+ t.true(existsSync(chunkPath), 'Chunk files should exist for URL-based media');
178
+ }
179
+
180
+ // Cleanup
181
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
182
+ });
183
+
184
+ // Test error handling for invalid files
185
+ test('handles invalid media files gracefully', async t => {
186
+ const invalidFile = join(t.context.testDir, 'invalid.mp3');
187
+ await fs.writeFile(invalidFile, 'not a valid mp3 file');
188
+
189
+ await t.throwsAsync(
190
+ async () => splitMediaFile(invalidFile),
191
+ { message: /Error processing media file/ }
192
+ );
193
+ });
194
+
195
+ // Test error handling for non-existent files
196
+ test('handles non-existent files gracefully', async t => {
197
+ const nonExistentFile = join(t.context.testDir, 'non-existent.mp3');
198
+
199
+ await t.throwsAsync(
200
+ async () => splitMediaFile(nonExistentFile),
201
+ { message: /Error processing media file/ }
202
+ );
203
+ });
204
+
205
+ // Test file download functionality
206
+ test('successfully downloads file from URL', async t => {
207
+ const url = 'https://example.com/media/test.mp3';
208
+ const outputPath = join(os.tmpdir(), 'downloaded-test.mp3');
209
+
210
+ await downloadFile(url, outputPath);
211
+ t.true(existsSync(outputPath), 'Downloaded file should exist');
212
+
213
+ // Cleanup
214
+ await fs.unlink(outputPath);
215
+ });
216
+
217
+ // Test error handling for invalid URLs in download
218
+ test('handles invalid URLs in download gracefully', async t => {
219
+ const invalidUrl = 'https://invalid-url-that-does-not-exist.com/test.mp3';
220
+ const outputPath = join(os.tmpdir(), 'should-not-exist.mp3');
221
+
222
+ await t.throwsAsync(
223
+ async () => downloadFile(invalidUrl, outputPath)
224
+ );
225
+ });
226
+
227
+ // Helper to format duration nicely
228
+ function formatDuration(ms) {
229
+ if (ms < 1000) return `${ms}ms`;
230
+ const seconds = ms / 1000;
231
+ if (seconds < 60) return `${seconds.toFixed(2)}s`;
232
+ const minutes = seconds / 60;
233
+ if (minutes < 60) return `${minutes.toFixed(2)}m`;
234
+ const hours = minutes / 60;
235
+ return `${hours.toFixed(2)}h`;
236
+ }
237
+
238
+ // Test performance with 1-hour file
239
+ test('performance test - 1 hour file', async t => {
240
+ const start = performance.now();
241
+
242
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile1h);
243
+
244
+ // Wait for all chunks to complete
245
+ const chunkPaths = await Promise.all(chunkPromises);
246
+ const end = performance.now();
247
+ const duration = end - start;
248
+
249
+ console.log(`\n1 hour file processing stats:
250
+ - Total time: ${formatDuration(duration)}
251
+ - Chunks created: ${chunkPaths.length}
252
+ - Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
253
+ - Processing speed: ${((3600 / (duration / 1000))).toFixed(2)}x realtime`);
254
+
255
+ t.true(chunkPaths.length > 0, 'Should create chunks');
256
+ t.true(duration > 0, 'Should measure time');
257
+
258
+ // Cleanup
259
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
260
+ });
261
+
262
+ // Test performance with 4-hour file
263
+ test('performance test - 4 hour file', async t => {
264
+ const start = performance.now();
265
+
266
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile4h);
267
+
268
+ // Wait for all chunks to complete
269
+ const chunkPaths = await Promise.all(chunkPromises);
270
+ const end = performance.now();
271
+ const duration = end - start;
272
+
273
+ console.log(`\n4 hour file processing stats:
274
+ - Total time: ${formatDuration(duration)}
275
+ - Chunks created: ${chunkPaths.length}
276
+ - Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
277
+ - Processing speed: ${((14400 / (duration / 1000))).toFixed(2)}x realtime`);
278
+
279
+ t.true(chunkPaths.length > 0, 'Should create chunks');
280
+ t.true(duration > 0, 'Should measure time');
281
+
282
+ // Cleanup
283
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
284
+ });
285
+
286
+ // Test memory usage during large file processing
287
+ test('memory usage during large file processing', async t => {
288
+ const initialMemory = process.memoryUsage().heapUsed;
289
+ let peakMemory = initialMemory;
290
+
291
+ const interval = setInterval(() => {
292
+ const used = process.memoryUsage().heapUsed;
293
+ peakMemory = Math.max(peakMemory, used);
294
+ }, 100);
295
+
296
+ const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile4h);
297
+ await Promise.all(chunkPromises);
298
+
299
+ clearInterval(interval);
300
+
301
+ const memoryIncrease = (peakMemory - initialMemory) / 1024 / 1024; // Convert to MB
302
+ console.log(`\nMemory usage stats:
303
+ - Initial memory: ${(initialMemory / 1024 / 1024).toFixed(2)}MB
304
+ - Peak memory: ${(peakMemory / 1024 / 1024).toFixed(2)}MB
305
+ - Memory increase: ${memoryIncrease.toFixed(2)}MB`);
306
+
307
+ t.true(memoryIncrease >= 0, 'Should track memory usage');
308
+
309
+ // Cleanup
310
+ await fs.rm(uniqueOutputPath, { recursive: true, force: true });
311
+ });