npm - @aj-archipelago/cortex - Versions diffs - 1.3.49 → 1.3.51 - Mend

@aj-archipelago/cortex 1.3.49 → 1.3.51

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (42)

package/helper-apps/cortex-file-handler/constants.js CHANGED Viewed

@@ -1,48 +1,42 @@
 export const DOC_EXTENSIONS = [
-    ".txt",
-    ".json",
-    ".csv",
-    ".md",
-    ".xml",
-    ".js",
-    ".html",
-    ".css",
-    ".doc",
-    ".docx",
-    ".xls",
-    ".xlsx"
+    '.txt',
+    '.json',
+    '.csv',
+    '.md',
+    '.xml',
+    '.js',
+    '.html',
+    '.css',
+    '.doc',
+    '.docx',
+    '.xls',
+    '.xlsx',
 ];
 export const IMAGE_EXTENSIONS = [
-    ".jpg",
-    ".jpeg",
-    ".png",
-    ".webp",
-    ".heic",
-    ".heif",
-    ".pdf"
+    '.jpg',
+    '.jpeg',
+    '.png',
+    '.webp',
+    '.heic',
+    '.heif',
+    '.pdf',
 ];
 export const VIDEO_EXTENSIONS = [
-    ".mp4",
-    ".mpeg",
-    ".mov",
-    ".avi",
-    ".flv",
-    ".mpg",
-    ".webm",
-    ".wmv",
-    ".3gp"
-];
-export const AUDIO_EXTENSIONS = [
-    ".wav",
-    ".mp3",
-    ".aac",
-    ".ogg",
-    ".flac"
+    '.mp4',
+    '.mpeg',
+    '.mov',
+    '.avi',
+    '.flv',
+    '.mpg',
+    '.webm',
+    '.wmv',
+    '.3gp',
 ];
+export const AUDIO_EXTENSIONS = ['.wav', '.mp3', '.aac', '.ogg', '.flac'];
 export const ACCEPTED_MIME_TYPES = {
     // Document types
     'text/plain': ['.txt'],
@@ -53,24 +47,35 @@ export const ACCEPTED_MIME_TYPES = {
     'text/javascript': ['.js'],
     'text/html': ['.html'],
     'text/css': ['.css'],
-    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
-    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
+    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': [
+        '.docx',
+    ],
+    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': [
+        '.xlsx',
+    ],
     'application/msword': ['.doc'],
     'application/vnd.ms-excel': ['.xls'],
     'application/vnd.ms-word.document.macroEnabled.12': ['.docm'],
     'application/vnd.ms-excel.sheet.macroEnabled.12': ['.xlsm'],
     'application/vnd.ms-word.template.macroEnabled.12': ['.dotm'],
     'application/vnd.ms-excel.template.macroEnabled.12': ['.xltm'],
     // Image types
     'image/jpeg': ['.jpg', '.jpeg'],
     'image/png': ['.png'],
     'image/webp': ['.webp'],
     'image/heic': ['.heic'],
     'image/heif': ['.heif'],
-    'application/octet-stream': ['.jpg', '.jpeg', '.png', '.webp', '.heic', '.heif'],
+    'application/octet-stream': [
+        '.jpg',
+        '.jpeg',
+        '.png',
+        '.webp',
+        '.heic',
+        '.heif',
+    ],
     'application/pdf': ['.pdf'],
     // Audio types
     'audio/wav': ['.wav'],
     'audio/mpeg': ['.mp3'],
@@ -81,7 +86,7 @@ export const ACCEPTED_MIME_TYPES = {
     'audio/x-m4a': ['.m4a'],
     'audio/mp3': ['.mp3'],
     'audio/mp4': ['.mp4'],
     // Video types
     'video/mp4': ['.mp4'],
     'video/mpeg': ['.mpeg', '.mpg'],
@@ -108,8 +113,19 @@ export function getExtensionsForMimeType(mimeType) {
 // Helper function to check if an extension is accepted
 export function isAcceptedExtension(extension) {
-    return DOC_EXTENSIONS.includes(extension) ||
-           IMAGE_EXTENSIONS.includes(extension) ||
-           VIDEO_EXTENSIONS.includes(extension) ||
-           AUDIO_EXTENSIONS.includes(extension);
-}
+    return (
+        DOC_EXTENSIONS.includes(extension) ||
+    IMAGE_EXTENSIONS.includes(extension) ||
+    VIDEO_EXTENSIONS.includes(extension) ||
+    AUDIO_EXTENSIONS.includes(extension)
+    );
+}
+export const CONVERTED_EXTENSIONS = [
+    '.doc',
+    '.docx',
+    '.xls',
+    '.xlsx',
+    '.ppt',
+    '.pptx',
+];

package/helper-apps/cortex-file-handler/docHelper.js CHANGED Viewed

@@ -1,115 +1,4 @@
-import * as pdfjsLib from 'pdfjs-dist';
-import fs from 'fs/promises';
-import mammoth from 'mammoth';
-import XLSX from 'xlsx';
-import Papa from 'papaparse';
-export async function txtToText(filePath) {
-    const text = await fs.readFile(filePath, 'utf-8');
-    return text;
-}
-export async function docxToText(filePath) {
-    const buffer = await fs.readFile(filePath);
-    const result = await mammoth.extractRawText({ buffer: buffer });
-    return result.value;
-}
-export async function xlsxToText(filePath) {
-    const workbook = XLSX.readFile(filePath);
-    let finalText = '';
-    workbook.SheetNames.forEach(sheetName => {
-        const sheet = workbook.Sheets[sheetName];
-        const sheetAsJson = XLSX.utils.sheet_to_json(sheet, { header: 1 });
-        sheetAsJson.forEach(row => {
-            finalText += row.join(' ') + '\n';
-        });
-    });
-    return finalText;
-}
-async function pdfToText(filePath) {
-    const pdf = await pdfjsLib.getDocument(filePath).promise;
-    const meta = await pdf.getMetadata();
-    // Check if pdf is scanned
-    if (meta && meta.metadata && meta.metadata._metadataMap && meta.metadata._metadataMap.has('dc:format')) {
-        const format = meta.metadata._metadataMap.get('dc:format');
-        if (format && format._value && format._value.toLowerCase() === 'application/pdf; version=1.3') {
-            throw new Error('Scanned PDFs are not supported');
-        }
-    }
-    // Check if pdf is encrypted
-    if (pdf._pdfInfo && pdf._pdfInfo.encrypt) {
-        throw new Error('Encrypted PDFs are not supported');
-    }
-    // Check if pdf is password protected
-    if (pdf._passwordNeeded) {
-        throw new Error('Password protected PDFs are not supported');
-    }
-    let finalText = '';
-    let ocrNeeded = true; // Initialize the variable as true
-    for (let i = 1; i <= pdf.numPages; i++) {
-        const page = await pdf.getPage(i);
-        const operatorList = await page.getOperatorList();
-        // Check if there are any fonts used in the PDF
-        if (operatorList.fnArray.some(fn => fn === pdfjsLib.OPS.setFont)) {
-            ocrNeeded = false; // Set ocrNeeded to false if fonts are found
-        }
-        const textContent = await page.getTextContent();
-        const strings = textContent.items.map(item => item.str);
-        finalText += strings.join(' ') + '\n';
-    }
-    if (ocrNeeded) {
-        throw new Error('OCR might be needed for this document!');
-    }
-    return finalText.trim();
-}
-export async function csvToText(filePath) {
-    const text = await fs.readFile(filePath, 'utf-8');
-    const results = Papa.parse(text);
-    let finalText = '';
-    results.data.forEach(row => {
-        finalText += row.join(' ') + '\n';
-    });
-    return finalText;
-}
-export async function documentToText(filePath) {
-    const fileExtension = filePath.split('.').pop();
-    switch (fileExtension) {
-        case 'pdf':
-            return pdfToText(filePath);
-        case 'txt':
-        case 'html':
-            return txtToText(filePath);
-        case 'docx':
-        case 'doc':
-            return docxToText(filePath);
-        case 'xlsx':
-        case 'xls':
-            return xlsxToText(filePath);
-        case 'csv':
-            return csvToText(filePath);
-        default:
-            throw new Error(`Unsupported file type: ${fileExtension}`);
-    }
-}
+// Utility function for chunking text into smaller pieces
 export function easyChunker(text) {
     const result = [];
     const n = 10000;
@@ -124,7 +13,11 @@ export function easyChunker(text) {
         let endIndex = Math.min(startIndex + n, text.length);
         // Make sure we don't split in the middle of a sentence
-        while (endIndex > startIndex && text[endIndex] !== '.' && text[endIndex] !== ' ') {
+        while (
+            endIndex > startIndex &&
+            text[endIndex] !== '.' &&
+            text[endIndex] !== ' '
+        ) {
             endIndex--;
         }
@@ -141,4 +34,4 @@ export function easyChunker(text) {
     }
     return result;
-}
+}

package/helper-apps/cortex-file-handler/fileChunker.js CHANGED Viewed

@@ -1,14 +1,17 @@
 import fs from 'fs';
-import path from 'path';
-import ffmpeg from 'fluent-ffmpeg';
-import { v4 as uuidv4 } from 'uuid';
+import http from 'http';
+import https from 'https';
 import os from 'os';
+import path from 'path';
+import { Transform } from 'stream';
+import { pipeline } from 'stream/promises';
 import { promisify } from 'util';
 import axios from 'axios';
+import ffmpeg from 'fluent-ffmpeg';
+import { v4 as uuidv4 } from 'uuid';
 import { ensureEncoded } from './helper.js';
-import http from 'http';
-import https from 'https';
-import { pipeline } from 'stream/promises';
 const ffmpegProbe = promisify(ffmpeg.ffprobe);
@@ -18,7 +21,6 @@ const tempDirectories = new Map(); // dir -> { createdAt, requestId }
 // Temp directory cleanup
 async function cleanupTempDirectories() {
     for (const [dir, info] of tempDirectories) {
         try {
             // Cleanup directories older than 1 hour
@@ -43,7 +45,7 @@ setInterval(async () => {
     }
 }, CLEANUP_INTERVAL_MS);
-// Process a single chunk with streaming
+// Process a single chunk with streaming and progress tracking
 async function processChunk(inputPath, outputFileName, start, duration) {
     return new Promise((resolve, reject) => {
         const command = ffmpeg(inputPath)
@@ -71,8 +73,11 @@ async function processChunk(inputPath, outputFileName, start, duration) {
                 resolve(outputFileName);
             });
-        // Use pipe() to handle streaming
-        command.pipe(fs.createWriteStream(outputFileName), { end: true });
+        // Use pipeline for better error handling and backpressure
+        pipeline(
+            command,
+            fs.createWriteStream(outputFileName, { highWaterMark: 4 * 1024 * 1024 }), // 4MB chunks
+        ).catch(reject);
     });
 }
@@ -80,38 +85,55 @@ const generateUniqueFolderName = () => {
     const uniqueFolderName = uuidv4();
     const tempFolderPath = os.tmpdir();
     return path.join(tempFolderPath, uniqueFolderName);
-}
+};
 async function downloadFile(url, outputPath) {
     try {
+        const agent = {
+            http: new http.Agent({
+                keepAlive: true,
+                maxSockets: 10,
+                maxFreeSockets: 10,
+                timeout: 60000,
+            }),
+            https: new https.Agent({
+                keepAlive: true,
+                maxSockets: 10,
+                maxFreeSockets: 10,
+                timeout: 60000,
+            }),
+        };
         let response;
         try {
-            response = await axios.get(decodeURIComponent(url), {
+            response = await axios.get(decodeURIComponent(url), {
                 responseType: 'stream',
-                // Add timeout and maxContentLength
                 timeout: 30000,
                 maxContentLength: Infinity,
-                // Enable streaming download
                 decompress: true,
-                // Use a smaller chunk size for better memory usage
-                httpAgent: new http.Agent({ keepAlive: true }),
-                httpsAgent: new https.Agent({ keepAlive: true })
+                httpAgent: agent.http,
+                httpsAgent: agent.https,
+                maxRedirects: 5,
+                validateStatus: (status) => status >= 200 && status < 300,
             });
         } catch (error) {
-            response = await axios.get(url, {
+            response = await axios.get(url, {
                 responseType: 'stream',
                 timeout: 30000,
                 maxContentLength: Infinity,
                 decompress: true,
-                httpAgent: new http.Agent({ keepAlive: true }),
-                httpsAgent: new https.Agent({ keepAlive: true })
+                httpAgent: agent.http,
+                httpsAgent: agent.https,
+                maxRedirects: 5,
+                validateStatus: (status) => status >= 200 && status < 300,
             });
         }
-        const writer = fs.createWriteStream(outputPath);
         // Use pipeline for better error handling and memory management
-        await pipeline(response.data, writer);
+        await pipeline(
+            response.data,
+            fs.createWriteStream(outputPath, { highWaterMark: 4 * 1024 * 1024 }), // 4MB chunks
+        );
         if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
             throw new Error('Download failed or file is empty');
@@ -124,25 +146,30 @@ async function downloadFile(url, outputPath) {
     }
 }
-async function splitMediaFile(inputPath, chunkDurationInSeconds = 500, requestId = uuidv4()) {
+async function splitMediaFile(
+    inputPath,
+    chunkDurationInSeconds = 500,
+    requestId = uuidv4(),
+) {
     let tempPath = null;
     let uniqueOutputPath = null;
     let inputStream = null;
     try {
         uniqueOutputPath = generateUniqueFolderName();
         fs.mkdirSync(uniqueOutputPath, { recursive: true });
         tempDirectories.set(uniqueOutputPath, {
             createdAt: Date.now(),
-            requestId
+            requestId,
         });
         // Handle URL downloads with streaming
         const isUrl = /^(https?|ftp):\/\/[^\s/$.?#].[^\s]*$/i.test(inputPath);
         if (isUrl) {
             const urlObj = new URL(ensureEncoded(inputPath));
-            const originalFileName = path.basename(urlObj.pathname) || 'downloaded_file';
+            const originalFileName =
+        path.basename(urlObj.pathname) || 'downloaded_file';
             tempPath = path.join(uniqueOutputPath, originalFileName);
             console.log('Downloading file to:', tempPath);
             await downloadFile(inputPath, tempPath);
@@ -155,9 +182,9 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500, requestId
         }
         // Use a larger chunk size for better throughput while still managing memory
-        inputStream = fs.createReadStream(inputPath, {
+        inputStream = fs.createReadStream(inputPath, {
             highWaterMark: 4 * 1024 * 1024, // 4MB chunks
-            autoClose: true
+            autoClose: true,
         });
         console.log('Probing file:', inputPath);
@@ -168,33 +195,50 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500, requestId
         const duration = metadata.format.duration;
         const numChunks = Math.ceil((duration - 1) / chunkDurationInSeconds);
-        console.log(`Processing ${numChunks} chunks of ${chunkDurationInSeconds} seconds each`);
+        console.log(
+            `Processing ${numChunks} chunks of ${chunkDurationInSeconds} seconds each`,
+        );
         const chunkResults = new Array(numChunks); // Pre-allocate array to maintain order
         const chunkOffsets = new Array(numChunks); // Pre-allocate offsets array
         // Process chunks in parallel with a concurrency limit
-        const CONCURRENT_CHUNKS = 3; // Process 3 chunks at a time
+        const CONCURRENT_CHUNKS = Math.min(3, os.cpus().length); // Use CPU count to determine concurrency
+        const chunkPromises = [];
         for (let i = 0; i < numChunks; i += CONCURRENT_CHUNKS) {
             const chunkBatch = [];
             for (let j = 0; j < CONCURRENT_CHUNKS && i + j < numChunks; j++) {
                 const chunkIndex = i + j;
-                const outputFileName = path.join(uniqueOutputPath, `chunk-${chunkIndex + 1}-${path.parse(inputPath).name}.mp3`);
+                const outputFileName = path.join(
+                    uniqueOutputPath,
+                    `chunk-${chunkIndex + 1}-${path.parse(inputPath).name}.mp3`,
+                );
                 const offset = chunkIndex * chunkDurationInSeconds;
-                chunkBatch.push(processChunk(inputPath, outputFileName, offset, chunkDurationInSeconds)
-                    .then(result => {
-                        chunkResults[chunkIndex] = result; // Store in correct position
-                        chunkOffsets[chunkIndex] = offset; // Store offset in correct position
-                        console.log(`Completed chunk ${chunkIndex + 1}/${numChunks}`);
-                        return result;
-                    })
-                    .catch(error => {
-                        console.error(`Failed to process chunk ${chunkIndex + 1}:`, error);
-                        return null;
-                    }));
+                chunkBatch.push(
+                    processChunk(
+                        inputPath,
+                        outputFileName,
+                        offset,
+                        chunkDurationInSeconds,
+                    )
+                        .then((result) => {
+                            chunkResults[chunkIndex] = result; // Store in correct position
+                            chunkOffsets[chunkIndex] = offset; // Store offset in correct position
+                            console.log(`Completed chunk ${chunkIndex + 1}/${numChunks}`);
+                            return result;
+                        })
+                        .catch((error) => {
+                            console.error(
+                                `Failed to process chunk ${chunkIndex + 1}:`,
+                                error,
+                            );
+                            return null;
+                        }),
+                );
             }
             // Wait for the current batch to complete before starting the next
             await Promise.all(chunkBatch);
         }
@@ -207,7 +251,11 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500, requestId
             throw new Error('No chunks were successfully processed');
         }
-        return { chunkPromises: validChunks, chunkOffsets: validOffsets, uniqueOutputPath };
+        return {
+            chunkPromises: validChunks,
+            chunkOffsets: validOffsets,
+            uniqueOutputPath,
+        };
     } catch (err) {
         if (uniqueOutputPath && fs.existsSync(uniqueOutputPath)) {
             try {
@@ -230,7 +278,4 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500, requestId
     }
 }
-export {
-    splitMediaFile,
-    downloadFile
-};
+export { splitMediaFile, downloadFile };

package/helper-apps/cortex-file-handler/function.json CHANGED Viewed

@@ -5,11 +5,7 @@
       "type": "httpTrigger",
       "direction": "in",
       "name": "req",
-      "methods": [
-        "get",
-        "post",
-        "delete"
-      ]
+      "methods": ["get", "post", "delete"]
     },
     {
       "type": "http",
@@ -17,4 +13,4 @@
       "name": "res"
     }
   ]
-}
+}

package/helper-apps/cortex-file-handler/helper.js CHANGED Viewed

@@ -1,8 +1,9 @@
 import fs from 'fs';
-import { ACCEPTED_MIME_TYPES, isAcceptedMimeType } from './constants.js';
-import path from 'path';
 import http from 'http';
 import https from 'https';
+import path from 'path';
+import { ACCEPTED_MIME_TYPES, isAcceptedMimeType } from './constants.js';
 export async function deleteTempPath(path) {
     try {
@@ -20,7 +21,9 @@ export async function deleteTempPath(path) {
             console.log(`Temporary file ${path} deleted successfully.`);
         } else if (stats.isDirectory()) {
             fs.rmSync(path, { recursive: true });
-            console.log(`Temporary folder ${path} and its contents deleted successfully.`);
+            console.log(
+                `Temporary folder ${path} and its contents deleted successfully.`,
+            );
         }
     } catch (err) {
         console.error('Error occurred while deleting the temporary path:', err);
@@ -38,7 +41,7 @@ export function getExtensionForMimeType(mimeType) {
 // Ensure a filename has the correct extension based on its mime type
 export function ensureFileExtension(filename, mimeType) {
     if (!mimeType) return filename;
     const extension = getExtensionForMimeType(mimeType);
     if (!extension) return filename;
@@ -49,12 +52,12 @@ export function ensureFileExtension(filename, mimeType) {
     // Get the current extension if any
     const currentExt = path.extname(filename);
     // If there's no current extension, just append the new one
     if (!currentExt) {
         return `${filename}${extension}`;
     }
     // Replace the current extension with the new one
     return filename.slice(0, -currentExt.length) + extension;
 }
@@ -69,39 +72,45 @@ export function ensureEncoded(url) {
 }
 export async function urlExists(url) {
-    if(!url) return false;
+    if (!url) return false;
     try {
-        // Basic URL validation
+    // Basic URL validation
         const urlObj = new URL(url);
         if (!['http:', 'https:'].includes(urlObj.protocol)) {
             throw new Error('Invalid protocol - only HTTP and HTTPS are supported');
         }
         const httpModule = urlObj.protocol === 'https:' ? https : http;
         return new Promise((resolve) => {
-            const request = httpModule.request(url, { method: 'HEAD' }, function(response) {
-                if (response.statusCode >= 200 && response.statusCode < 400) {
-                    const contentType = response.headers['content-type'];
-                    const cleanContentType = contentType ? contentType.split(';')[0].trim() : '';
-                    // Check if the content type is one we accept
-                    if (cleanContentType && isAcceptedMimeType(cleanContentType)) {
-                        resolve({ valid: true, contentType: cleanContentType });
+            const request = httpModule.request(
+                url,
+                { method: 'HEAD' },
+                function (response) {
+                    if (response.statusCode >= 200 && response.statusCode < 400) {
+                        const contentType = response.headers['content-type'];
+                        const cleanContentType = contentType
+                            ? contentType.split(';')[0].trim()
+                            : '';
+                        // Check if the content type is one we accept
+                        if (cleanContentType && isAcceptedMimeType(cleanContentType)) {
+                            resolve({ valid: true, contentType: cleanContentType });
+                        } else {
+                            console.log(`Unsupported content type: ${contentType}`);
+                            resolve({ valid: false });
+                        }
                     } else {
-                        console.log(`Unsupported content type: ${contentType}`);
                         resolve({ valid: false });
                     }
-                } else {
-                    resolve({ valid: false });
-                }
-            });
-            request.on('error', function(err) {
+                },
+            );
+            request.on('error', function (err) {
                 console.error('URL validation error:', err.message);
                 resolve({ valid: false });
             });
             request.end();
         });
     } catch (error) {