npm - @peopl-health/nexus - Versions diffs - 2.4.9-fix-pdf-processing → 2.4.9-fix-mime-type - Mend

@peopl-health/nexus 2.4.9-fix-pdf-processing → 2.4.9-fix-mime-type

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/lib/helpers/filesHelper.js +93 -45
package/lib/helpers/llmsHelper.js +24 -6
package/package.json +1 -1

package/lib/helpers/filesHelper.js CHANGED

@@ -1,6 +1,7 @@
 const { PDFDocument } = require('pdf-lib');
 const { execFile } = require('child_process');
 const fs = require('fs').promises;
+const fsSync = require('fs');
 const path = require('path');
 const sharp = require('sharp');
@@ -22,28 +23,52 @@ async function convertPdfToImages(pdfName, existingPdfPath = null) {
     const args = ['-jpeg', pdfPath, outputPattern];
     logger.info('[convertPdfToImages] Running: pdftoppm', args.join(' '));
-    execFile('pdftoppm', args, (error, stdout, stderr) => {
+    const timeout = 30000;
+    let timedOut = false;
+    const child = execFile('pdftoppm', args, { timeout, maxBuffer: 10 * 1024 * 1024 }, (error, stdout, stderr) => {
+      if (timedOut) {
+        return;
+      }
       if (error) {
         logger.error('[convertPdfToImages] Error details:', {
           error: error.message,
           stderr,
           pdfPath,
-          pdfExists: require('fs').existsSync(pdfPath)
+          pdfExists: fsSync.existsSync(pdfPath),
+          killed: error.killed,
+          signal: error.signal
         });
         return reject(new Error(`Error splitting PDF: ${stderr || error.message}`));
       }
-      fs.readdir(outputDir, (err, files) => {
-        if (err) {
-          return reject(new Error(`Error reading output directory: ${err.message}`));
-        }
+      logger.info('[convertPdfToImages] pdftoppm completed successfully');
-        const jpgFiles = files
-          .filter(file => file.startsWith(sanitizedName) && file.endsWith('.jpg'))
-          .map(file => path.join(outputDir, file));
+      fs.readdir(outputDir)
+        .then(files => {
+          const jpgFiles = files
+            .filter(file => file.startsWith(sanitizedName) && file.endsWith('.jpg'))
+            .map(file => path.join(outputDir, file));
-        resolve(jpgFiles);
-      });
+          logger.info(`[convertPdfToImages] Found ${jpgFiles.length} image files`);
+          resolve(jpgFiles);
+        })
+        .catch(err => {
+          logger.error('[convertPdfToImages] Error reading output directory:', { error: err.message });
+          reject(new Error(`Error reading output directory: ${err.message}`));
+        });
+    });
+    const timeoutId = setTimeout(() => {
+      timedOut = true;
+      child.kill('SIGTERM');
+      logger.error('[convertPdfToImages] Process timed out after 30 seconds', { pdfPath });
+      reject(new Error('PDF conversion timed out after 30 seconds'));
+    }, timeout);
+    child.on('exit', () => {
+      clearTimeout(timeoutId);
     });
   });
 }
@@ -136,46 +161,69 @@ const cleanupFiles = async (files) => {
 };
 async function downloadMediaAndCreateFile(code, reply) {
-  const resultMedia = await Message.findOne({
-    message_id: reply.message_id,
-    timestamp: reply.timestamp,
-    media: { $ne: null }
-  });
+  try {
+    const resultMedia = await Message.findOne({
+      message_id: reply.message_id,
+      timestamp: reply.timestamp,
+      media: { $ne: null }
+    });
-  if (!resultMedia) return [];
+    if (!resultMedia) return [];
-  if (!resultMedia.media || !resultMedia.media.key) {
-    logger.info('[downloadMediaAndCreateFile] No valid media found for message:', reply.message_id);
-    return [];
-  }
+    if (!resultMedia.media || !resultMedia.media.key) {
+      logger.info('[downloadMediaAndCreateFile] No valid media found for message:', reply.message_id);
+      return [];
+    }
-  const { bucketName, key } = resultMedia.media;
-  if (!bucketName || !key) return [];
-  const [subType, fileName] = key.split('/');
-  const sanitizedCode = sanitizeFilename(code, 20);
-  const sanitizedSubType = sanitizeFilename(subType, 10);
-  const sanitizedFileName = sanitizeFilename(fileName, 50);
-  const sourceFile = `${sanitizedCode}-${sanitizedSubType}-${sanitizedFileName}`;
-  const downloadPath = path.join(__dirname, 'assets', 'tmp', sourceFile);
-  logger.info('[downloadMediaAndCreateFile] Downloading file', { sourceFile, downloadPath, bucketName, key });
-  await fs.mkdir(path.dirname(downloadPath), { recursive: true });
-  await downloadFileFromS3(bucketName, key, downloadPath);
+    const { bucketName, key } = resultMedia.media;
+    if (!bucketName || !key) return [];
+    const [subType, fileName] = key.split('/');
+    const sanitizedCode = sanitizeFilename(code, 20);
+    const sanitizedSubType = sanitizeFilename(subType, 10);
+    const sanitizedFileName = sanitizeFilename(fileName, 50);
+    const sourceFile = `${sanitizedCode}-${sanitizedSubType}-${sanitizedFileName}`;
+    const downloadPath = path.join(__dirname, 'assets', 'tmp', sourceFile);
+    logger.info('[downloadMediaAndCreateFile] Downloading file', { sourceFile, downloadPath, bucketName, key });
+    await fs.mkdir(path.dirname(downloadPath), { recursive: true });
+    await downloadFileFromS3(bucketName, key, downloadPath);
-  const { name: baseName } = path.parse(sourceFile);
-  const fileNames = (subType === 'document' ||  subType === 'application')
-    ? await convertPdfToImages(baseName, downloadPath)
-    : [downloadPath];
+    const { name: baseName } = path.parse(sourceFile);
+    let fileNames = [];
+    if (subType === 'document' || subType === 'application') {
+      try {
+        fileNames = await convertPdfToImages(baseName, downloadPath);
+        logger.info('[downloadMediaAndCreateFile] PDF converted successfully', { imageCount: fileNames.length });
+      } catch (conversionError) {
+        logger.error('[downloadMediaAndCreateFile] PDF conversion failed:', {
+          error: conversionError.message,
+          sourceFile
+        });
+        fileNames = [];
+      } finally {
+        try {
+          await fs.unlink(downloadPath);
+        } catch (unlinkError) {
+          logger.warn('[downloadMediaAndCreateFile] Failed to delete PDF:', { error: unlinkError.message });
+        }
+      }
+    } else {
+      fileNames = [downloadPath];
+    }
-  if (subType === 'document' ||  subType === 'application') {
-    await fs.unlink(downloadPath);
+    return fileNames;
+  } catch (error) {
+    logger.error('[downloadMediaAndCreateFile] Error processing media:', {
+      error: error.message,
+      message_id: reply.message_id
+    });
+    return [];
   }
-  return fileNames;
 }
 module.exports = {

package/lib/helpers/llmsHelper.js CHANGED

@@ -1,7 +1,7 @@
 const llmConfig = require('../config/llmConfig.js');
 const { logger } = require('../utils/logger');
 const fs = require('fs');
-const mime = require('mime-types');
+const path = require('path');
 async function analyzeImage(imagePath, isSticker = false, contentType = null) {
@@ -30,14 +30,30 @@ async function analyzeImage(imagePath, isSticker = false, contentType = null) {
       };
     }
+    // Determine mime type from file extension
     let mimeType = contentType;
     if (!mimeType) {
-      if (imagePath.toLowerCase().endsWith('.webp')) {
-        mimeType = 'image/webp';
-      } else {
-        mimeType = mime.lookup(imagePath) || 'image/jpeg';
-      }
+      const ext = path.extname(imagePath).toLowerCase();
+      const mimeMap = {
+        '.jpg': 'image/jpeg',
+        '.jpeg': 'image/jpeg',
+        '.png': 'image/png',
+        '.gif': 'image/gif',
+        '.webp': 'image/webp'
+      };
+      mimeType = mimeMap[ext] || 'image/jpeg'; // Default to jpeg for pdftoppm output
+    }
+    // Validate that mime type is supported by Claude
+    const supportedMimeTypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
+    if (!supportedMimeTypes.includes(mimeType)) {
+      logger.warn('[analyzeImage] Unsupported mime type, defaulting to image/jpeg:', {
+        originalMimeType: mimeType,
+        imagePath
+      });
+      mimeType = 'image/jpeg';
     }
     if (mimeType === 'image/vnd.wap.wbmp') {
       logger.info('Skipping image with MIME type:', mimeType);
       return {
@@ -49,6 +65,7 @@ async function analyzeImage(imagePath, isSticker = false, contentType = null) {
       };
     }
     // Read the image file and convert to base64
+    logger.info('[analyzeImage] Reading image file:', { imagePath: imagePath.split('/').pop() });
     const imageBuffer = await fs.promises.readFile(imagePath);
     const base64Image = imageBuffer.toString('base64');
@@ -77,6 +94,7 @@ async function analyzeImage(imagePath, isSticker = false, contentType = null) {
         },
       ],
     });
+    logger.info('[analyzeImage] Description received');
     const description = messageDescription.content[0].text;
     // For stickers, skip medical analysis and table extraction

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@peopl-health/nexus",
-  "version": "2.4.9-fix-pdf-processing",
+  "version": "2.4.9-fix-mime-type",
   "description": "Core messaging and assistant library for WhatsApp communication platforms",
   "keywords": [
     "whatsapp",