npm - @peopl-health/nexus - Versions diffs - 2.4.8 → 2.4.9-fix-pdf-processing - Mend

@peopl-health/nexus 2.4.8 → 2.4.9-fix-pdf-processing

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/lib/helpers/filesHelper.js +14 -6
package/lib/helpers/llmsHelper.js +12 -6
package/lib/helpers/processHelper.js +1 -1
package/lib/providers/OpenAIResponsesProvider.js +6 -0
package/lib/services/assistantService.js +54 -38
package/lib/utils/mediaValidator.js +18 -14
package/lib/utils/tracingDecorator.js +7 -1
package/package.json +1 -1

package/lib/helpers/filesHelper.js CHANGED Viewed

@@ -9,11 +9,11 @@ const { Message } = require('../models/messageModel.js');
 const { sanitizeFilename } = require('../utils/sanitizer.js');
 const { logger } = require('../utils/logger');
-async function convertPdfToImages(pdfName) {
+async function convertPdfToImages(pdfName, existingPdfPath = null) {
   const outputDir = path.join(__dirname, 'assets', 'tmp');
   const sanitizedName = sanitizeFilename(pdfName);
-  const pdfPath = path.join(outputDir, `${sanitizedName}.pdf`);
+  const pdfPath = existingPdfPath || path.join(outputDir, `${sanitizedName}.pdf`);
   const outputPattern = path.join(outputDir, sanitizedName);
   await fs.mkdir(outputDir, { recursive: true });
@@ -24,6 +24,12 @@ async function convertPdfToImages(pdfName) {
     execFile('pdftoppm', args, (error, stdout, stderr) => {
       if (error) {
+        logger.error('[convertPdfToImages] Error details:', {
+          error: error.message,
+          stderr,
+          pdfPath,
+          pdfExists: require('fs').existsSync(pdfPath)
+        });
         return reject(new Error(`Error splitting PDF: ${stderr || error.message}`));
       }
@@ -148,19 +154,21 @@ async function downloadMediaAndCreateFile(code, reply) {
   const [subType, fileName] = key.split('/');
-  const sanitizedCode = sanitizeFilename(code);
-  const sanitizedSubType = sanitizeFilename(subType);
-  const sanitizedFileName = sanitizeFilename(fileName);
+  const sanitizedCode = sanitizeFilename(code, 20);
+  const sanitizedSubType = sanitizeFilename(subType, 10);
+  const sanitizedFileName = sanitizeFilename(fileName, 50);
   const sourceFile = `${sanitizedCode}-${sanitizedSubType}-${sanitizedFileName}`;
   const downloadPath = path.join(__dirname, 'assets', 'tmp', sourceFile);
+  logger.info('[downloadMediaAndCreateFile] Downloading file', { sourceFile, downloadPath, bucketName, key });
   await fs.mkdir(path.dirname(downloadPath), { recursive: true });
   await downloadFileFromS3(bucketName, key, downloadPath);
   const { name: baseName } = path.parse(sourceFile);
   const fileNames = (subType === 'document' ||  subType === 'application')
-    ? await convertPdfToImages(baseName)
+    ? await convertPdfToImages(baseName, downloadPath)
     : [downloadPath];
   if (subType === 'document' ||  subType === 'application') {

package/lib/helpers/llmsHelper.js CHANGED Viewed

@@ -4,7 +4,7 @@ const fs = require('fs');
 const mime = require('mime-types');
-async function analyzeImage(imagePath, isSticker = false) {
+async function analyzeImage(imagePath, isSticker = false, contentType = null) {
   try {
     const anthropicClient = llmConfig.anthropicClient;
     if (!anthropicClient || !anthropicClient.messages) {
@@ -30,8 +30,14 @@ async function analyzeImage(imagePath, isSticker = false) {
       };
     }
-    // Check MIME type
-    const mimeType = mime.lookup(imagePath) || 'image/jpeg';
+    let mimeType = contentType;
+    if (!mimeType) {
+      if (imagePath.toLowerCase().endsWith('.webp')) {
+        mimeType = 'image/webp';
+      } else {
+        mimeType = mime.lookup(imagePath) || 'image/jpeg';
+      }
+    }
     if (mimeType === 'image/vnd.wap.wbmp') {
       logger.info('Skipping image with MIME type:', mimeType);
       return {
@@ -114,7 +120,7 @@ Only extract tables - ignore any other content in the image.`;
               type: 'image',
               source: {
                 type: 'base64',
-                media_type: mime.lookup(imagePath) || 'image/jpeg',
+                media_type: mimeType,
                 data: base64Image,
               },
             },
@@ -181,7 +187,7 @@ Ejemplo 1:
               type: 'image',
               source: {
                 type: 'base64',
-                media_type: mime.lookup(imagePath) || 'image/jpeg',
+                media_type: mimeType,
                 data: base64Image,
               },
             },
@@ -209,7 +215,7 @@ Ejemplo 1:
               type: 'image',
               source: {
                 type: 'base64',
-                media_type: mime.lookup(imagePath) || 'image/jpeg',
+                media_type: mimeType,
                 data: base64Image,
               },
             },

package/lib/helpers/processHelper.js CHANGED Viewed

@@ -66,7 +66,7 @@ const processImageFile = async (fileName, reply) => {
                     fileName.toLowerCase().includes('/sticker/');
   try {
-    imageAnalysis = await analyzeImage(fileName, isSticker);
+    imageAnalysis = await analyzeImage(fileName, isSticker, reply.media?.contentType);
     logger.info('processImageFile', {
       message_id: reply.message_id,

package/lib/providers/OpenAIResponsesProvider.js CHANGED Viewed

@@ -212,6 +212,12 @@ class OpenAIResponsesProvider {
     if (payloads.length === 0) return null;
+    if (payloads.length > MAX_ITEMS_PER_BATCH) {
+      logger.info(`[OpenAIResponsesProvider] Batching ${payloads.length} messages into chunks of ${MAX_ITEMS_PER_BATCH}`);
+      await this._addItemsInBatches(id, payloads, MAX_ITEMS_PER_BATCH);
+      return { batched: true, count: payloads.length };
+    }
     return this._retryWithRateLimit(async () => {
       if (this.conversations?.items?.create) {
         return await this.conversations.items.create(id, { items: payloads });

package/lib/services/assistantService.js CHANGED Viewed

@@ -273,21 +273,28 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
   const timings = {};
   const startTotal = Date.now();
-  timings.getThread = Date.now();
-  const thread = thread_ || await withTracing(getThread, 'get_thread_operation',
+  const { result: thread, duration: getThreadMs } = await withTracing(
+    getThread,
+    'get_thread_operation',
     (threadCode) => ({
       'thread.code': threadCode,
       'operation.type': 'thread_retrieval',
       'thread.provided': !!thread_
-    })
+    }),
+    { returnTiming: true }
   )(code);
-  timings.getThread = Date.now() - timings.getThread;
+  timings.get_thread_ms = getThreadMs;
-  if (!thread) return null;
-  timings.getMessages = Date.now();
-  const patientReply = await getLastMessages(code);
-  timings.getMessages = Date.now() - timings.getMessages;
+  if (!thread_ && !thread) return null;
+  const finalThread = thread_ || thread;
+  const { result: patientReply, duration: getMessagesMs } = await withTracing(
+    getLastMessages,
+    'get_last_messages',
+    (code) => ({ 'thread.code': code }),
+    { returnTiming: true }
+  )(code);
+  timings.get_messages_ms = getMessagesMs;
   if (!patientReply) {
     logger.info('[replyAssistantCore] No relevant data found for this assistant.');
@@ -296,10 +303,18 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
   const provider = createProvider({ variant: process.env.VARIANT || 'assistants' });
-  timings.processMessages = Date.now();
   logger.info(`[replyAssistantCore] Processing ${patientReply.length} messages in parallel`);
-  const processResults = await processThreadMessage(code, patientReply, provider);
+  const { result: processResults, duration: processMessagesMs } = await withTracing(
+    processThreadMessage,
+    'process_thread_messages',
+    (code, patientReply, provider) => ({
+      'messages.count': patientReply.length,
+      'thread.code': code
+    }),
+    { returnTiming: true }
+  )(code, patientReply, provider);
+  timings.process_messages_ms = processMessagesMs;
   const patientMsg = processResults.some(r => r.isPatient);
   const urls = processResults.filter(r => r.url).map(r => ({ url: r.url }));
@@ -307,21 +322,27 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
   const allTempFiles = processResults.flatMap(r => r.tempFiles || []);
   if (allMessagesToAdd.length > 0) {
-    const threadId = thread.getConversationId();
+    const threadId = finalThread.getConversationId();
     logger.info(`[replyAssistantCore] Adding ${allMessagesToAdd.length} messages to thread in batch`);
     await provider.addMessage({ threadId, messages: allMessagesToAdd });
   }
-  await Promise.all(processResults.map(r => updateMessageRecord(r.reply, thread)));
+  await Promise.all(processResults.map(r => updateMessageRecord(r.reply, finalThread)));
   await cleanupFiles(allTempFiles);
-  timings.processMessages = Date.now() - timings.processMessages;
   if (urls.length > 0) {
-    timings.pdfCombination = Date.now();
     logger.info(`[replyAssistantCore] Processing ${urls.length} URLs for PDF combination`);
-    const { pdfBuffer, processedFiles } = await combineImagesToPDF({ code });
-    timings.pdfCombination = Date.now() - timings.pdfCombination;
+    const { result: pdfResult, duration: pdfCombinationMs } = await withTracing(
+      combineImagesToPDF,
+      'combine_images_to_pdf',
+      ({ code }) => ({
+        'pdf.thread_code': code,
+        'pdf.url_count': urls.length
+      }),
+      { returnTiming: true }
+    )({ code });
+    timings.pdf_combination_ms = pdfCombinationMs;
+    const { pdfBuffer, processedFiles } = pdfResult;
     logger.info(`[replyAssistantCore] PDF combination complete: ${processedFiles?.length || 0} files processed`);
     if (pdfBuffer) {
@@ -337,47 +358,42 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
     }
   }
-  if (!patientMsg || thread.stopped) return null;
+  if (!patientMsg || finalThread.stopped) return null;
-  timings.runAssistant = Date.now();
-  const assistant = getAssistantById(thread.getAssistantId(), thread);
-  const { run, output, completed, retries, predictionTimeMs } = await withTracing(
+  const assistant = getAssistantById(finalThread.getAssistantId(), finalThread);
+  const { result: runResult, duration: runAssistantMs } = await withTracing(
     runAssistantWithRetries,
     'run_assistant_with_retries',
     (thread, assistant, runConfig, patientReply) => ({
       'assistant.id': thread.getAssistantId(),
       'assistant.max_retries': DEFAULT_MAX_RETRIES,
       'assistant.has_patient_reply': !!patientReply
-    })
-  )(thread, assistant, runOptions, patientReply);
-  timings.runAssistant = Date.now() - timings.runAssistant;
-  timings.total = Date.now() - startTotal;
+    }),
+    { returnTiming: true }
+  )(finalThread, assistant, runOptions, patientReply);
+  timings.run_assistant_ms = runAssistantMs;
+  timings.total_ms = Date.now() - startTotal;
+  const { run, output, completed, retries, predictionTimeMs } = runResult;
-  logger.info('[Performance Breakdown]', {
+  logger.info('[Assistant Reply Complete]', {
     code: code ? `${code.substring(0, 3)}***${code.slice(-4)}` : 'unknown',
     messageCount: patientReply.length,
     hasMedia: urls.length > 0,
     retries,
-    time: `${timings.total}ms`
+    totalMs: timings.total_ms
   });
   if (output && predictionTimeMs) {
     await PredictionMetrics.create({
       message_id: `${code}-${Date.now()}`,
       numero: code,
-      assistant_id: thread.getAssistantId(),
-      thread_id: thread.getConversationId(),
+      assistant_id: finalThread.getAssistantId(),
+      thread_id: finalThread.getConversationId(),
       prediction_time_ms: predictionTimeMs,
       retry_count: retries,
       completed: completed,
-      timing_breakdown: {
-        get_thread_ms: timings.getThread,
-        get_messages_ms: timings.getMessages,
-        process_messages_ms: timings.processMessages,
-        pdf_combination_ms: timings.pdfCombination || 0,
-        run_assistant_ms: timings.runAssistant,
-        total_ms: timings.total
-      }
+      timing_breakdown: timings
     }).catch(err => logger.error('[replyAssistantCore] Failed to store metrics:', err));
   }

package/lib/utils/mediaValidator.js CHANGED Viewed

@@ -65,23 +65,27 @@ function getMediaType(contentType) {
 }
 function validateMedia(media, contentType) {
+  const fileSize = Buffer.isBuffer(media) ? media.length : media;
+  if (contentType === 'image/webp') {
+    const mediaType = fileSize <= MEDIA_LIMITS.sticker ? 'sticker' : 'image';
+    const formatValidation = validateMediaFormat(contentType, mediaType);
+    if (!formatValidation.valid) return formatValidation;
+    const sizeValidation = validateMediaSize(media, mediaType);
+    if (!sizeValidation.valid) return sizeValidation;
+    return { valid: true, mediaType, message: `Media validated successfully as ${mediaType}` };
+  }
   const mediaType = getMediaType(contentType);
   const formatValidation = validateMediaFormat(contentType, mediaType);
-  if (!formatValidation.valid) {
-    return formatValidation;
-  }
+  if (!formatValidation.valid) return formatValidation;
   const sizeValidation = validateMediaSize(media, mediaType);
-  if (!sizeValidation.valid) {
-    return sizeValidation;
-  }
-  return {
-    valid: true,
-    mediaType,
-    message: `Media validated successfully as ${mediaType}`
-  };
+  if (!sizeValidation.valid) return sizeValidation;
+  return { valid: true, mediaType, message: `Media validated successfully as ${mediaType}` };
 }
 module.exports = {

package/lib/utils/tracingDecorator.js CHANGED Viewed

@@ -4,9 +4,10 @@ const { SpanStatusCode } = require('@opentelemetry/api');
 /**
  * Usage: const tracedFunction = withTracing(originalFunction, 'operation_name');
  */
-const withTracing = (fn, spanName, attributeMapper = null) => {
+const withTracing = (fn, spanName, attributeMapper = null, options = {}) => {
   return async function (...args) {
     const span = createSpan(spanName);
+    const startTime = Date.now();
     try {
       if (attributeMapper && typeof attributeMapper === 'function') {
@@ -16,6 +17,11 @@ const withTracing = (fn, spanName, attributeMapper = null) => {
       const result = await fn.apply(this, args);
       span.setStatus({ code: SpanStatusCode.OK });
+      if (options.returnTiming) {
+        const duration = Date.now() - startTime;
+        return { result, duration };
+      }
       return result;
     } catch (error) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@peopl-health/nexus",
-  "version": "2.4.8",
+  "version": "2.4.9-fix-pdf-processing",
   "description": "Core messaging and assistant library for WhatsApp communication platforms",
   "keywords": [
     "whatsapp",