@peopl-health/nexus 2.4.8 → 2.4.9-fix-pdf-processing

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -9,11 +9,11 @@ const { Message } = require('../models/messageModel.js');
9
9
  const { sanitizeFilename } = require('../utils/sanitizer.js');
10
10
  const { logger } = require('../utils/logger');
11
11
 
12
- async function convertPdfToImages(pdfName) {
12
+ async function convertPdfToImages(pdfName, existingPdfPath = null) {
13
13
  const outputDir = path.join(__dirname, 'assets', 'tmp');
14
14
 
15
15
  const sanitizedName = sanitizeFilename(pdfName);
16
- const pdfPath = path.join(outputDir, `${sanitizedName}.pdf`);
16
+ const pdfPath = existingPdfPath || path.join(outputDir, `${sanitizedName}.pdf`);
17
17
  const outputPattern = path.join(outputDir, sanitizedName);
18
18
 
19
19
  await fs.mkdir(outputDir, { recursive: true });
@@ -24,6 +24,12 @@ async function convertPdfToImages(pdfName) {
24
24
 
25
25
  execFile('pdftoppm', args, (error, stdout, stderr) => {
26
26
  if (error) {
27
+ logger.error('[convertPdfToImages] Error details:', {
28
+ error: error.message,
29
+ stderr,
30
+ pdfPath,
31
+ pdfExists: require('fs').existsSync(pdfPath)
32
+ });
27
33
  return reject(new Error(`Error splitting PDF: ${stderr || error.message}`));
28
34
  }
29
35
 
@@ -148,19 +154,21 @@ async function downloadMediaAndCreateFile(code, reply) {
148
154
 
149
155
  const [subType, fileName] = key.split('/');
150
156
 
151
- const sanitizedCode = sanitizeFilename(code);
152
- const sanitizedSubType = sanitizeFilename(subType);
153
- const sanitizedFileName = sanitizeFilename(fileName);
157
+ const sanitizedCode = sanitizeFilename(code, 20);
158
+ const sanitizedSubType = sanitizeFilename(subType, 10);
159
+ const sanitizedFileName = sanitizeFilename(fileName, 50);
154
160
 
155
161
  const sourceFile = `${sanitizedCode}-${sanitizedSubType}-${sanitizedFileName}`;
156
162
  const downloadPath = path.join(__dirname, 'assets', 'tmp', sourceFile);
157
163
 
164
+ logger.info('[downloadMediaAndCreateFile] Downloading file', { sourceFile, downloadPath, bucketName, key });
165
+
158
166
  await fs.mkdir(path.dirname(downloadPath), { recursive: true });
159
167
  await downloadFileFromS3(bucketName, key, downloadPath);
160
168
 
161
169
  const { name: baseName } = path.parse(sourceFile);
162
170
  const fileNames = (subType === 'document' || subType === 'application')
163
- ? await convertPdfToImages(baseName)
171
+ ? await convertPdfToImages(baseName, downloadPath)
164
172
  : [downloadPath];
165
173
 
166
174
  if (subType === 'document' || subType === 'application') {
@@ -4,7 +4,7 @@ const fs = require('fs');
4
4
  const mime = require('mime-types');
5
5
 
6
6
 
7
- async function analyzeImage(imagePath, isSticker = false) {
7
+ async function analyzeImage(imagePath, isSticker = false, contentType = null) {
8
8
  try {
9
9
  const anthropicClient = llmConfig.anthropicClient;
10
10
  if (!anthropicClient || !anthropicClient.messages) {
@@ -30,8 +30,14 @@ async function analyzeImage(imagePath, isSticker = false) {
30
30
  };
31
31
  }
32
32
 
33
- // Check MIME type
34
- const mimeType = mime.lookup(imagePath) || 'image/jpeg';
33
+ let mimeType = contentType;
34
+ if (!mimeType) {
35
+ if (imagePath.toLowerCase().endsWith('.webp')) {
36
+ mimeType = 'image/webp';
37
+ } else {
38
+ mimeType = mime.lookup(imagePath) || 'image/jpeg';
39
+ }
40
+ }
35
41
  if (mimeType === 'image/vnd.wap.wbmp') {
36
42
  logger.info('Skipping image with MIME type:', mimeType);
37
43
  return {
@@ -114,7 +120,7 @@ Only extract tables - ignore any other content in the image.`;
114
120
  type: 'image',
115
121
  source: {
116
122
  type: 'base64',
117
- media_type: mime.lookup(imagePath) || 'image/jpeg',
123
+ media_type: mimeType,
118
124
  data: base64Image,
119
125
  },
120
126
  },
@@ -181,7 +187,7 @@ Ejemplo 1:
181
187
  type: 'image',
182
188
  source: {
183
189
  type: 'base64',
184
- media_type: mime.lookup(imagePath) || 'image/jpeg',
190
+ media_type: mimeType,
185
191
  data: base64Image,
186
192
  },
187
193
  },
@@ -209,7 +215,7 @@ Ejemplo 1:
209
215
  type: 'image',
210
216
  source: {
211
217
  type: 'base64',
212
- media_type: mime.lookup(imagePath) || 'image/jpeg',
218
+ media_type: mimeType,
213
219
  data: base64Image,
214
220
  },
215
221
  },
@@ -66,7 +66,7 @@ const processImageFile = async (fileName, reply) => {
66
66
  fileName.toLowerCase().includes('/sticker/');
67
67
 
68
68
  try {
69
- imageAnalysis = await analyzeImage(fileName, isSticker);
69
+ imageAnalysis = await analyzeImage(fileName, isSticker, reply.media?.contentType);
70
70
 
71
71
  logger.info('processImageFile', {
72
72
  message_id: reply.message_id,
@@ -212,6 +212,12 @@ class OpenAIResponsesProvider {
212
212
 
213
213
  if (payloads.length === 0) return null;
214
214
 
215
+ if (payloads.length > MAX_ITEMS_PER_BATCH) {
216
+ logger.info(`[OpenAIResponsesProvider] Batching ${payloads.length} messages into chunks of ${MAX_ITEMS_PER_BATCH}`);
217
+ await this._addItemsInBatches(id, payloads, MAX_ITEMS_PER_BATCH);
218
+ return { batched: true, count: payloads.length };
219
+ }
220
+
215
221
  return this._retryWithRateLimit(async () => {
216
222
  if (this.conversations?.items?.create) {
217
223
  return await this.conversations.items.create(id, { items: payloads });
@@ -273,21 +273,28 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
273
273
  const timings = {};
274
274
  const startTotal = Date.now();
275
275
 
276
- timings.getThread = Date.now();
277
- const thread = thread_ || await withTracing(getThread, 'get_thread_operation',
276
+ const { result: thread, duration: getThreadMs } = await withTracing(
277
+ getThread,
278
+ 'get_thread_operation',
278
279
  (threadCode) => ({
279
280
  'thread.code': threadCode,
280
281
  'operation.type': 'thread_retrieval',
281
282
  'thread.provided': !!thread_
282
- })
283
+ }),
284
+ { returnTiming: true }
283
285
  )(code);
284
- timings.getThread = Date.now() - timings.getThread;
286
+ timings.get_thread_ms = getThreadMs;
285
287
 
286
- if (!thread) return null;
287
-
288
- timings.getMessages = Date.now();
289
- const patientReply = await getLastMessages(code);
290
- timings.getMessages = Date.now() - timings.getMessages;
288
+ if (!thread_ && !thread) return null;
289
+ const finalThread = thread_ || thread;
290
+
291
+ const { result: patientReply, duration: getMessagesMs } = await withTracing(
292
+ getLastMessages,
293
+ 'get_last_messages',
294
+ (code) => ({ 'thread.code': code }),
295
+ { returnTiming: true }
296
+ )(code);
297
+ timings.get_messages_ms = getMessagesMs;
291
298
 
292
299
  if (!patientReply) {
293
300
  logger.info('[replyAssistantCore] No relevant data found for this assistant.');
@@ -296,10 +303,18 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
296
303
 
297
304
  const provider = createProvider({ variant: process.env.VARIANT || 'assistants' });
298
305
 
299
- timings.processMessages = Date.now();
300
306
  logger.info(`[replyAssistantCore] Processing ${patientReply.length} messages in parallel`);
301
307
 
302
- const processResults = await processThreadMessage(code, patientReply, provider);
308
+ const { result: processResults, duration: processMessagesMs } = await withTracing(
309
+ processThreadMessage,
310
+ 'process_thread_messages',
311
+ (code, patientReply, provider) => ({
312
+ 'messages.count': patientReply.length,
313
+ 'thread.code': code
314
+ }),
315
+ { returnTiming: true }
316
+ )(code, patientReply, provider);
317
+ timings.process_messages_ms = processMessagesMs;
303
318
 
304
319
  const patientMsg = processResults.some(r => r.isPatient);
305
320
  const urls = processResults.filter(r => r.url).map(r => ({ url: r.url }));
@@ -307,21 +322,27 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
307
322
  const allTempFiles = processResults.flatMap(r => r.tempFiles || []);
308
323
 
309
324
  if (allMessagesToAdd.length > 0) {
310
- const threadId = thread.getConversationId();
325
+ const threadId = finalThread.getConversationId();
311
326
  logger.info(`[replyAssistantCore] Adding ${allMessagesToAdd.length} messages to thread in batch`);
312
327
  await provider.addMessage({ threadId, messages: allMessagesToAdd });
313
328
  }
314
329
 
315
- await Promise.all(processResults.map(r => updateMessageRecord(r.reply, thread)));
330
+ await Promise.all(processResults.map(r => updateMessageRecord(r.reply, finalThread)));
316
331
  await cleanupFiles(allTempFiles);
317
332
 
318
- timings.processMessages = Date.now() - timings.processMessages;
319
-
320
333
  if (urls.length > 0) {
321
- timings.pdfCombination = Date.now();
322
334
  logger.info(`[replyAssistantCore] Processing ${urls.length} URLs for PDF combination`);
323
- const { pdfBuffer, processedFiles } = await combineImagesToPDF({ code });
324
- timings.pdfCombination = Date.now() - timings.pdfCombination;
335
+ const { result: pdfResult, duration: pdfCombinationMs } = await withTracing(
336
+ combineImagesToPDF,
337
+ 'combine_images_to_pdf',
338
+ ({ code }) => ({
339
+ 'pdf.thread_code': code,
340
+ 'pdf.url_count': urls.length
341
+ }),
342
+ { returnTiming: true }
343
+ )({ code });
344
+ timings.pdf_combination_ms = pdfCombinationMs;
345
+ const { pdfBuffer, processedFiles } = pdfResult;
325
346
  logger.info(`[replyAssistantCore] PDF combination complete: ${processedFiles?.length || 0} files processed`);
326
347
 
327
348
  if (pdfBuffer) {
@@ -337,47 +358,42 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
337
358
  }
338
359
  }
339
360
 
340
- if (!patientMsg || thread.stopped) return null;
361
+ if (!patientMsg || finalThread.stopped) return null;
341
362
 
342
- timings.runAssistant = Date.now();
343
- const assistant = getAssistantById(thread.getAssistantId(), thread);
344
- const { run, output, completed, retries, predictionTimeMs } = await withTracing(
363
+ const assistant = getAssistantById(finalThread.getAssistantId(), finalThread);
364
+ const { result: runResult, duration: runAssistantMs } = await withTracing(
345
365
  runAssistantWithRetries,
346
366
  'run_assistant_with_retries',
347
367
  (thread, assistant, runConfig, patientReply) => ({
348
368
  'assistant.id': thread.getAssistantId(),
349
369
  'assistant.max_retries': DEFAULT_MAX_RETRIES,
350
370
  'assistant.has_patient_reply': !!patientReply
351
- })
352
- )(thread, assistant, runOptions, patientReply);
353
- timings.runAssistant = Date.now() - timings.runAssistant;
354
- timings.total = Date.now() - startTotal;
371
+ }),
372
+ { returnTiming: true }
373
+ )(finalThread, assistant, runOptions, patientReply);
374
+ timings.run_assistant_ms = runAssistantMs;
375
+ timings.total_ms = Date.now() - startTotal;
376
+
377
+ const { run, output, completed, retries, predictionTimeMs } = runResult;
355
378
 
356
- logger.info('[Performance Breakdown]', {
379
+ logger.info('[Assistant Reply Complete]', {
357
380
  code: code ? `${code.substring(0, 3)}***${code.slice(-4)}` : 'unknown',
358
381
  messageCount: patientReply.length,
359
382
  hasMedia: urls.length > 0,
360
383
  retries,
361
- time: `${timings.total}ms`
384
+ totalMs: timings.total_ms
362
385
  });
363
386
 
364
387
  if (output && predictionTimeMs) {
365
388
  await PredictionMetrics.create({
366
389
  message_id: `${code}-${Date.now()}`,
367
390
  numero: code,
368
- assistant_id: thread.getAssistantId(),
369
- thread_id: thread.getConversationId(),
391
+ assistant_id: finalThread.getAssistantId(),
392
+ thread_id: finalThread.getConversationId(),
370
393
  prediction_time_ms: predictionTimeMs,
371
394
  retry_count: retries,
372
395
  completed: completed,
373
- timing_breakdown: {
374
- get_thread_ms: timings.getThread,
375
- get_messages_ms: timings.getMessages,
376
- process_messages_ms: timings.processMessages,
377
- pdf_combination_ms: timings.pdfCombination || 0,
378
- run_assistant_ms: timings.runAssistant,
379
- total_ms: timings.total
380
- }
396
+ timing_breakdown: timings
381
397
  }).catch(err => logger.error('[replyAssistantCore] Failed to store metrics:', err));
382
398
  }
383
399
 
@@ -65,23 +65,27 @@ function getMediaType(contentType) {
65
65
  }
66
66
 
67
67
  function validateMedia(media, contentType) {
68
+ const fileSize = Buffer.isBuffer(media) ? media.length : media;
69
+
70
+ if (contentType === 'image/webp') {
71
+ const mediaType = fileSize <= MEDIA_LIMITS.sticker ? 'sticker' : 'image';
72
+ const formatValidation = validateMediaFormat(contentType, mediaType);
73
+ if (!formatValidation.valid) return formatValidation;
74
+
75
+ const sizeValidation = validateMediaSize(media, mediaType);
76
+ if (!sizeValidation.valid) return sizeValidation;
77
+
78
+ return { valid: true, mediaType, message: `Media validated successfully as ${mediaType}` };
79
+ }
80
+
68
81
  const mediaType = getMediaType(contentType);
69
82
  const formatValidation = validateMediaFormat(contentType, mediaType);
70
-
71
- if (!formatValidation.valid) {
72
- return formatValidation;
73
- }
74
-
83
+ if (!formatValidation.valid) return formatValidation;
84
+
75
85
  const sizeValidation = validateMediaSize(media, mediaType);
76
- if (!sizeValidation.valid) {
77
- return sizeValidation;
78
- }
79
-
80
- return {
81
- valid: true,
82
- mediaType,
83
- message: `Media validated successfully as ${mediaType}`
84
- };
86
+ if (!sizeValidation.valid) return sizeValidation;
87
+
88
+ return { valid: true, mediaType, message: `Media validated successfully as ${mediaType}` };
85
89
  }
86
90
 
87
91
  module.exports = {
@@ -4,9 +4,10 @@ const { SpanStatusCode } = require('@opentelemetry/api');
4
4
  /**
5
5
  * Usage: const tracedFunction = withTracing(originalFunction, 'operation_name');
6
6
  */
7
- const withTracing = (fn, spanName, attributeMapper = null) => {
7
+ const withTracing = (fn, spanName, attributeMapper = null, options = {}) => {
8
8
  return async function (...args) {
9
9
  const span = createSpan(spanName);
10
+ const startTime = Date.now();
10
11
 
11
12
  try {
12
13
  if (attributeMapper && typeof attributeMapper === 'function') {
@@ -16,6 +17,11 @@ const withTracing = (fn, spanName, attributeMapper = null) => {
16
17
  const result = await fn.apply(this, args);
17
18
 
18
19
  span.setStatus({ code: SpanStatusCode.OK });
20
+
21
+ if (options.returnTiming) {
22
+ const duration = Date.now() - startTime;
23
+ return { result, duration };
24
+ }
19
25
  return result;
20
26
 
21
27
  } catch (error) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@peopl-health/nexus",
3
- "version": "2.4.8",
3
+ "version": "2.4.9-fix-pdf-processing",
4
4
  "description": "Core messaging and assistant library for WhatsApp communication platforms",
5
5
  "keywords": [
6
6
  "whatsapp",