@peopl-health/nexus 2.4.9-fix-pdf-processing → 2.4.9-fix-mime-type

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  const { PDFDocument } = require('pdf-lib');
2
2
  const { execFile } = require('child_process');
3
3
  const fs = require('fs').promises;
4
+ const fsSync = require('fs');
4
5
  const path = require('path');
5
6
  const sharp = require('sharp');
6
7
 
@@ -22,28 +23,52 @@ async function convertPdfToImages(pdfName, existingPdfPath = null) {
22
23
  const args = ['-jpeg', pdfPath, outputPattern];
23
24
  logger.info('[convertPdfToImages] Running: pdftoppm', args.join(' '));
24
25
 
25
- execFile('pdftoppm', args, (error, stdout, stderr) => {
26
+ const timeout = 30000;
27
+ let timedOut = false;
28
+
29
+ const child = execFile('pdftoppm', args, { timeout, maxBuffer: 10 * 1024 * 1024 }, (error, stdout, stderr) => {
30
+ if (timedOut) {
31
+ return;
32
+ }
33
+
26
34
  if (error) {
27
35
  logger.error('[convertPdfToImages] Error details:', {
28
36
  error: error.message,
29
37
  stderr,
30
38
  pdfPath,
31
- pdfExists: require('fs').existsSync(pdfPath)
39
+ pdfExists: fsSync.existsSync(pdfPath),
40
+ killed: error.killed,
41
+ signal: error.signal
32
42
  });
33
43
  return reject(new Error(`Error splitting PDF: ${stderr || error.message}`));
34
44
  }
35
45
 
36
- fs.readdir(outputDir, (err, files) => {
37
- if (err) {
38
- return reject(new Error(`Error reading output directory: ${err.message}`));
39
- }
46
+ logger.info('[convertPdfToImages] pdftoppm completed successfully');
40
47
 
41
- const jpgFiles = files
42
- .filter(file => file.startsWith(sanitizedName) && file.endsWith('.jpg'))
43
- .map(file => path.join(outputDir, file));
48
+ fs.readdir(outputDir)
49
+ .then(files => {
50
+ const jpgFiles = files
51
+ .filter(file => file.startsWith(sanitizedName) && file.endsWith('.jpg'))
52
+ .map(file => path.join(outputDir, file));
44
53
 
45
- resolve(jpgFiles);
46
- });
54
+ logger.info(`[convertPdfToImages] Found ${jpgFiles.length} image files`);
55
+ resolve(jpgFiles);
56
+ })
57
+ .catch(err => {
58
+ logger.error('[convertPdfToImages] Error reading output directory:', { error: err.message });
59
+ reject(new Error(`Error reading output directory: ${err.message}`));
60
+ });
61
+ });
62
+
63
+ const timeoutId = setTimeout(() => {
64
+ timedOut = true;
65
+ child.kill('SIGTERM');
66
+ logger.error('[convertPdfToImages] Process timed out after 30 seconds', { pdfPath });
67
+ reject(new Error('PDF conversion timed out after 30 seconds'));
68
+ }, timeout);
69
+
70
+ child.on('exit', () => {
71
+ clearTimeout(timeoutId);
47
72
  });
48
73
  });
49
74
  }
@@ -136,46 +161,69 @@ const cleanupFiles = async (files) => {
136
161
  };
137
162
 
138
163
  async function downloadMediaAndCreateFile(code, reply) {
139
- const resultMedia = await Message.findOne({
140
- message_id: reply.message_id,
141
- timestamp: reply.timestamp,
142
- media: { $ne: null }
143
- });
164
+ try {
165
+ const resultMedia = await Message.findOne({
166
+ message_id: reply.message_id,
167
+ timestamp: reply.timestamp,
168
+ media: { $ne: null }
169
+ });
144
170
 
145
- if (!resultMedia) return [];
171
+ if (!resultMedia) return [];
146
172
 
147
- if (!resultMedia.media || !resultMedia.media.key) {
148
- logger.info('[downloadMediaAndCreateFile] No valid media found for message:', reply.message_id);
149
- return [];
150
- }
173
+ if (!resultMedia.media || !resultMedia.media.key) {
174
+ logger.info('[downloadMediaAndCreateFile] No valid media found for message:', reply.message_id);
175
+ return [];
176
+ }
151
177
 
152
- const { bucketName, key } = resultMedia.media;
153
- if (!bucketName || !key) return [];
154
-
155
- const [subType, fileName] = key.split('/');
156
-
157
- const sanitizedCode = sanitizeFilename(code, 20);
158
- const sanitizedSubType = sanitizeFilename(subType, 10);
159
- const sanitizedFileName = sanitizeFilename(fileName, 50);
160
-
161
- const sourceFile = `${sanitizedCode}-${sanitizedSubType}-${sanitizedFileName}`;
162
- const downloadPath = path.join(__dirname, 'assets', 'tmp', sourceFile);
163
-
164
- logger.info('[downloadMediaAndCreateFile] Downloading file', { sourceFile, downloadPath, bucketName, key });
165
-
166
- await fs.mkdir(path.dirname(downloadPath), { recursive: true });
167
- await downloadFileFromS3(bucketName, key, downloadPath);
178
+ const { bucketName, key } = resultMedia.media;
179
+ if (!bucketName || !key) return [];
180
+
181
+ const [subType, fileName] = key.split('/');
182
+
183
+ const sanitizedCode = sanitizeFilename(code, 20);
184
+ const sanitizedSubType = sanitizeFilename(subType, 10);
185
+ const sanitizedFileName = sanitizeFilename(fileName, 50);
186
+
187
+ const sourceFile = `${sanitizedCode}-${sanitizedSubType}-${sanitizedFileName}`;
188
+ const downloadPath = path.join(__dirname, 'assets', 'tmp', sourceFile);
189
+
190
+ logger.info('[downloadMediaAndCreateFile] Downloading file', { sourceFile, downloadPath, bucketName, key });
191
+
192
+ await fs.mkdir(path.dirname(downloadPath), { recursive: true });
193
+ await downloadFileFromS3(bucketName, key, downloadPath);
168
194
 
169
- const { name: baseName } = path.parse(sourceFile);
170
- const fileNames = (subType === 'document' || subType === 'application')
171
- ? await convertPdfToImages(baseName, downloadPath)
172
- : [downloadPath];
195
+ const { name: baseName } = path.parse(sourceFile);
196
+ let fileNames = [];
197
+
198
+ if (subType === 'document' || subType === 'application') {
199
+ try {
200
+ fileNames = await convertPdfToImages(baseName, downloadPath);
201
+ logger.info('[downloadMediaAndCreateFile] PDF converted successfully', { imageCount: fileNames.length });
202
+ } catch (conversionError) {
203
+ logger.error('[downloadMediaAndCreateFile] PDF conversion failed:', {
204
+ error: conversionError.message,
205
+ sourceFile
206
+ });
207
+ fileNames = [];
208
+ } finally {
209
+ try {
210
+ await fs.unlink(downloadPath);
211
+ } catch (unlinkError) {
212
+ logger.warn('[downloadMediaAndCreateFile] Failed to delete PDF:', { error: unlinkError.message });
213
+ }
214
+ }
215
+ } else {
216
+ fileNames = [downloadPath];
217
+ }
173
218
 
174
- if (subType === 'document' || subType === 'application') {
175
- await fs.unlink(downloadPath);
219
+ return fileNames;
220
+ } catch (error) {
221
+ logger.error('[downloadMediaAndCreateFile] Error processing media:', {
222
+ error: error.message,
223
+ message_id: reply.message_id
224
+ });
225
+ return [];
176
226
  }
177
-
178
- return fileNames;
179
227
  }
180
228
 
181
229
  module.exports = {
@@ -1,7 +1,7 @@
1
1
  const llmConfig = require('../config/llmConfig.js');
2
2
  const { logger } = require('../utils/logger');
3
3
  const fs = require('fs');
4
- const mime = require('mime-types');
4
+ const path = require('path');
5
5
 
6
6
 
7
7
  async function analyzeImage(imagePath, isSticker = false, contentType = null) {
@@ -30,14 +30,30 @@ async function analyzeImage(imagePath, isSticker = false, contentType = null) {
30
30
  };
31
31
  }
32
32
 
33
+ // Determine mime type from file extension
33
34
  let mimeType = contentType;
34
35
  if (!mimeType) {
35
- if (imagePath.toLowerCase().endsWith('.webp')) {
36
- mimeType = 'image/webp';
37
- } else {
38
- mimeType = mime.lookup(imagePath) || 'image/jpeg';
39
- }
36
+ const ext = path.extname(imagePath).toLowerCase();
37
+ const mimeMap = {
38
+ '.jpg': 'image/jpeg',
39
+ '.jpeg': 'image/jpeg',
40
+ '.png': 'image/png',
41
+ '.gif': 'image/gif',
42
+ '.webp': 'image/webp'
43
+ };
44
+ mimeType = mimeMap[ext] || 'image/jpeg'; // Default to jpeg for pdftoppm output
45
+ }
46
+
47
+ // Validate that mime type is supported by Claude
48
+ const supportedMimeTypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
49
+ if (!supportedMimeTypes.includes(mimeType)) {
50
+ logger.warn('[analyzeImage] Unsupported mime type, defaulting to image/jpeg:', {
51
+ originalMimeType: mimeType,
52
+ imagePath
53
+ });
54
+ mimeType = 'image/jpeg';
40
55
  }
56
+
41
57
  if (mimeType === 'image/vnd.wap.wbmp') {
42
58
  logger.info('Skipping image with MIME type:', mimeType);
43
59
  return {
@@ -49,6 +65,7 @@ async function analyzeImage(imagePath, isSticker = false, contentType = null) {
49
65
  };
50
66
  }
51
67
  // Read the image file and convert to base64
68
+ logger.info('[analyzeImage] Reading image file:', { imagePath: imagePath.split('/').pop() });
52
69
  const imageBuffer = await fs.promises.readFile(imagePath);
53
70
  const base64Image = imageBuffer.toString('base64');
54
71
 
@@ -77,6 +94,7 @@ async function analyzeImage(imagePath, isSticker = false, contentType = null) {
77
94
  },
78
95
  ],
79
96
  });
97
+ logger.info('[analyzeImage] Description received');
80
98
  const description = messageDescription.content[0].text;
81
99
 
82
100
  // For stickers, skip medical analysis and table extraction
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@peopl-health/nexus",
3
- "version": "2.4.9-fix-pdf-processing",
3
+ "version": "2.4.9-fix-mime-type",
4
4
  "description": "Core messaging and assistant library for WhatsApp communication platforms",
5
5
  "keywords": [
6
6
  "whatsapp",