@peopl-health/nexus 2.4.8 → 2.4.9-fix-pdf-processing
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/helpers/filesHelper.js +14 -6
- package/lib/helpers/llmsHelper.js +12 -6
- package/lib/helpers/processHelper.js +1 -1
- package/lib/providers/OpenAIResponsesProvider.js +6 -0
- package/lib/services/assistantService.js +54 -38
- package/lib/utils/mediaValidator.js +18 -14
- package/lib/utils/tracingDecorator.js +7 -1
- package/package.json +1 -1
|
@@ -9,11 +9,11 @@ const { Message } = require('../models/messageModel.js');
|
|
|
9
9
|
const { sanitizeFilename } = require('../utils/sanitizer.js');
|
|
10
10
|
const { logger } = require('../utils/logger');
|
|
11
11
|
|
|
12
|
-
async function convertPdfToImages(pdfName) {
|
|
12
|
+
async function convertPdfToImages(pdfName, existingPdfPath = null) {
|
|
13
13
|
const outputDir = path.join(__dirname, 'assets', 'tmp');
|
|
14
14
|
|
|
15
15
|
const sanitizedName = sanitizeFilename(pdfName);
|
|
16
|
-
const pdfPath = path.join(outputDir, `${sanitizedName}.pdf`);
|
|
16
|
+
const pdfPath = existingPdfPath || path.join(outputDir, `${sanitizedName}.pdf`);
|
|
17
17
|
const outputPattern = path.join(outputDir, sanitizedName);
|
|
18
18
|
|
|
19
19
|
await fs.mkdir(outputDir, { recursive: true });
|
|
@@ -24,6 +24,12 @@ async function convertPdfToImages(pdfName) {
|
|
|
24
24
|
|
|
25
25
|
execFile('pdftoppm', args, (error, stdout, stderr) => {
|
|
26
26
|
if (error) {
|
|
27
|
+
logger.error('[convertPdfToImages] Error details:', {
|
|
28
|
+
error: error.message,
|
|
29
|
+
stderr,
|
|
30
|
+
pdfPath,
|
|
31
|
+
pdfExists: require('fs').existsSync(pdfPath)
|
|
32
|
+
});
|
|
27
33
|
return reject(new Error(`Error splitting PDF: ${stderr || error.message}`));
|
|
28
34
|
}
|
|
29
35
|
|
|
@@ -148,19 +154,21 @@ async function downloadMediaAndCreateFile(code, reply) {
|
|
|
148
154
|
|
|
149
155
|
const [subType, fileName] = key.split('/');
|
|
150
156
|
|
|
151
|
-
const sanitizedCode = sanitizeFilename(code);
|
|
152
|
-
const sanitizedSubType = sanitizeFilename(subType);
|
|
153
|
-
const sanitizedFileName = sanitizeFilename(fileName);
|
|
157
|
+
const sanitizedCode = sanitizeFilename(code, 20);
|
|
158
|
+
const sanitizedSubType = sanitizeFilename(subType, 10);
|
|
159
|
+
const sanitizedFileName = sanitizeFilename(fileName, 50);
|
|
154
160
|
|
|
155
161
|
const sourceFile = `${sanitizedCode}-${sanitizedSubType}-${sanitizedFileName}`;
|
|
156
162
|
const downloadPath = path.join(__dirname, 'assets', 'tmp', sourceFile);
|
|
157
163
|
|
|
164
|
+
logger.info('[downloadMediaAndCreateFile] Downloading file', { sourceFile, downloadPath, bucketName, key });
|
|
165
|
+
|
|
158
166
|
await fs.mkdir(path.dirname(downloadPath), { recursive: true });
|
|
159
167
|
await downloadFileFromS3(bucketName, key, downloadPath);
|
|
160
168
|
|
|
161
169
|
const { name: baseName } = path.parse(sourceFile);
|
|
162
170
|
const fileNames = (subType === 'document' || subType === 'application')
|
|
163
|
-
? await convertPdfToImages(baseName)
|
|
171
|
+
? await convertPdfToImages(baseName, downloadPath)
|
|
164
172
|
: [downloadPath];
|
|
165
173
|
|
|
166
174
|
if (subType === 'document' || subType === 'application') {
|
|
@@ -4,7 +4,7 @@ const fs = require('fs');
|
|
|
4
4
|
const mime = require('mime-types');
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
async function analyzeImage(imagePath, isSticker = false) {
|
|
7
|
+
async function analyzeImage(imagePath, isSticker = false, contentType = null) {
|
|
8
8
|
try {
|
|
9
9
|
const anthropicClient = llmConfig.anthropicClient;
|
|
10
10
|
if (!anthropicClient || !anthropicClient.messages) {
|
|
@@ -30,8 +30,14 @@ async function analyzeImage(imagePath, isSticker = false) {
|
|
|
30
30
|
};
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
let mimeType = contentType;
|
|
34
|
+
if (!mimeType) {
|
|
35
|
+
if (imagePath.toLowerCase().endsWith('.webp')) {
|
|
36
|
+
mimeType = 'image/webp';
|
|
37
|
+
} else {
|
|
38
|
+
mimeType = mime.lookup(imagePath) || 'image/jpeg';
|
|
39
|
+
}
|
|
40
|
+
}
|
|
35
41
|
if (mimeType === 'image/vnd.wap.wbmp') {
|
|
36
42
|
logger.info('Skipping image with MIME type:', mimeType);
|
|
37
43
|
return {
|
|
@@ -114,7 +120,7 @@ Only extract tables - ignore any other content in the image.`;
|
|
|
114
120
|
type: 'image',
|
|
115
121
|
source: {
|
|
116
122
|
type: 'base64',
|
|
117
|
-
media_type:
|
|
123
|
+
media_type: mimeType,
|
|
118
124
|
data: base64Image,
|
|
119
125
|
},
|
|
120
126
|
},
|
|
@@ -181,7 +187,7 @@ Ejemplo 1:
|
|
|
181
187
|
type: 'image',
|
|
182
188
|
source: {
|
|
183
189
|
type: 'base64',
|
|
184
|
-
media_type:
|
|
190
|
+
media_type: mimeType,
|
|
185
191
|
data: base64Image,
|
|
186
192
|
},
|
|
187
193
|
},
|
|
@@ -209,7 +215,7 @@ Ejemplo 1:
|
|
|
209
215
|
type: 'image',
|
|
210
216
|
source: {
|
|
211
217
|
type: 'base64',
|
|
212
|
-
media_type:
|
|
218
|
+
media_type: mimeType,
|
|
213
219
|
data: base64Image,
|
|
214
220
|
},
|
|
215
221
|
},
|
|
@@ -66,7 +66,7 @@ const processImageFile = async (fileName, reply) => {
|
|
|
66
66
|
fileName.toLowerCase().includes('/sticker/');
|
|
67
67
|
|
|
68
68
|
try {
|
|
69
|
-
imageAnalysis = await analyzeImage(fileName, isSticker);
|
|
69
|
+
imageAnalysis = await analyzeImage(fileName, isSticker, reply.media?.contentType);
|
|
70
70
|
|
|
71
71
|
logger.info('processImageFile', {
|
|
72
72
|
message_id: reply.message_id,
|
|
@@ -212,6 +212,12 @@ class OpenAIResponsesProvider {
|
|
|
212
212
|
|
|
213
213
|
if (payloads.length === 0) return null;
|
|
214
214
|
|
|
215
|
+
if (payloads.length > MAX_ITEMS_PER_BATCH) {
|
|
216
|
+
logger.info(`[OpenAIResponsesProvider] Batching ${payloads.length} messages into chunks of ${MAX_ITEMS_PER_BATCH}`);
|
|
217
|
+
await this._addItemsInBatches(id, payloads, MAX_ITEMS_PER_BATCH);
|
|
218
|
+
return { batched: true, count: payloads.length };
|
|
219
|
+
}
|
|
220
|
+
|
|
215
221
|
return this._retryWithRateLimit(async () => {
|
|
216
222
|
if (this.conversations?.items?.create) {
|
|
217
223
|
return await this.conversations.items.create(id, { items: payloads });
|
|
@@ -273,21 +273,28 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
|
|
|
273
273
|
const timings = {};
|
|
274
274
|
const startTotal = Date.now();
|
|
275
275
|
|
|
276
|
-
|
|
277
|
-
|
|
276
|
+
const { result: thread, duration: getThreadMs } = await withTracing(
|
|
277
|
+
getThread,
|
|
278
|
+
'get_thread_operation',
|
|
278
279
|
(threadCode) => ({
|
|
279
280
|
'thread.code': threadCode,
|
|
280
281
|
'operation.type': 'thread_retrieval',
|
|
281
282
|
'thread.provided': !!thread_
|
|
282
|
-
})
|
|
283
|
+
}),
|
|
284
|
+
{ returnTiming: true }
|
|
283
285
|
)(code);
|
|
284
|
-
timings.
|
|
286
|
+
timings.get_thread_ms = getThreadMs;
|
|
285
287
|
|
|
286
|
-
if (!thread) return null;
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
const patientReply = await
|
|
290
|
-
|
|
288
|
+
if (!thread_ && !thread) return null;
|
|
289
|
+
const finalThread = thread_ || thread;
|
|
290
|
+
|
|
291
|
+
const { result: patientReply, duration: getMessagesMs } = await withTracing(
|
|
292
|
+
getLastMessages,
|
|
293
|
+
'get_last_messages',
|
|
294
|
+
(code) => ({ 'thread.code': code }),
|
|
295
|
+
{ returnTiming: true }
|
|
296
|
+
)(code);
|
|
297
|
+
timings.get_messages_ms = getMessagesMs;
|
|
291
298
|
|
|
292
299
|
if (!patientReply) {
|
|
293
300
|
logger.info('[replyAssistantCore] No relevant data found for this assistant.');
|
|
@@ -296,10 +303,18 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
|
|
|
296
303
|
|
|
297
304
|
const provider = createProvider({ variant: process.env.VARIANT || 'assistants' });
|
|
298
305
|
|
|
299
|
-
timings.processMessages = Date.now();
|
|
300
306
|
logger.info(`[replyAssistantCore] Processing ${patientReply.length} messages in parallel`);
|
|
301
307
|
|
|
302
|
-
const processResults = await
|
|
308
|
+
const { result: processResults, duration: processMessagesMs } = await withTracing(
|
|
309
|
+
processThreadMessage,
|
|
310
|
+
'process_thread_messages',
|
|
311
|
+
(code, patientReply, provider) => ({
|
|
312
|
+
'messages.count': patientReply.length,
|
|
313
|
+
'thread.code': code
|
|
314
|
+
}),
|
|
315
|
+
{ returnTiming: true }
|
|
316
|
+
)(code, patientReply, provider);
|
|
317
|
+
timings.process_messages_ms = processMessagesMs;
|
|
303
318
|
|
|
304
319
|
const patientMsg = processResults.some(r => r.isPatient);
|
|
305
320
|
const urls = processResults.filter(r => r.url).map(r => ({ url: r.url }));
|
|
@@ -307,21 +322,27 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
|
|
|
307
322
|
const allTempFiles = processResults.flatMap(r => r.tempFiles || []);
|
|
308
323
|
|
|
309
324
|
if (allMessagesToAdd.length > 0) {
|
|
310
|
-
const threadId =
|
|
325
|
+
const threadId = finalThread.getConversationId();
|
|
311
326
|
logger.info(`[replyAssistantCore] Adding ${allMessagesToAdd.length} messages to thread in batch`);
|
|
312
327
|
await provider.addMessage({ threadId, messages: allMessagesToAdd });
|
|
313
328
|
}
|
|
314
329
|
|
|
315
|
-
await Promise.all(processResults.map(r => updateMessageRecord(r.reply,
|
|
330
|
+
await Promise.all(processResults.map(r => updateMessageRecord(r.reply, finalThread)));
|
|
316
331
|
await cleanupFiles(allTempFiles);
|
|
317
332
|
|
|
318
|
-
timings.processMessages = Date.now() - timings.processMessages;
|
|
319
|
-
|
|
320
333
|
if (urls.length > 0) {
|
|
321
|
-
timings.pdfCombination = Date.now();
|
|
322
334
|
logger.info(`[replyAssistantCore] Processing ${urls.length} URLs for PDF combination`);
|
|
323
|
-
const {
|
|
324
|
-
|
|
335
|
+
const { result: pdfResult, duration: pdfCombinationMs } = await withTracing(
|
|
336
|
+
combineImagesToPDF,
|
|
337
|
+
'combine_images_to_pdf',
|
|
338
|
+
({ code }) => ({
|
|
339
|
+
'pdf.thread_code': code,
|
|
340
|
+
'pdf.url_count': urls.length
|
|
341
|
+
}),
|
|
342
|
+
{ returnTiming: true }
|
|
343
|
+
)({ code });
|
|
344
|
+
timings.pdf_combination_ms = pdfCombinationMs;
|
|
345
|
+
const { pdfBuffer, processedFiles } = pdfResult;
|
|
325
346
|
logger.info(`[replyAssistantCore] PDF combination complete: ${processedFiles?.length || 0} files processed`);
|
|
326
347
|
|
|
327
348
|
if (pdfBuffer) {
|
|
@@ -337,47 +358,42 @@ const replyAssistantCore = async (code, message_ = null, thread_ = null, runOpti
|
|
|
337
358
|
}
|
|
338
359
|
}
|
|
339
360
|
|
|
340
|
-
if (!patientMsg ||
|
|
361
|
+
if (!patientMsg || finalThread.stopped) return null;
|
|
341
362
|
|
|
342
|
-
|
|
343
|
-
const
|
|
344
|
-
const { run, output, completed, retries, predictionTimeMs } = await withTracing(
|
|
363
|
+
const assistant = getAssistantById(finalThread.getAssistantId(), finalThread);
|
|
364
|
+
const { result: runResult, duration: runAssistantMs } = await withTracing(
|
|
345
365
|
runAssistantWithRetries,
|
|
346
366
|
'run_assistant_with_retries',
|
|
347
367
|
(thread, assistant, runConfig, patientReply) => ({
|
|
348
368
|
'assistant.id': thread.getAssistantId(),
|
|
349
369
|
'assistant.max_retries': DEFAULT_MAX_RETRIES,
|
|
350
370
|
'assistant.has_patient_reply': !!patientReply
|
|
351
|
-
})
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
timings.
|
|
371
|
+
}),
|
|
372
|
+
{ returnTiming: true }
|
|
373
|
+
)(finalThread, assistant, runOptions, patientReply);
|
|
374
|
+
timings.run_assistant_ms = runAssistantMs;
|
|
375
|
+
timings.total_ms = Date.now() - startTotal;
|
|
376
|
+
|
|
377
|
+
const { run, output, completed, retries, predictionTimeMs } = runResult;
|
|
355
378
|
|
|
356
|
-
logger.info('[
|
|
379
|
+
logger.info('[Assistant Reply Complete]', {
|
|
357
380
|
code: code ? `${code.substring(0, 3)}***${code.slice(-4)}` : 'unknown',
|
|
358
381
|
messageCount: patientReply.length,
|
|
359
382
|
hasMedia: urls.length > 0,
|
|
360
383
|
retries,
|
|
361
|
-
|
|
384
|
+
totalMs: timings.total_ms
|
|
362
385
|
});
|
|
363
386
|
|
|
364
387
|
if (output && predictionTimeMs) {
|
|
365
388
|
await PredictionMetrics.create({
|
|
366
389
|
message_id: `${code}-${Date.now()}`,
|
|
367
390
|
numero: code,
|
|
368
|
-
assistant_id:
|
|
369
|
-
thread_id:
|
|
391
|
+
assistant_id: finalThread.getAssistantId(),
|
|
392
|
+
thread_id: finalThread.getConversationId(),
|
|
370
393
|
prediction_time_ms: predictionTimeMs,
|
|
371
394
|
retry_count: retries,
|
|
372
395
|
completed: completed,
|
|
373
|
-
timing_breakdown:
|
|
374
|
-
get_thread_ms: timings.getThread,
|
|
375
|
-
get_messages_ms: timings.getMessages,
|
|
376
|
-
process_messages_ms: timings.processMessages,
|
|
377
|
-
pdf_combination_ms: timings.pdfCombination || 0,
|
|
378
|
-
run_assistant_ms: timings.runAssistant,
|
|
379
|
-
total_ms: timings.total
|
|
380
|
-
}
|
|
396
|
+
timing_breakdown: timings
|
|
381
397
|
}).catch(err => logger.error('[replyAssistantCore] Failed to store metrics:', err));
|
|
382
398
|
}
|
|
383
399
|
|
|
@@ -65,23 +65,27 @@ function getMediaType(contentType) {
|
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
function validateMedia(media, contentType) {
|
|
68
|
+
const fileSize = Buffer.isBuffer(media) ? media.length : media;
|
|
69
|
+
|
|
70
|
+
if (contentType === 'image/webp') {
|
|
71
|
+
const mediaType = fileSize <= MEDIA_LIMITS.sticker ? 'sticker' : 'image';
|
|
72
|
+
const formatValidation = validateMediaFormat(contentType, mediaType);
|
|
73
|
+
if (!formatValidation.valid) return formatValidation;
|
|
74
|
+
|
|
75
|
+
const sizeValidation = validateMediaSize(media, mediaType);
|
|
76
|
+
if (!sizeValidation.valid) return sizeValidation;
|
|
77
|
+
|
|
78
|
+
return { valid: true, mediaType, message: `Media validated successfully as ${mediaType}` };
|
|
79
|
+
}
|
|
80
|
+
|
|
68
81
|
const mediaType = getMediaType(contentType);
|
|
69
82
|
const formatValidation = validateMediaFormat(contentType, mediaType);
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
return formatValidation;
|
|
73
|
-
}
|
|
74
|
-
|
|
83
|
+
if (!formatValidation.valid) return formatValidation;
|
|
84
|
+
|
|
75
85
|
const sizeValidation = validateMediaSize(media, mediaType);
|
|
76
|
-
if (!sizeValidation.valid)
|
|
77
|
-
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
return {
|
|
81
|
-
valid: true,
|
|
82
|
-
mediaType,
|
|
83
|
-
message: `Media validated successfully as ${mediaType}`
|
|
84
|
-
};
|
|
86
|
+
if (!sizeValidation.valid) return sizeValidation;
|
|
87
|
+
|
|
88
|
+
return { valid: true, mediaType, message: `Media validated successfully as ${mediaType}` };
|
|
85
89
|
}
|
|
86
90
|
|
|
87
91
|
module.exports = {
|
|
@@ -4,9 +4,10 @@ const { SpanStatusCode } = require('@opentelemetry/api');
|
|
|
4
4
|
/**
|
|
5
5
|
* Usage: const tracedFunction = withTracing(originalFunction, 'operation_name');
|
|
6
6
|
*/
|
|
7
|
-
const withTracing = (fn, spanName, attributeMapper = null) => {
|
|
7
|
+
const withTracing = (fn, spanName, attributeMapper = null, options = {}) => {
|
|
8
8
|
return async function (...args) {
|
|
9
9
|
const span = createSpan(spanName);
|
|
10
|
+
const startTime = Date.now();
|
|
10
11
|
|
|
11
12
|
try {
|
|
12
13
|
if (attributeMapper && typeof attributeMapper === 'function') {
|
|
@@ -16,6 +17,11 @@ const withTracing = (fn, spanName, attributeMapper = null) => {
|
|
|
16
17
|
const result = await fn.apply(this, args);
|
|
17
18
|
|
|
18
19
|
span.setStatus({ code: SpanStatusCode.OK });
|
|
20
|
+
|
|
21
|
+
if (options.returnTiming) {
|
|
22
|
+
const duration = Date.now() - startTime;
|
|
23
|
+
return { result, duration };
|
|
24
|
+
}
|
|
19
25
|
return result;
|
|
20
26
|
|
|
21
27
|
} catch (error) {
|