@ottolab/extraction 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +49 -0
- package/dist/index.js.map +1 -0
- package/dist/lab-detector.d.ts +12 -0
- package/dist/lab-detector.d.ts.map +1 -0
- package/dist/lab-detector.js +42 -0
- package/dist/lab-detector.js.map +1 -0
- package/dist/llm-extractor.d.ts +18 -0
- package/dist/llm-extractor.d.ts.map +1 -0
- package/dist/llm-extractor.js +133 -0
- package/dist/llm-extractor.js.map +1 -0
- package/dist/prompts/base.d.ts +12 -0
- package/dist/prompts/base.d.ts.map +1 -0
- package/{src/prompts/base.ts → dist/prompts/base.js} +1 -1
- package/dist/prompts/base.js.map +1 -0
- package/dist/prompts/labcorp.d.ts +12 -0
- package/dist/prompts/labcorp.d.ts.map +1 -0
- package/{src/prompts/labcorp.ts → dist/prompts/labcorp.js} +1 -0
- package/dist/prompts/labcorp.js.map +1 -0
- package/dist/prompts/quest.d.ts +11 -0
- package/dist/prompts/quest.d.ts.map +1 -0
- package/{src/prompts/quest.ts → dist/prompts/quest.js} +1 -0
- package/dist/prompts/quest.js.map +1 -0
- package/dist/validator.d.ts +39 -0
- package/dist/validator.d.ts.map +1 -0
- package/dist/validator.js +191 -0
- package/dist/validator.js.map +1 -0
- package/package.json +8 -2
- package/__tests__/llm-extractor.test.ts +0 -73
- package/__tests__/validator.test.ts +0 -243
- package/src/index.ts +0 -69
- package/src/lab-detector.ts +0 -60
- package/src/llm-extractor.ts +0 -157
- package/src/validator.ts +0 -218
- package/tsconfig.json +0 -10
- package/tsconfig.tsbuildinfo +0 -1
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { ExtractionResult, ExtendedLLMProvider } from '@ottolab/shared';
|
|
2
|
+
/**
 * Payload handed to `runExtractionPipeline`.
 *
 * Both fields are optional at the type level, but the pipeline rejects an
 * input that carries neither of them.
 */
export interface ParseInput {
    /** PDF document contents, base64-encoded. */
    pdf?: string;
    /** CSV contents as raw text. */
    csv?: string;
}
|
|
8
|
+
/**
 * Entry point of the extraction pipeline.
 *
 * For PDF input:
 *   1. Lab detection (LLM classification → Quest | LabCorp | international | unknown)
 *   2. Structured extraction (multimodal LLM with a lab-specific few-shot prompt)
 *   3. Validation (clinical range checks, cross-biomarker consistency, confidence)
 *
 * For CSV input:
 *   1. Column parsing and header normalization
 *   2. Validation
 *
 * @param input - The PDF (base64) or CSV (raw text) payload to process.
 * @param provider - LLM provider; optional in the signature, but the PDF flow
 *   throws when it is missing.
 * @returns A promise resolving to the extraction result.
 */
export declare function runExtractionPipeline(
    input: ParseInput,
    provider?: ExtendedLLMProvider
): Promise<ExtractionResult>;
|
|
21
|
+
export { detectLab } from './lab-detector.js';
|
|
22
|
+
export { extractFromPdf, extractFromCsv } from './llm-extractor.js';
|
|
23
|
+
export { validateExtraction } from './validator.js';
|
|
24
|
+
export type { RawBiomarkers } from './llm-extractor.js';
|
|
25
|
+
export type { ValidationResult } from './validator.js';
|
|
26
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAK7E,MAAM,WAAW,UAAU;IACzB,8BAA8B;IAC9B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,mBAAmB;IACnB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,qBAAqB,CACzC,KAAK,EAAE,UAAU,EACjB,QAAQ,CAAC,EAAE,mBAAmB,GAC7B,OAAO,CAAC,gBAAgB,CAAC,CAmC3B;AAED,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,YAAY,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACxD,YAAY,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { detectLab } from './lab-detector.js';
|
|
2
|
+
import { extractFromPdf, extractFromCsv } from './llm-extractor.js';
|
|
3
|
+
import { validateExtraction } from './validator.js';
|
|
4
|
+
/**
 * Run the extraction pipeline on a PDF or CSV payload.
 *
 * A truthy `input.pdf` takes precedence over `input.csv`:
 *   - PDF: detect the source lab, extract with the lab-specific prompt, then
 *     validate. Requires `provider`.
 *   - CSV: parse the text and validate. No LLM is involved; the result is
 *     always labelled with lab 'unknown' and language 'en'.
 *
 * @param input - Object carrying `pdf` (base64) and/or `csv` (raw text).
 * @param provider - LLM provider; only needed for the PDF flow.
 * @returns An object with `biomarkers`, `sourceLab`, `sourceLanguage` and `confidence`.
 * @throws Error when PDF input is given without a provider, or when neither
 *   `pdf` nor `csv` is provided.
 */
export async function runExtractionPipeline(input, provider) {
    const pdfData = input.pdf;
    if (!pdfData) {
        const csvText = input.csv;
        if (!csvText) {
            throw new Error('Either pdf (base64) or csv text must be provided');
        }
        // CSV path: purely local parsing followed by validation.
        const { biomarkers, confidence } = validateExtraction(extractFromCsv(csvText));
        return {
            biomarkers,
            sourceLab: 'unknown',
            sourceLanguage: 'en',
            confidence,
        };
    }
    if (!provider) {
        throw new Error('LLM provider required for PDF extraction');
    }
    // PDF path: classify the lab first so extraction can use the matching prompt.
    const { lab, language } = await detectLab(pdfData, provider);
    const extracted = await extractFromPdf(pdfData, lab, provider);
    const { biomarkers, confidence } = validateExtraction(extracted);
    return {
        biomarkers,
        sourceLab: lab,
        sourceLanguage: language,
        confidence,
    };
}
|
|
46
|
+
export { detectLab } from './lab-detector.js';
|
|
47
|
+
export { extractFromPdf, extractFromCsv } from './llm-extractor.js';
|
|
48
|
+
export { validateExtraction } from './validator.js';
|
|
49
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AASpD;;;;;;;;;;;GAWG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,KAAiB,EACjB,QAA8B;IAE9B,IAAI,KAAK,CAAC,GAAG,EAAE,CAAC;QACd,IAAI,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAE3E,4BAA4B;QAC5B,MAAM,SAAS,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QAEvD,sDAAsD;QACtD,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC,KAAK,CAAC,GAAG,EAAE,SAAS,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QAErE,iCAAiC;QACjC,MAAM,UAAU,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;QAE3C,OAAO;YACL,UAAU,EAAE,UAAU,CAAC,UAAU;YACjC,SAAS,EAAE,SAAS,CAAC,GAAG;YACxB,cAAc,EAAE,SAAS,CAAC,QAAQ;YAClC,UAAU,EAAE,UAAU,CAAC,UAAU;SAClC,CAAC;IACJ,CAAC;IAED,IAAI,KAAK,CAAC,GAAG,EAAE,CAAC;QACd,iCAAiC;QACjC,MAAM,GAAG,GAAG,cAAc,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACtC,MAAM,UAAU,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;QAE3C,OAAO;YACL,UAAU,EAAE,UAAU,CAAC,UAAU;YACjC,SAAS,EAAE,SAAS;YACpB,cAAc,EAAE,IAAI;YACpB,UAAU,EAAE,UAAU,CAAC,UAAU;SAClC,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,kDAAkD,CAAC,CAAC;AACtE,CAAC;AAED,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { SourceLab, ExtendedLLMProvider } from '@ottolab/shared';
|
|
2
|
+
/** Outcome of classifying which laboratory produced a report. */
export interface LabDetectionResult {
    /** Detected source laboratory. */
    lab: SourceLab;
    /** Language reported for the document; `detectLab` falls back to 'en'. */
    language: string;
    /** Classification confidence; `detectLab` clamps it to the range 0 to 1. */
    confidence: number;
}
|
|
7
|
+
/**
 * Classify the source laboratory of a PDF report with an LLM.
 *
 * The PDF is passed to the provider together with a classification prompt so
 * the model can identify Quest, LabCorp, etc.
 *
 * @param pdfBase64 - The PDF document, base64-encoded.
 * @param provider - LLM provider used for the multimodal call.
 * @returns A promise resolving to the detected lab, language and confidence.
 */
export declare function detectLab(
    pdfBase64: string,
    provider: ExtendedLLMProvider
): Promise<LabDetectionResult>;
|
|
12
|
+
//# sourceMappingURL=lab-detector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lab-detector.d.ts","sourceRoot":"","sources":["../src/lab-detector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAGtE,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,SAAS,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,wBAAsB,SAAS,CAC7B,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,mBAAmB,GAC5B,OAAO,CAAC,kBAAkB,CAAC,CA0B7B"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { LAB_DETECTION_PROMPT } from './prompts/base.js';
|
|
2
|
+
/**
 * Detect the source laboratory of a PDF report using LLM classification.
 *
 * The base64 payload is sent as-is (no page slicing happens here) together
 * with LAB_DETECTION_PROMPT, and the model's JSON answer is sanitised before
 * being returned, since LLM output cannot be trusted to match the schema.
 *
 * This is deliberately best-effort: any failure (provider error, unparsable
 * response) yields the neutral fallback instead of throwing.
 *
 * @param pdfBase64 - The PDF document, base64-encoded.
 * @param provider - Object exposing `chatMultimodal(parts, options)`.
 * @returns `{ lab, language, confidence }` where `lab` is one of
 *   'quest' | 'labcorp' | 'international' | 'unknown', `language` is a
 *   non-empty string (default 'en') and `confidence` is a finite number
 *   clamped to [0, 1].
 */
export async function detectLab(pdfBase64, provider) {
    try {
        const response = await provider.chatMultimodal([
            { type: 'document', data: pdfBase64, mimeType: 'application/pdf' },
            { type: 'text', text: LAB_DETECTION_PROMPT },
        ], { temperature: 0, maxTokens: 256, responseFormat: 'json' });
        const parsed = parseJsonResponse(response);
        // Anything outside the known set collapses to 'unknown'.
        const validLabs = ['quest', 'labcorp', 'international', 'unknown'];
        const lab = validLabs.includes(parsed.lab) ? parsed.lab : 'unknown';
        // Only accept a real, non-empty string as the language code.
        const language = typeof parsed.language === 'string' && parsed.language !== ''
            ? parsed.language
            : 'en';
        // Coerce first so values like "high" become 0 rather than leaking NaN.
        const numericConfidence = Number(parsed.confidence);
        const confidence = Number.isFinite(numericConfidence)
            ? Math.min(Math.max(numericConfidence, 0), 1)
            : 0;
        return { lab, language, confidence };
    }
    catch {
        return { lab: 'unknown', language: 'en', confidence: 0 };
    }
}
|
|
28
|
+
/**
 * Extract and parse the JSON object embedded in an LLM text response.
 *
 * Leading/trailing markdown code fences are stripped, then everything from
 * the first `{` to the last `}` is handed to `JSON.parse`, so surrounding
 * chatter is tolerated.
 *
 * @param text - Raw response text from the LLM.
 * @returns The parsed JSON value.
 * @throws Error('No JSON object found in response') when the input is not a
 *   string or contains no `{ ... }` span; SyntaxError when the span is not
 *   valid JSON.
 */
function parseJsonResponse(text) {
    if (typeof text !== 'string') {
        throw new Error('No JSON object found in response');
    }
    let cleaned = text.trim();
    // Strip markdown code fences if present
    if (cleaned.startsWith('```')) {
        cleaned = cleaned.replace(/^```(?:json)?\s*/, '').replace(/\s*```$/, '');
    }
    // Find JSON object boundaries
    const start = cleaned.indexOf('{');
    const end = cleaned.lastIndexOf('}');
    // `end < start` covers both a missing '}' (-1) and a '}' that only occurs
    // before the first '{', which would otherwise slice to an empty string.
    if (start === -1 || end < start) {
        throw new Error('No JSON object found in response');
    }
    return JSON.parse(cleaned.slice(start, end + 1));
}
|
|
42
|
+
//# sourceMappingURL=lab-detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lab-detector.js","sourceRoot":"","sources":["../src/lab-detector.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AAQzD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,SAAiB,EACjB,QAA6B;IAE7B,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,cAAc,CAC5C;YACE,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,iBAAiB,EAAE;YAClE,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,oBAAoB,EAAE;SAC7C,EACD,EAAE,WAAW,EAAE,CAAC,EAAE,SAAS,EAAE,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,CAC3D,CAAC;QAEF,MAAM,MAAM,GAAG,iBAAiB,CAAqB,QAAQ,CAAC,CAAC;QAE/D,qBAAqB;QACrB,MAAM,SAAS,GAAgB,CAAC,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,SAAS,CAAC,CAAC;QAChF,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,MAAM,CAAC,GAAG,GAAG,SAAS,CAAC;QACzB,CAAC;QAED,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,IAAI;YACjC,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;SAC7D,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC3D,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAI,IAAY;IACxC,wCAAwC;IACxC,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC1B,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAC3E,CAAC;IAED,8BAA8B;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;IACtD,CAAC;IAED,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAM,CAAC;AACxD,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { SourceLab, ExtendedLLMProvider } from '@ottolab/shared';
|
|
2
|
+
/**
 * Unvalidated extraction output: a flat map from biomarker key to a number,
 * a string, or null.
 */
export type RawBiomarkers = Record<string, number | string | null>;
|
|
4
|
+
/**
 * Pull biomarker values out of a PDF with a multimodal LLM.
 *
 * The base64 PDF is sent inline along with the prompt selected for `lab`.
 * The result is the raw key/value output, not yet validated or normalized.
 *
 * @param pdfBase64 - The PDF document, base64-encoded.
 * @param lab - Detected source lab, used to pick the extraction prompt.
 * @param provider - LLM provider used for the multimodal call.
 * @returns A promise resolving to the raw biomarker map.
 */
export declare function extractFromPdf(
    pdfBase64: string,
    lab: SourceLab,
    provider: ExtendedLLMProvider
): Promise<RawBiomarkers>;
|
|
11
|
+
/**
 * Pull biomarker values out of CSV text.
 *
 * The CSV is expected to be simple: a header row of biomarker names followed
 * by a single data row of values.
 *
 * @param csvText - Raw CSV text.
 * @returns The raw biomarker map built from the header/value pairs.
 */
export declare function extractFromCsv(csvText: string): RawBiomarkers;
|
|
18
|
+
//# sourceMappingURL=llm-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-extractor.d.ts","sourceRoot":"","sources":["../src/llm-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAKtE,uDAAuD;AACvD,MAAM,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC;AAgBnE;;;;;GAKG;AACH,wBAAsB,cAAc,CAClC,SAAS,EAAE,MAAM,EACjB,GAAG,EAAE,SAAS,EACd,QAAQ,EAAE,mBAAmB,GAC5B,OAAO,CAAC,aAAa,CAAC,CAYxB;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,aAAa,CAoB7D"}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { BASE_EXTRACTION_PROMPT } from './prompts/base.js';
|
|
2
|
+
import { QUEST_EXTRACTION_PROMPT } from './prompts/quest.js';
|
|
3
|
+
import { LABCORP_EXTRACTION_PROMPT } from './prompts/labcorp.js';
|
|
4
|
+
/**
 * Pick the extraction prompt that matches the detected lab.
 *
 * 'quest' and 'labcorp' have dedicated prompts; every other value gets the
 * base prompt.
 */
function getPromptForLab(lab) {
    if (lab === 'quest') {
        return QUEST_EXTRACTION_PROMPT;
    }
    if (lab === 'labcorp') {
        return LABCORP_EXTRACTION_PROMPT;
    }
    return BASE_EXTRACTION_PROMPT;
}
|
|
17
|
+
/**
 * Extract biomarkers from a PDF through a multimodal LLM call.
 *
 * The base64 PDF is sent inline as a document part, followed by the text
 * prompt chosen for `lab`. The reply is parsed into raw key/value pairs,
 * which are not yet validated or normalized.
 *
 * @param pdfBase64 - The PDF document, base64-encoded.
 * @param lab - Detected source lab, used to choose the prompt.
 * @param provider - Object exposing `chatMultimodal(parts, options)`.
 * @returns The parsed JSON object from the model's reply.
 */
export async function extractFromPdf(pdfBase64, lab, provider) {
    const parts = [
        { type: 'document', data: pdfBase64, mimeType: 'application/pdf' },
        { type: 'text', text: getPromptForLab(lab) },
    ];
    const options = { temperature: 0, maxTokens: 4096, responseFormat: 'json' };
    const reply = await provider.chatMultimodal(parts, options);
    return parseExtractionResponse(reply);
}
|
|
31
|
+
/**
 * Build a raw biomarker map from CSV text.
 *
 * Only the first two lines are used: line one is the header row (biomarker
 * names), line two is the single data row. Headers are trimmed, lower-cased
 * and normalized to canonical keys; cells that are empty or missing are
 * skipped. A cell that `parseFloat` can read is stored as a number,
 * otherwise the trimmed text is kept.
 *
 * @param csvText - Raw CSV text.
 * @returns Map from normalized key to number or string.
 * @throws Error when the text has fewer than two lines.
 */
export function extractFromCsv(csvText) {
    const rows = csvText.trim().split('\n');
    if (rows.length < 2) {
        throw new Error('CSV must have at least a header row and a data row');
    }
    const cells = rows[1].split(',').map((cell) => cell.trim());
    const biomarkers = {};
    rows[0].split(',').forEach((column, index) => {
        const key = normalizeHeaderName(column.trim().toLowerCase());
        const cell = cells[index];
        if (key && cell) {
            const parsed = parseFloat(cell);
            biomarkers[key] = Number.isNaN(parsed) ? cell : parsed;
        }
    });
    return biomarkers;
}
|
|
55
|
+
/**
 * Known CSV header spellings mapped to canonical biomarker keys.
 *
 * Kept in a Map (built once at module load) so that lookups never fall
 * through to inherited Object.prototype members such as `constructor`.
 */
const HEADER_ALIASES = new Map([
    ['total cholesterol', 'total_cholesterol'],
    ['cholesterol', 'total_cholesterol'],
    ['ldl-c', 'ldl_c'],
    ['ldl', 'ldl_c'],
    ['hdl-c', 'hdl'],
    ['hdl', 'hdl'],
    ['triglycerides', 'triglycerides'],
    ['tg', 'triglycerides'],
    ['hba1c', 'hba1c'],
    ['a1c', 'hba1c'],
    ['glucose', 'fasting_glucose'],
    ['fasting glucose', 'fasting_glucose'],
    ['insulin', 'fasting_insulin'],
    ['fasting insulin', 'fasting_insulin'],
    ['uric acid', 'uric_acid'],
    ['creatinine', 'creatinine'],
    ['bun', 'bun'],
    ['egfr', 'egfr'],
    ['alt', 'alt'],
    ['sgpt', 'alt'],
    ['ast', 'ast'],
    ['sgot', 'ast'],
    ['alp', 'alp'],
    ['ggt', 'ggt'],
    ['bilirubin', 'bilirubin_total'],
    ['albumin', 'albumin'],
    ['hs-crp', 'hs_crp'],
    ['crp', 'hs_crp'],
    ['esr', 'esr'],
    ['cortisol', 'cortisol'],
    ['testosterone', 'testosterone'],
    ['estradiol', 'estradiol'],
    ['tsh', 'tsh'],
    ['wbc', 'wbc'],
    ['rbc', 'rbc'],
    ['hemoglobin', 'hemoglobin'],
    ['hgb', 'hemoglobin'],
    ['hematocrit', 'hematocrit'],
    ['hct', 'hematocrit'],
    ['platelets', 'platelets'],
    ['plt', 'platelets'],
    ['mcv', 'mcv'],
    ['rdw', 'rdw'],
    ['lymphocyte %', 'lymphocyte_percent'],
    ['lymphocytes', 'lymphocyte_percent'],
    ['vitamin d', 'vitamin_d'],
    ['25-oh vitamin d', 'vitamin_d'],
    ['weight', 'weight_kg'],
    ['height', 'height_cm'],
    ['bmi', 'bmi'],
    ['body fat', 'body_fat_percent'],
    ['systolic bp', 'systolic_bp'],
    ['diastolic bp', 'diastolic_bp'],
    ['heart rate', 'heart_rate'],
    ['apob', 'apoB'],
    ['apo b', 'apoB'],
]);
/**
 * Normalize a CSV header name to a canonical biomarker key.
 *
 * Known aliases are looked up exactly as given (the caller passes headers
 * already trimmed and lower-cased). Unknown headers fall back to a generic
 * form: runs of whitespace/hyphens become a single underscore and the result
 * is lower-cased.
 *
 * @param header - Header text to normalize.
 * @returns The canonical key; always a string, including for headers such as
 *   'constructor' or '__proto__' that collide with Object.prototype members.
 */
function normalizeHeaderName(header) {
    return HEADER_ALIASES.get(header) ?? header.replace(/[\s-]+/g, '_').toLowerCase();
}
|
|
119
|
+
/**
 * Extract and parse the JSON object embedded in an LLM extraction response.
 *
 * Leading/trailing markdown code fences are stripped, then everything from
 * the first `{` to the last `}` is handed to `JSON.parse`, so surrounding
 * chatter is tolerated.
 *
 * @param text - Raw response text from the LLM.
 * @returns The parsed JSON value (the raw biomarker map).
 * @throws Error('No JSON object found in LLM extraction response') when the
 *   input is not a string or contains no `{ ... }` span; SyntaxError when the
 *   span is not valid JSON.
 */
function parseExtractionResponse(text) {
    if (typeof text !== 'string') {
        throw new Error('No JSON object found in LLM extraction response');
    }
    let cleaned = text.trim();
    // Strip markdown code fences
    if (cleaned.startsWith('```')) {
        cleaned = cleaned.replace(/^```(?:json)?\s*/, '').replace(/\s*```$/, '');
    }
    // Find JSON object
    const start = cleaned.indexOf('{');
    const end = cleaned.lastIndexOf('}');
    // `end < start` covers both a missing '}' (-1) and a '}' that only occurs
    // before the first '{', which would otherwise slice to an empty string.
    if (start === -1 || end < start) {
        throw new Error('No JSON object found in LLM extraction response');
    }
    return JSON.parse(cleaned.slice(start, end + 1));
}
|
|
133
|
+
//# sourceMappingURL=llm-extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-extractor.js","sourceRoot":"","sources":["../src/llm-extractor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAC3D,OAAO,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAC7D,OAAO,EAAE,yBAAyB,EAAE,MAAM,sBAAsB,CAAC;AAKjE;;GAEG;AACH,SAAS,eAAe,CAAC,GAAc;IACrC,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,OAAO;YACV,OAAO,uBAAuB,CAAC;QACjC,KAAK,SAAS;YACZ,OAAO,yBAAyB,CAAC;QACnC;YACE,OAAO,sBAAsB,CAAC;IAClC,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,SAAiB,EACjB,GAAc,EACd,QAA6B;IAE7B,MAAM,MAAM,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,cAAc,CAC5C;QACE,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,iBAAiB,EAAE;QAClE,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE;KAC/B,EACD,EAAE,WAAW,EAAE,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,CAC5D,CAAC;IAEF,OAAO,uBAAuB,CAAC,QAAQ,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,OAAe;IAC5C,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;IACxE,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IACvE,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAExD,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,GAAG,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACtB,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,GAAG,KAAK,EAAE;YAAE,SAAS;QAEzC,MAAM,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAC9C,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,MAAc;IACzC,MAAM,QAAQ,GAA2B;QACvC,mBAAmB,EAAE,mBAAmB;QACxC,WAAW,EAAE,mBAAmB;QAChC,OAAO,EAAE,OAAO;QAChB,GAAG,EAAE,O
AAO;QACZ,OAAO,EAAE,KAAK;QACd,GAAG,EAAE,KAAK;QACV,aAAa,EAAE,eAAe;QAC9B,EAAE,EAAE,eAAe;QACnB,KAAK,EAAE,OAAO;QACd,GAAG,EAAE,OAAO;QACZ,OAAO,EAAE,iBAAiB;QAC1B,iBAAiB,EAAE,iBAAiB;QACpC,OAAO,EAAE,iBAAiB;QAC1B,iBAAiB,EAAE,iBAAiB;QACpC,WAAW,EAAE,WAAW;QACxB,UAAU,EAAE,YAAY;QACxB,GAAG,EAAE,KAAK;QACV,IAAI,EAAE,MAAM;QACZ,GAAG,EAAE,KAAK;QACV,IAAI,EAAE,KAAK;QACX,GAAG,EAAE,KAAK;QACV,IAAI,EAAE,KAAK;QACX,GAAG,EAAE,KAAK;QACV,GAAG,EAAE,KAAK;QACV,SAAS,EAAE,iBAAiB;QAC5B,OAAO,EAAE,SAAS;QAClB,QAAQ,EAAE,QAAQ;QAClB,GAAG,EAAE,QAAQ;QACb,GAAG,EAAE,KAAK;QACV,QAAQ,EAAE,UAAU;QACpB,YAAY,EAAE,cAAc;QAC5B,SAAS,EAAE,WAAW;QACtB,GAAG,EAAE,KAAK;QACV,GAAG,EAAE,KAAK;QACV,GAAG,EAAE,KAAK;QACV,UAAU,EAAE,YAAY;QACxB,GAAG,EAAE,YAAY;QACjB,UAAU,EAAE,YAAY;QACxB,GAAG,EAAE,YAAY;QACjB,SAAS,EAAE,WAAW;QACtB,GAAG,EAAE,WAAW;QAChB,GAAG,EAAE,KAAK;QACV,GAAG,EAAE,KAAK;QACV,cAAc,EAAE,oBAAoB;QACpC,WAAW,EAAE,oBAAoB;QACjC,WAAW,EAAE,WAAW;QACxB,iBAAiB,EAAE,WAAW;QAC9B,MAAM,EAAE,WAAW;QACnB,MAAM,EAAE,WAAW;QACnB,GAAG,EAAE,KAAK;QACV,UAAU,EAAE,kBAAkB;QAC9B,aAAa,EAAE,aAAa;QAC5B,cAAc,EAAE,cAAc;QAC9B,YAAY,EAAE,YAAY;QAC1B,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,MAAM;KAChB,CAAC;IAEF,OAAO,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAY;IAC3C,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE1B,6BAA6B;IAC7B,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAC3E,CAAC;IAED,mBAAmB;IACnB,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;IACrE,CAAC;IAED,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAkB,CAAC;AACpE,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base extraction prompt — universal lab report parser.
|
|
3
|
+
*
|
|
4
|
+
* Used when lab source is unknown or for international labs.
|
|
5
|
+
* Instructs the LLM to extract structured biomarkers from any lab PDF.
|
|
6
|
+
*/
|
|
7
|
+
export declare const BASE_EXTRACTION_PROMPT = "You are a clinical laboratory data extraction system. Extract ALL biomarker values from this lab report into a structured JSON object.\n\n## Output Format\n\nReturn ONLY a raw JSON object (no markdown, no explanation, no code fences). The JSON must have this exact structure:\n\n{\n \"gender\": \"m\" or \"f\" or null,\n \"weight_kg\": <number or null>,\n \"height_cm\": <number or null>,\n \"bmi\": <number or null>,\n \"body_fat_percent\": <number or null>,\n \"systolic_bp\": <number or null>,\n \"diastolic_bp\": <number or null>,\n \"heart_rate\": <number or null>,\n \"total_cholesterol\": <mg/dL>,\n \"ldl_c\": <mg/dL>,\n \"hdl\": <mg/dL>,\n \"triglycerides\": <mg/dL>,\n \"apoB\": <mg/dL>,\n \"hba1c\": <%>,\n \"fasting_glucose\": <mg/dL>,\n \"fasting_insulin\": <uIU/mL>,\n \"uric_acid\": <mg/dL>,\n \"creatinine\": <mg/dL>,\n \"bun\": <mg/dL>,\n \"egfr\": <mL/min/1.73m2>,\n \"alt\": <U/L>,\n \"ast\": <U/L>,\n \"alp\": <U/L>,\n \"ggt\": <U/L>,\n \"bilirubin_total\": <mg/dL>,\n \"albumin\": <g/dL>,\n \"hs_crp\": <mg/L>,\n \"esr\": <mm/hr>,\n \"cortisol\": <ug/dL>,\n \"testosterone\": <ng/dL>,\n \"estradiol\": <pg/mL>,\n \"tsh\": <mIU/L>,\n \"wbc\": <10^3/uL>,\n \"rbc\": <10^6/uL>,\n \"hemoglobin\": <g/dL>,\n \"hematocrit\": <%>,\n \"platelets\": <10^3/uL>,\n \"mcv\": <fL>,\n \"rdw\": <%>,\n \"lymphocyte_percent\": <%>,\n \"vitamin_d\": <ng/mL>\n}\n\n## Critical Instructions\n\n1. Read EVERY page of the report. Parse ALL table rows, not just the first page.\n2. Use null for any value not found in the report.\n3. The report may be bilingual (e.g., Chinese/English, Spanish/English). Parse BOTH languages.\n4. 
For reference ranges, DO NOT include them in the output \u2014 only extract actual values.\n\n## Unit Conversions \u2014 apply these BEFORE outputting:\n\n- Cholesterol (Total, LDL, HDL): if mmol/L \u2192 multiply by 38.67 to get mg/dL\n- Triglycerides: if mmol/L \u2192 multiply by 88.57 to get mg/dL\n- Glucose: if mmol/L \u2192 multiply by 18.016 to get mg/dL\n- Creatinine: if \u00B5mol/L \u2192 divide by 88.4 to get mg/dL\n- Uric Acid: if \u00B5mol/L \u2192 divide by 59.48 to get mg/dL\n- Albumin: if g/L \u2192 divide by 10 to get g/dL\n- BUN: if mmol/L \u2192 multiply by 2.801 to get mg/dL\n- hs-CRP: if mg/dL \u2192 multiply by 10 to get mg/L\n- Bilirubin: if \u00B5mol/L \u2192 divide by 17.1 to get mg/dL\n- Hemoglobin: if g/L \u2192 divide by 10 to get g/dL\n- Vitamin D: if nmol/L \u2192 divide by 2.496 to get ng/mL\n\nReturn ONLY the JSON object.";
|
|
8
|
+
/**
|
|
9
|
+
* Lab detection prompt — classify the source laboratory.
|
|
10
|
+
*/
|
|
11
|
+
export declare const LAB_DETECTION_PROMPT = "Analyze this lab report and classify its source. Return ONLY a JSON object:\n\n{\n \"lab\": \"quest\" | \"labcorp\" | \"international\" | \"unknown\",\n \"language\": \"<ISO 639-1 code, e.g., en, zh, es>\",\n \"confidence\": <0.0 to 1.0>\n}\n\nClassification rules:\n- \"quest\": Quest Diagnostics reports \u2014 look for Quest logo, \"Quest Diagnostics\", QuestAssureD\n- \"labcorp\": LabCorp reports \u2014 look for LabCorp logo, \"Laboratory Corporation of America\"\n- \"international\": Non-US labs, reports in languages other than English, SI units throughout\n- \"unknown\": Cannot determine with confidence\n\nReturn ONLY the JSON object.";
|
|
12
|
+
//# sourceMappingURL=base.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/prompts/base.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,eAAO,MAAM,sBAAsB,0+EAuEN,CAAC;AAE9B;;GAEG;AACH,eAAO,MAAM,oBAAoB,8oBAcJ,CAAC"}
|
|
@@ -76,7 +76,6 @@ Return ONLY a raw JSON object (no markdown, no explanation, no code fences). The
|
|
|
76
76
|
- Vitamin D: if nmol/L → divide by 2.496 to get ng/mL
|
|
77
77
|
|
|
78
78
|
Return ONLY the JSON object.`;
|
|
79
|
-
|
|
80
79
|
/**
|
|
81
80
|
* Lab detection prompt — classify the source laboratory.
|
|
82
81
|
*/
|
|
@@ -95,3 +94,4 @@ Classification rules:
|
|
|
95
94
|
- "unknown": Cannot determine with confidence
|
|
96
95
|
|
|
97
96
|
Return ONLY the JSON object.`;
|
|
97
|
+
//# sourceMappingURL=base.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base.js","sourceRoot":"","sources":["../../src/prompts/base.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;6BAuET,CAAC;AAE9B;;GAEG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG;;;;;;;;;;;;;;6BAcP,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LabCorp-specific extraction prompt.
|
|
3
|
+
*
|
|
4
|
+
* LabCorp reports:
|
|
5
|
+
* - Dense tabular format, sometimes multi-column
|
|
6
|
+
* - Results in US conventional units
|
|
7
|
+
* - Flag column: H, L, *, or blank for normal
|
|
8
|
+
* - Often includes calculated values (LDL calc, eGFR, HOMA-IR)
|
|
9
|
+
* - NMR LipoProfile may have particle numbers (not standard lipid panel)
|
|
10
|
+
*/
|
|
11
|
+
export declare const LABCORP_EXTRACTION_PROMPT = "You are a clinical laboratory data extraction system specialized in LabCorp reports.\n\n## LabCorp Report Format Notes\n- Results are in US conventional units (mg/dL, g/dL, U/L) \u2014 use values as-is\n- Dense tabular layout, sometimes multi-column per page\n- Flags: H = high, L = low, * = critical\n- May include NMR LipoProfile \u2014 extract standard lipid values, not particle counts\n- eGFR and calculated LDL are often included as separate line items\n- CBC may show both relative (%) and absolute counts\n\n## Output Format\n\nReturn ONLY a raw JSON object (no markdown, no code fences):\n\n{\n \"gender\": \"m\" or \"f\" or null,\n \"weight_kg\": <number or null>,\n \"height_cm\": <number or null>,\n \"bmi\": <number or null>,\n \"total_cholesterol\": <mg/dL>,\n \"ldl_c\": <mg/dL>,\n \"hdl\": <mg/dL>,\n \"triglycerides\": <mg/dL>,\n \"apoB\": <mg/dL>,\n \"hba1c\": <%>,\n \"fasting_glucose\": <mg/dL>,\n \"fasting_insulin\": <uIU/mL>,\n \"uric_acid\": <mg/dL>,\n \"creatinine\": <mg/dL>,\n \"bun\": <mg/dL>,\n \"egfr\": <mL/min/1.73m2>,\n \"alt\": <U/L>,\n \"ast\": <U/L>,\n \"alp\": <U/L>,\n \"ggt\": <U/L>,\n \"bilirubin_total\": <mg/dL>,\n \"albumin\": <g/dL>,\n \"hs_crp\": <mg/L>,\n \"esr\": <mm/hr>,\n \"cortisol\": <ug/dL>,\n \"testosterone\": <ng/dL>,\n \"estradiol\": <pg/mL>,\n \"tsh\": <mIU/L>,\n \"wbc\": <10^3/uL>,\n \"rbc\": <10^6/uL>,\n \"hemoglobin\": <g/dL>,\n \"hematocrit\": <%>,\n \"platelets\": <10^3/uL>,\n \"mcv\": <fL>,\n \"rdw\": <%>,\n \"lymphocyte_percent\": <%>,\n \"vitamin_d\": <ng/mL>\n}\n\nRead EVERY page. Use null for values not found. Return ONLY the JSON.";
|
|
12
|
+
//# sourceMappingURL=labcorp.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"labcorp.d.ts","sourceRoot":"","sources":["../../src/prompts/labcorp.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,eAAO,MAAM,yBAAyB,+mDAsDgC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"labcorp.js","sourceRoot":"","sources":["../../src/prompts/labcorp.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;sEAsD6B,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quest Diagnostics-specific extraction prompt.
|
|
3
|
+
*
|
|
4
|
+
* Quest reports have a distinctive format:
|
|
5
|
+
* - Tabular layout with Test Name | Result | Flag | Reference Range
|
|
6
|
+
* - Results in US conventional units (mg/dL, g/dL, U/L)
|
|
7
|
+
* - Flags: H (high), L (low), * (critical)
|
|
8
|
+
* - Multi-page with patient demographics on page 1
|
|
9
|
+
*/
|
|
10
|
+
export declare const QUEST_EXTRACTION_PROMPT = "You are a clinical laboratory data extraction system specialized in Quest Diagnostics reports.\n\n## Quest Report Format Notes\n- Results are in US conventional units (mg/dL, g/dL, U/L) \u2014 use values as-is\n- Table columns: Test Name | Result | Flag | Reference Range\n- Flags: H = high, L = low, * = critical, A = abnormal\n- CBC differential may show both absolute counts and percentages\n- eGFR is often calculated and shown separately\n\n## Output Format\n\nReturn ONLY a raw JSON object (no markdown, no code fences):\n\n{\n \"gender\": \"m\" or \"f\" or null,\n \"weight_kg\": <number or null>,\n \"height_cm\": <number or null>,\n \"bmi\": <number or null>,\n \"total_cholesterol\": <mg/dL>,\n \"ldl_c\": <mg/dL>,\n \"hdl\": <mg/dL>,\n \"triglycerides\": <mg/dL>,\n \"apoB\": <mg/dL>,\n \"hba1c\": <%>,\n \"fasting_glucose\": <mg/dL>,\n \"fasting_insulin\": <uIU/mL>,\n \"uric_acid\": <mg/dL>,\n \"creatinine\": <mg/dL>,\n \"bun\": <mg/dL>,\n \"egfr\": <mL/min/1.73m2>,\n \"alt\": <U/L>,\n \"ast\": <U/L>,\n \"alp\": <U/L>,\n \"ggt\": <U/L>,\n \"bilirubin_total\": <mg/dL>,\n \"albumin\": <g/dL>,\n \"hs_crp\": <mg/L>,\n \"esr\": <mm/hr>,\n \"cortisol\": <ug/dL>,\n \"testosterone\": <ng/dL>,\n \"estradiol\": <pg/mL>,\n \"tsh\": <mIU/L>,\n \"wbc\": <10^3/uL>,\n \"rbc\": <10^6/uL>,\n \"hemoglobin\": <g/dL>,\n \"hematocrit\": <%>,\n \"platelets\": <10^3/uL>,\n \"mcv\": <fL>,\n \"rdw\": <%>,\n \"lymphocyte_percent\": <%>,\n \"vitamin_d\": <ng/mL>\n}\n\nRead EVERY page. Use null for values not found. Return ONLY the JSON.";
|
|
11
|
+
//# sourceMappingURL=quest.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"quest.d.ts","sourceRoot":"","sources":["../../src/prompts/quest.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,eAAO,MAAM,uBAAuB,yiDAqDkC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"quest.js","sourceRoot":"","sources":["../../src/prompts/quest.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;sEAqD+B,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { BiomarkerSet } from '@ottolab/shared';
|
|
2
|
+
import type { RawBiomarkers } from './llm-extractor.js';
|
|
3
|
+
/**
 * Validation bounds per biomarker key.
 *
 * The bounds are intentionally generous so that real values are not rejected;
 * a value falling outside them is most likely an extraction error.
 * NOTE(review): each tuple is presumably [min, max]; confirm against the
 * implementation, which is not part of this declaration file.
 */
declare const CLINICAL_RANGES: Record<string, [number, number]>;
|
|
8
|
+
/**
 * Lookup from raw extraction key to the matching `BiomarkerSet` field plus
 * display metadata (name and unit).
 */
declare const BIOMARKER_MAP: Record<
    string,
    {
        field: keyof BiomarkerSet;
        name: string;
        unit: string;
    }
>;
|
|
16
|
+
/**
 * Result returned by `validateExtraction`.
 *
 * NOTE(review): the field descriptions below are inferred from the names and
 * from the validator's documented steps; the implementation is not visible
 * in this declaration file, so confirm before relying on them.
 */
export interface ValidationResult {
    /** Typed biomarker set produced from the raw extraction. */
    biomarkers: BiomarkerSet;
    /** Presumably the number of values that passed validation. */
    accepted: number;
    /** Presumably the number of values that were rejected. */
    rejected: number;
    /** Extraction confidence score. */
    confidence: number;
    /** One entry per rejected value: its key, the offending value and a reason. */
    rejections: {
        key: string;
        value: unknown;
        reason: string;
    }[];
}
|
|
27
|
+
/**
 * Turn raw LLM extraction output into a validated, typed `BiomarkerSet`.
 *
 * Documented steps:
 *   1. Drop null/undefined entries
 *   2. Type-check numeric values
 *   3. Range-check against generous clinical bounds
 *   4. Map keys to `BiomarkerSet` fields
 *   5. Compute the extraction confidence score
 *
 * @param raw - Flat key/value map produced by the extractor.
 * @returns The validation result, including the typed biomarkers.
 */
export declare function validateExtraction(raw: RawBiomarkers): ValidationResult;
|
|
38
|
+
export { CLINICAL_RANGES, BIOMARKER_MAP };
|
|
39
|
+
//# sourceMappingURL=validator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../src/validator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAkB,YAAY,EAAE,MAAM,iBAAiB,CAAC;AACpE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAExD;;;GAGG;AACH,QAAA,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAyCrD,CAAC;AAEF;;GAEG;AACH,QAAA,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,YAAY,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAyC5F,CAAC;AAEF,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,YAAY,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,OAAO,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CAC/D;AAED;;;;;;;;;GASG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,aAAa,GAAG,gBAAgB,CAqDvE;AAkDD,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,CAAC"}
|