@ottolab/extraction 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +49 -0
- package/dist/index.js.map +1 -0
- package/dist/lab-detector.d.ts +12 -0
- package/dist/lab-detector.d.ts.map +1 -0
- package/dist/lab-detector.js +42 -0
- package/dist/lab-detector.js.map +1 -0
- package/dist/llm-extractor.d.ts +18 -0
- package/dist/llm-extractor.d.ts.map +1 -0
- package/dist/llm-extractor.js +133 -0
- package/dist/llm-extractor.js.map +1 -0
- package/dist/prompts/base.d.ts +12 -0
- package/dist/prompts/base.d.ts.map +1 -0
- package/{src/prompts/base.ts → dist/prompts/base.js} +1 -1
- package/dist/prompts/base.js.map +1 -0
- package/dist/prompts/labcorp.d.ts +12 -0
- package/dist/prompts/labcorp.d.ts.map +1 -0
- package/{src/prompts/labcorp.ts → dist/prompts/labcorp.js} +1 -0
- package/dist/prompts/labcorp.js.map +1 -0
- package/dist/prompts/quest.d.ts +11 -0
- package/dist/prompts/quest.d.ts.map +1 -0
- package/{src/prompts/quest.ts → dist/prompts/quest.js} +1 -0
- package/dist/prompts/quest.js.map +1 -0
- package/dist/validator.d.ts +39 -0
- package/dist/validator.d.ts.map +1 -0
- package/dist/validator.js +191 -0
- package/dist/validator.js.map +1 -0
- package/package.json +8 -2
- package/__tests__/llm-extractor.test.ts +0 -73
- package/__tests__/validator.test.ts +0 -243
- package/src/index.ts +0 -69
- package/src/lab-detector.ts +0 -60
- package/src/llm-extractor.ts +0 -157
- package/src/validator.ts +0 -218
- package/tsconfig.json +0 -10
- package/tsconfig.tsbuildinfo +0 -1
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/**
 * Plausibility bounds used to sanity-check extracted biomarker values.
 *
 * Each entry maps a raw extraction key to a `[min, max]` pair; both ends are
 * inclusive. The limits are deliberately generous so that unusual-but-real
 * results are not thrown away: a value outside its pair is most likely an
 * extraction error rather than a clinical finding.
 *
 * NOTE(review): no unit is recorded here. The bounds are compared against the
 * raw number as-is, so they are presumably expressed in the unit that
 * BIOMARKER_MAP declares for the same key -- confirm when adding entries.
 */
const CLINICAL_RANGES = {
    // Body measurements and vitals
    weight_kg: [20, 300],
    height_cm: [100, 250],
    bmi: [10, 60],
    body_fat_percent: [2, 60],
    systolic_bp: [60, 250],
    diastolic_bp: [30, 150],
    heart_rate: [30, 220],

    // Lipids
    total_cholesterol: [50, 500],
    ldl_c: [10, 400],
    hdl: [5, 150],
    triglycerides: [10, 2000],
    apoB: [10, 300],

    // Glucose metabolism
    hba1c: [3, 15],
    fasting_glucose: [30, 500],
    fasting_insulin: [0.5, 300],

    // Kidney-related markers
    uric_acid: [0.5, 20],
    creatinine: [0.1, 15],
    bun: [2, 100],
    egfr: [5, 150],

    // Liver-related markers
    alt: [1, 2000],
    ast: [1, 2000],
    alp: [10, 1000],
    ggt: [1, 2000],
    bilirubin_total: [0.05, 30],
    albumin: [1, 6],

    // Inflammation
    hs_crp: [0.01, 300],
    esr: [0, 140],

    // Hormones
    cortisol: [0.5, 60],
    testosterone: [1, 2000],
    estradiol: [1, 5000],
    tsh: [0.01, 100],

    // Blood count
    wbc: [1, 50],
    rbc: [1, 10],
    hemoglobin: [3, 25],
    hematocrit: [15, 65],
    platelets: [10, 1000],
    mcv: [50, 130],
    rdw: [9, 25],
    lymphocyte_percent: [2, 70],

    // Vitamins
    vitamin_d: [3, 200],
};
|
|
47
|
+
/**
 * Translation table from raw extraction keys to output metadata.
 *
 * For every supported raw key:
 *   - `field` is the property name the validated value is stored under,
 *   - `name`  is the human-readable label copied onto the value,
 *   - `unit`  is the unit string copied onto the value (no conversion is done).
 *
 * Raw keys that do not appear here are ignored by the validator.
 */
const BIOMARKER_MAP = {
    // Body measurements and vitals
    weight_kg: { field: 'weight', name: 'Weight', unit: 'kg' },
    height_cm: { field: 'height', name: 'Height', unit: 'cm' },
    bmi: { field: 'bmi', name: 'BMI', unit: 'kg/m2' },
    body_fat_percent: { field: 'bodyFatPercent', name: 'Body Fat', unit: '%' },
    systolic_bp: { field: 'bloodPressureSystolic', name: 'Systolic BP', unit: 'mmHg' },
    diastolic_bp: { field: 'bloodPressureDiastolic', name: 'Diastolic BP', unit: 'mmHg' },
    heart_rate: { field: 'heartRate', name: 'Heart Rate', unit: 'bpm' },

    // Lipids
    total_cholesterol: { field: 'totalCholesterol', name: 'Total Cholesterol', unit: 'mg/dL' },
    ldl_c: { field: 'ldlC', name: 'LDL-C', unit: 'mg/dL' },
    hdl: { field: 'hdl', name: 'HDL', unit: 'mg/dL' },
    triglycerides: { field: 'triglycerides', name: 'Triglycerides', unit: 'mg/dL' },
    apoB: { field: 'apoB', name: 'ApoB', unit: 'mg/dL' },

    // Glucose metabolism
    hba1c: { field: 'hba1c', name: 'HbA1c', unit: '%' },
    fasting_glucose: { field: 'fastingGlucose', name: 'Fasting Glucose', unit: 'mg/dL' },
    fasting_insulin: { field: 'fastingInsulin', name: 'Fasting Insulin', unit: 'uIU/mL' },

    // Kidney-related markers
    uric_acid: { field: 'uricAcid', name: 'Uric Acid', unit: 'mg/dL' },
    creatinine: { field: 'creatinine', name: 'Creatinine', unit: 'mg/dL' },
    bun: { field: 'bun', name: 'BUN', unit: 'mg/dL' },
    egfr: { field: 'egfr', name: 'eGFR', unit: 'mL/min/1.73m2' },

    // Liver-related markers
    alt: { field: 'alt', name: 'ALT', unit: 'U/L' },
    ast: { field: 'ast', name: 'AST', unit: 'U/L' },
    alp: { field: 'alp', name: 'ALP', unit: 'U/L' },
    ggt: { field: 'ggt', name: 'GGT', unit: 'U/L' },
    bilirubin_total: { field: 'bilirubinTotal', name: 'Bilirubin (Total)', unit: 'mg/dL' },
    albumin: { field: 'albumin', name: 'Albumin', unit: 'g/dL' },

    // Inflammation
    hs_crp: { field: 'hsCrp', name: 'hs-CRP', unit: 'mg/L' },
    esr: { field: 'esr', name: 'ESR', unit: 'mm/hr' },

    // Hormones
    cortisol: { field: 'cortisol', name: 'Cortisol', unit: 'ug/dL' },
    testosterone: { field: 'testosterone', name: 'Testosterone', unit: 'ng/dL' },
    estradiol: { field: 'estradiol', name: 'Estradiol', unit: 'pg/mL' },
    tsh: { field: 'tsh', name: 'TSH', unit: 'mIU/L' },

    // Blood count
    wbc: { field: 'wbc', name: 'WBC', unit: '10^3/uL' },
    rbc: { field: 'rbc', name: 'RBC', unit: '10^6/uL' },
    hemoglobin: { field: 'hemoglobin', name: 'Hemoglobin', unit: 'g/dL' },
    hematocrit: { field: 'hematocrit', name: 'Hematocrit', unit: '%' },
    platelets: { field: 'platelets', name: 'Platelets', unit: '10^3/uL' },
    mcv: { field: 'mcv', name: 'MCV', unit: 'fL' },
    rdw: { field: 'rdw', name: 'RDW', unit: '%' },
    lymphocyte_percent: { field: 'lymphocytePercent', name: 'Lymphocyte %', unit: '%' },

    // Vitamins
    vitamin_d: { field: 'vitaminD', name: 'Vitamin D', unit: 'ng/mL' },
};
|
|
92
|
+
/**
 * Look up `key` in a lookup table, considering own properties only.
 *
 * Raw keys come from extraction output and are not trusted: a plain
 * `table[key]` would also resolve members inherited from Object.prototype
 * (for example "constructor", "toString" or "__proto__") and treat them as
 * real table entries.
 *
 * @param table Plain object used as a dictionary.
 * @param key   Property name to look up.
 * @returns The entry stored under `key`, or `undefined` when the table has no
 *          own property of that name.
 */
function ownEntry(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Validate raw extraction output and convert it into a biomarker set.
 *
 * Each `[key, value]` pair of `raw` is processed in this order:
 *   1. the `gender` key is ignored;
 *   2. `null` / `undefined` values are ignored;
 *   3. string values (qualitative results such as "Negative") are ignored;
 *   4. anything that is not a finite number is recorded as a rejection;
 *   5. a number outside its CLINICAL_RANGES bounds is recorded as a rejection;
 *   6. keys without a BIOMARKER_MAP entry are ignored;
 *   7. everything else is stored under the mapped field as
 *      `{ name, value, unit }` and counted as accepted.
 *
 * Because step 4 runs before step 6, a non-numeric, non-string value counts as
 * a rejection even when its key is unknown.
 *
 * @param raw Object of raw extraction keys to values. `null` or `undefined` is
 *            treated as an empty extraction.
 * @returns `{ biomarkers, accepted, rejected, confidence, rejections }` where
 *          `rejections` lists `{ key, value, reason }` for every rejected pair
 *          and `confidence` is the score produced by `computeConfidence`.
 */
export function validateExtraction(raw) {
    const biomarkers = {};
    const rejections = [];
    let accepted = 0;
    let rejected = 0;

    // Object.entries throws on null/undefined; an absent payload simply yields nothing.
    const entries = raw == null ? [] : Object.entries(raw);

    for (const [key, value] of entries) {
        // Skip gender and non-biomarker fields
        if (key === 'gender') {
            continue;
        }
        // Skip null/undefined
        if (value === null || value === undefined) {
            continue;
        }
        // Skip string values (qualitative results like "Negative")
        if (typeof value === 'string') {
            continue;
        }
        // Must be a finite number at this point (rejects NaN, Infinity, booleans, objects)
        if (typeof value !== 'number' || !Number.isFinite(value)) {
            rejections.push({ key, value, reason: 'not a finite number' });
            rejected++;
            continue;
        }
        // Check clinical ranges; keys without a range are not range-checked
        const range = ownEntry(CLINICAL_RANGES, key);
        if (range && (value < range[0] || value > range[1])) {
            rejections.push({
                key,
                value,
                reason: `outside clinical range [${range[0]}, ${range[1]}]`,
            });
            rejected++;
            continue;
        }
        // Map to the output field; unknown keys are skipped without being counted
        const mapping = ownEntry(BIOMARKER_MAP, key);
        if (!mapping) {
            continue;
        }
        biomarkers[mapping.field] = {
            name: mapping.name,
            value,
            unit: mapping.unit,
        };
        accepted++;
    }

    // Confidence based on yield ratio and cross-checks
    const confidence = computeConfidence(biomarkers, accepted, rejected);
    return { biomarkers, accepted, rejected, confidence, rejections };
}
|
|
150
|
+
/**
 * Compute the extraction confidence score, a number in [0, 1] rounded to two
 * decimals.
 *
 * The score is a weighted sum of three parts:
 *   - 50%: yield -- `accepted / 15`, capped at 1 (no extra credit above 15
 *          accepted markers);
 *   - 25%: share of processed values that were NOT rejected;
 *   - 25%: cross-biomarker consistency, starting at 1 and reduced by
 *          0.10 when AST is more than 5x ALT,
 *          0.15 when HDL exceeds total cholesterol,
 *          0.15 when LDL-C exceeds total cholesterol.
 *
 * @param biomarkers Validated biomarker set; only the `alt`, `ast`, `hdl`,
 *                   `ldlC` and `totalCholesterol` fields are read, each via
 *                   its `.value`.
 * @param accepted   Number of accepted values.
 * @param rejected   Number of rejected values.
 * @returns 0 when nothing was accepted, otherwise the rounded score.
 */
function computeConfidence(biomarkers, accepted, rejected) {
    if (accepted === 0) {
        return 0;
    }

    // Base confidence from yield (diminishing returns above 15 markers)
    const yieldScore = Math.min(accepted / 15, 1.0);

    // Penalty for rejections
    const total = accepted + rejected;
    const rejectionPenalty = total > 0 ? rejected / total : 0;

    // Cross-biomarker consistency checks
    let consistencyScore = 1.0;

    // AST is generally no more than about 2x ALT in healthy people; the check
    // is deliberately lenient and only penalises a ratio above 5x.
    if (biomarkers.alt && biomarkers.ast) {
        if (biomarkers.ast.value > biomarkers.alt.value * 5) {
            consistencyScore -= 0.1;
        }
    }

    // HDL is a fraction of total cholesterol, so it should not exceed it
    if (biomarkers.hdl && biomarkers.totalCholesterol) {
        if (biomarkers.hdl.value > biomarkers.totalCholesterol.value) {
            consistencyScore -= 0.15;
        }
    }

    // LDL is a fraction of total cholesterol, so it should not exceed it
    if (biomarkers.ldlC && biomarkers.totalCholesterol) {
        if (biomarkers.ldlC.value > biomarkers.totalCholesterol.value) {
            consistencyScore -= 0.15;
        }
    }

    const raw = yieldScore * 0.5 + (1 - rejectionPenalty) * 0.25 + Math.max(consistencyScore, 0) * 0.25;

    // Clamp to [0, 1], then round to two decimal places
    return Math.round(Math.min(Math.max(raw, 0), 1) * 100) / 100;
}
|
|
189
|
+
// Export for testing
|
|
190
|
+
export { CLINICAL_RANGES, BIOMARKER_MAP };
|
|
191
|
+
//# sourceMappingURL=validator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../src/validator.ts"],"names":[],"mappings":"AAGA;;;GAGG;AACH,MAAM,eAAe,GAAqC;IACxD,SAAS,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACpB,SAAS,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC;IACrB,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;IACb,gBAAgB,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACzB,WAAW,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACtB,YAAY,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACvB,UAAU,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACrB,iBAAiB,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IAC5B,KAAK,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IAChB,GAAG,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;IACb,aAAa,EAAE,CAAC,EAAE,EAAE,IAAI,CAAC;IACzB,IAAI,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACf,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACd,eAAe,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IAC1B,eAAe,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC;IAC3B,SAAS,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC;IACpB,UAAU,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC;IACrB,GAAG,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;IACb,IAAI,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;IACd,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACd,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACd,GAAG,EAAE,CAAC,EAAE,EAAE,IAAI,CAAC;IACf,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACd,eAAe,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;IAC3B,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;IACf,MAAM,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC;IACnB,GAAG,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;IACb,QAAQ,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC;IACnB,YAAY,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACvB,SAAS,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACpB,GAAG,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC;IAChB,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACZ,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACZ,UAAU,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACnB,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;IACpB,SAAS,EAAE,CAAC,EAAE,EAAE,IAAI,CAAC;IACrB,GAAG,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACd,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACZ,kBAAkB,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IAC3B,SAAS,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;CACpB,CAAC;AAEF;;GAEG;AACH,MAAM,aAAa,GAA8E;IAC/F,SAAS,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE;IAC1D,SAAS,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE;IAC1D,GAAG,EAAE,EAAE,KAA
K,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,gBAAgB,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,GAAG,EAAE;IAC1E,WAAW,EAAE,EAAE,KAAK,EAAE,uBAAuB,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,EAAE;IAClF,YAAY,EAAE,EAAE,KAAK,EAAE,wBAAwB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,MAAM,EAAE;IACrF,UAAU,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,EAAE;IACnE,iBAAiB,EAAE,EAAE,KAAK,EAAE,kBAAkB,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE;IAC1F,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE;IACtD,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,aAAa,EAAE,EAAE,KAAK,EAAE,eAAe,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,OAAO,EAAE;IAC/E,IAAI,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE;IACpD,KAAK,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE;IACnD,eAAe,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,OAAO,EAAE;IACpF,eAAe,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,QAAQ,EAAE;IACrF,SAAS,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE;IAClE,UAAU,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,OAAO,EAAE;IACtE,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,IAAI,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,eAAe,EAAE;IAC5D,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE;IAC/C,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE;IAC/C,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE;IAC/C,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE;IAC/C,eAAe,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE;IACtF,OAAO,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE;IAC5D,MAAM,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE;IACxD,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAA
K,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,QAAQ,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,OAAO,EAAE;IAChE,YAAY,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE;IAC5E,SAAS,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE;IACnE,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE;IACnD,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE;IACnD,UAAU,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,EAAE;IACrE,UAAU,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,GAAG,EAAE;IAClE,SAAS,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,SAAS,EAAE;IACrE,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE;IAC9C,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE;IAC7C,kBAAkB,EAAE,EAAE,KAAK,EAAE,mBAAmB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,GAAG,EAAE;IACnF,SAAS,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE;CACnE,CAAC;AAUF;;;;;;;;;GASG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAAkB;IACnD,MAAM,UAAU,GAAiB,EAAE,CAAC;IACpC,MAAM,UAAU,GAAsD,EAAE,CAAC;IACzE,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QAC/C,uCAAuC;QACvC,IAAI,GAAG,KAAK,QAAQ;YAAE,SAAS;QAE/B,sBAAsB;QACtB,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS;YAAE,SAAS;QAEpD,2DAA2D;QAC3D,IAAI,OAAO,KAAK,KAAK,QAAQ;YAAE,SAAS;QAExC,iCAAiC;QACjC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YACzD,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,CAAC;YAC/D,QAAQ,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,wBAAwB;QACxB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,KAAK,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACpD,UAAU,CAAC,IAAI,CAAC;gBACd,GAAG;gBACH,KAAK;gBACL,MAAM,EAAE,2BAA2B,KAAK,CAAC,CAAC,CA
AC,KAAK,KAAK,CAAC,CAAC,CAAC,GAAG;aAC5D,CAAC,CAAC;YACH,QAAQ,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,sBAAsB;QACtB,MAAM,OAAO,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,CAAC,OAAO;YAAE,SAAS,CAAC,oBAAoB;QAE5C,MAAM,cAAc,GAAmB;YACrC,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,KAAK;YACL,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAED,UAA6C,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,cAAc,CAAC;QAC/E,QAAQ,EAAE,CAAC;IACb,CAAC;IAED,mDAAmD;IACnD,MAAM,UAAU,GAAG,iBAAiB,CAAC,UAAU,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAErE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC;AACpE,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,iBAAiB,CAAC,UAAwB,EAAE,QAAgB,EAAE,QAAgB;IACrF,IAAI,QAAQ,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAE7B,oEAAoE;IACpE,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;IAEhD,yBAAyB;IACzB,MAAM,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,MAAM,gBAAgB,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAE1D,qCAAqC;IACrC,IAAI,gBAAgB,GAAG,GAAG,CAAC;IAE3B,+EAA+E;IAC/E,IAAI,UAAU,CAAC,GAAG,IAAI,UAAU,CAAC,GAAG,EAAE,CAAC;QACrC,IAAI,UAAU,CAAC,GAAG,CAAC,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;YACpD,gBAAgB,IAAI,GAAG,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,mDAAmD;IACnD,IAAI,UAAU,CAAC,GAAG,IAAI,UAAU,CAAC,gBAAgB,EAAE,CAAC;QAClD,IAAI,UAAU,CAAC,GAAG,CAAC,KAAK,GAAG,UAAU,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC;YAC7D,gBAAgB,IAAI,IAAI,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,mDAAmD;IACnD,IAAI,UAAU,CAAC,IAAI,IAAI,UAAU,CAAC,gBAAgB,EAAE,CAAC;QACnD,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC;YAC9D,gBAAgB,IAAI,IAAI,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,MAAM,GAAG,GACP,UAAU,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,gBAAgB,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC;IAC1F,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;AAC/D,CAAC;AAED,qBAAqB;AACrB,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ottolab/extraction",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"repository": "https://github.com/hokev/Otto",
|
|
6
|
-
"publishConfig": {
|
|
6
|
+
"publishConfig": {
|
|
7
|
+
"access": "public"
|
|
8
|
+
},
|
|
7
9
|
"type": "module",
|
|
10
|
+
"files": [
|
|
11
|
+
"dist"
|
|
12
|
+
],
|
|
8
13
|
"exports": {
|
|
9
14
|
".": {
|
|
10
15
|
"import": {
|
|
@@ -16,6 +21,7 @@
|
|
|
16
21
|
},
|
|
17
22
|
"scripts": {
|
|
18
23
|
"build": "tsc --build",
|
|
24
|
+
"prepublishOnly": "npm run build",
|
|
19
25
|
"lint": "eslint src/",
|
|
20
26
|
"test": "vitest run"
|
|
21
27
|
},
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from 'vitest';
|
|
2
|
-
import { extractFromCsv } from '../src/llm-extractor.js';
|
|
3
|
-
|
|
4
|
-
describe('extractFromCsv', () => {
|
|
5
|
-
it('should parse simple CSV with standard headers', () => {
|
|
6
|
-
const csv = `Total Cholesterol,LDL,HDL,Triglycerides,HbA1c
|
|
7
|
-
200,120,55,130,5.4`;
|
|
8
|
-
|
|
9
|
-
const result = extractFromCsv(csv);
|
|
10
|
-
expect(result.total_cholesterol).toBe(200);
|
|
11
|
-
expect(result.ldl_c).toBe(120);
|
|
12
|
-
expect(result.hdl).toBe(55);
|
|
13
|
-
expect(result.triglycerides).toBe(130);
|
|
14
|
-
expect(result.hba1c).toBe(5.4);
|
|
15
|
-
});
|
|
16
|
-
|
|
17
|
-
it('should normalize common header aliases', () => {
|
|
18
|
-
const csv = `LDL-C,HDL-C,A1C,Glucose,SGPT
|
|
19
|
-
110,58,5.2,92,25`;
|
|
20
|
-
|
|
21
|
-
const result = extractFromCsv(csv);
|
|
22
|
-
expect(result.ldl_c).toBe(110);
|
|
23
|
-
expect(result.hdl).toBe(58);
|
|
24
|
-
expect(result.hba1c).toBe(5.2);
|
|
25
|
-
expect(result.fasting_glucose).toBe(92);
|
|
26
|
-
expect(result.alt).toBe(25);
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
it('should handle empty values', () => {
|
|
30
|
-
const csv = `Total Cholesterol,LDL,HDL
|
|
31
|
-
200,,55`;
|
|
32
|
-
|
|
33
|
-
const result = extractFromCsv(csv);
|
|
34
|
-
expect(result.total_cholesterol).toBe(200);
|
|
35
|
-
expect(result.hdl).toBe(55);
|
|
36
|
-
expect(result.ldl_c).toBeUndefined();
|
|
37
|
-
});
|
|
38
|
-
|
|
39
|
-
it('should throw on single-line CSV (no data row)', () => {
|
|
40
|
-
expect(() => extractFromCsv('Total Cholesterol,LDL,HDL')).toThrow(
|
|
41
|
-
'at least a header row and a data row',
|
|
42
|
-
);
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
it('should handle case-insensitive headers', () => {
|
|
46
|
-
const csv = `CREATININE,BUN,eGFR
|
|
47
|
-
0.9,15,95`;
|
|
48
|
-
|
|
49
|
-
const result = extractFromCsv(csv);
|
|
50
|
-
expect(result.creatinine).toBe(0.9);
|
|
51
|
-
expect(result.bun).toBe(15);
|
|
52
|
-
expect(result.egfr).toBe(95);
|
|
53
|
-
});
|
|
54
|
-
|
|
55
|
-
it('should preserve non-numeric values as strings', () => {
|
|
56
|
-
const csv = `Albumin,Notes
|
|
57
|
-
4.2,Normal`;
|
|
58
|
-
|
|
59
|
-
const result = extractFromCsv(csv);
|
|
60
|
-
expect(result.albumin).toBe(4.2);
|
|
61
|
-
expect(result.notes).toBe('Normal');
|
|
62
|
-
});
|
|
63
|
-
|
|
64
|
-
it('should handle whitespace in headers and values', () => {
|
|
65
|
-
const csv = ` Total Cholesterol , LDL , HDL
|
|
66
|
-
200 , 120 , 55 `;
|
|
67
|
-
|
|
68
|
-
const result = extractFromCsv(csv);
|
|
69
|
-
expect(result.total_cholesterol).toBe(200);
|
|
70
|
-
expect(result.ldl_c).toBe(120);
|
|
71
|
-
expect(result.hdl).toBe(55);
|
|
72
|
-
});
|
|
73
|
-
});
|
|
@@ -1,243 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from 'vitest';
|
|
2
|
-
import { validateExtraction, CLINICAL_RANGES, BIOMARKER_MAP } from '../src/validator.js';
|
|
3
|
-
import type { RawBiomarkers } from '../src/llm-extractor.js';
|
|
4
|
-
|
|
5
|
-
describe('CLINICAL_RANGES', () => {
|
|
6
|
-
it('should have ranges for all mapped biomarkers', () => {
|
|
7
|
-
for (const key of Object.keys(BIOMARKER_MAP)) {
|
|
8
|
-
expect(CLINICAL_RANGES[key]).toBeDefined();
|
|
9
|
-
}
|
|
10
|
-
});
|
|
11
|
-
|
|
12
|
-
it('should have low < high for all ranges', () => {
|
|
13
|
-
for (const [key, [low, high]] of Object.entries(CLINICAL_RANGES)) {
|
|
14
|
-
expect(low).toBeLessThan(high);
|
|
15
|
-
expect(low).toBeTypeOf('number');
|
|
16
|
-
expect(high).toBeTypeOf('number');
|
|
17
|
-
// Verify key exists
|
|
18
|
-
expect(key).toBeTruthy();
|
|
19
|
-
}
|
|
20
|
-
});
|
|
21
|
-
});
|
|
22
|
-
|
|
23
|
-
describe('validateExtraction', () => {
|
|
24
|
-
it('should accept valid biomarkers within range', () => {
|
|
25
|
-
const raw: RawBiomarkers = {
|
|
26
|
-
total_cholesterol: 200,
|
|
27
|
-
ldl_c: 120,
|
|
28
|
-
hdl: 55,
|
|
29
|
-
triglycerides: 130,
|
|
30
|
-
hba1c: 5.4,
|
|
31
|
-
fasting_glucose: 95,
|
|
32
|
-
creatinine: 0.9,
|
|
33
|
-
albumin: 4.2,
|
|
34
|
-
};
|
|
35
|
-
|
|
36
|
-
const result = validateExtraction(raw);
|
|
37
|
-
expect(result.accepted).toBe(8);
|
|
38
|
-
expect(result.rejected).toBe(0);
|
|
39
|
-
expect(result.biomarkers.totalCholesterol?.value).toBe(200);
|
|
40
|
-
expect(result.biomarkers.ldlC?.value).toBe(120);
|
|
41
|
-
expect(result.biomarkers.hdl?.value).toBe(55);
|
|
42
|
-
expect(result.biomarkers.albumin?.value).toBe(4.2);
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
it('should reject values outside clinical ranges', () => {
|
|
46
|
-
const raw: RawBiomarkers = {
|
|
47
|
-
total_cholesterol: 9999, // way too high
|
|
48
|
-
hdl: -5, // negative
|
|
49
|
-
hba1c: 5.4, // valid
|
|
50
|
-
};
|
|
51
|
-
|
|
52
|
-
const result = validateExtraction(raw);
|
|
53
|
-
expect(result.accepted).toBe(1); // only hba1c
|
|
54
|
-
expect(result.rejected).toBe(2);
|
|
55
|
-
expect(result.biomarkers.hba1c?.value).toBe(5.4);
|
|
56
|
-
expect(result.biomarkers.totalCholesterol).toBeUndefined();
|
|
57
|
-
expect(result.biomarkers.hdl).toBeUndefined();
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
it('should skip null and undefined values', () => {
|
|
61
|
-
const raw: RawBiomarkers = {
|
|
62
|
-
total_cholesterol: null,
|
|
63
|
-
ldl_c: 100,
|
|
64
|
-
hdl: null,
|
|
65
|
-
};
|
|
66
|
-
|
|
67
|
-
const result = validateExtraction(raw);
|
|
68
|
-
expect(result.accepted).toBe(1);
|
|
69
|
-
expect(result.biomarkers.ldlC?.value).toBe(100);
|
|
70
|
-
});
|
|
71
|
-
|
|
72
|
-
it('should skip string values (qualitative results)', () => {
|
|
73
|
-
const raw: RawBiomarkers = {
|
|
74
|
-
total_cholesterol: 200,
|
|
75
|
-
some_qualitative: 'Negative',
|
|
76
|
-
};
|
|
77
|
-
|
|
78
|
-
const result = validateExtraction(raw);
|
|
79
|
-
expect(result.accepted).toBe(1);
|
|
80
|
-
expect(result.rejected).toBe(0);
|
|
81
|
-
});
|
|
82
|
-
|
|
83
|
-
it('should skip gender field', () => {
|
|
84
|
-
const raw: RawBiomarkers = {
|
|
85
|
-
gender: 'm',
|
|
86
|
-
total_cholesterol: 200,
|
|
87
|
-
};
|
|
88
|
-
|
|
89
|
-
const result = validateExtraction(raw);
|
|
90
|
-
expect(result.accepted).toBe(1);
|
|
91
|
-
});
|
|
92
|
-
|
|
93
|
-
it('should reject non-finite numbers', () => {
|
|
94
|
-
const raw: RawBiomarkers = {
|
|
95
|
-
total_cholesterol: NaN,
|
|
96
|
-
ldl_c: Infinity,
|
|
97
|
-
hdl: 55,
|
|
98
|
-
};
|
|
99
|
-
|
|
100
|
-
const result = validateExtraction(raw);
|
|
101
|
-
expect(result.accepted).toBe(1);
|
|
102
|
-
expect(result.rejected).toBe(2);
|
|
103
|
-
});
|
|
104
|
-
|
|
105
|
-
it('should map to correct BiomarkerSet fields', () => {
|
|
106
|
-
const raw: RawBiomarkers = {
|
|
107
|
-
hs_crp: 1.5,
|
|
108
|
-
vitamin_d: 45,
|
|
109
|
-
lymphocyte_percent: 30,
|
|
110
|
-
bilirubin_total: 0.8,
|
|
111
|
-
};
|
|
112
|
-
|
|
113
|
-
const result = validateExtraction(raw);
|
|
114
|
-
expect(result.biomarkers.hsCrp?.value).toBe(1.5);
|
|
115
|
-
expect(result.biomarkers.hsCrp?.unit).toBe('mg/L');
|
|
116
|
-
expect(result.biomarkers.vitaminD?.value).toBe(45);
|
|
117
|
-
expect(result.biomarkers.lymphocytePercent?.value).toBe(30);
|
|
118
|
-
expect(result.biomarkers.bilirubinTotal?.value).toBe(0.8);
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
it('should include unit and name in BiomarkerValue', () => {
|
|
122
|
-
const raw: RawBiomarkers = { alt: 25 };
|
|
123
|
-
const result = validateExtraction(raw);
|
|
124
|
-
expect(result.biomarkers.alt).toEqual({
|
|
125
|
-
name: 'ALT',
|
|
126
|
-
value: 25,
|
|
127
|
-
unit: 'U/L',
|
|
128
|
-
});
|
|
129
|
-
});
|
|
130
|
-
|
|
131
|
-
it('should record rejections with reasons', () => {
|
|
132
|
-
const raw: RawBiomarkers = {
|
|
133
|
-
total_cholesterol: 9999,
|
|
134
|
-
};
|
|
135
|
-
|
|
136
|
-
const result = validateExtraction(raw);
|
|
137
|
-
expect(result.rejections.length).toBe(1);
|
|
138
|
-
expect(result.rejections[0].key).toBe('total_cholesterol');
|
|
139
|
-
expect(result.rejections[0].reason).toContain('outside clinical range');
|
|
140
|
-
});
|
|
141
|
-
|
|
142
|
-
it('should compute reasonable confidence for good extraction', () => {
|
|
143
|
-
const raw: RawBiomarkers = {
|
|
144
|
-
total_cholesterol: 200,
|
|
145
|
-
ldl_c: 120,
|
|
146
|
-
hdl: 55,
|
|
147
|
-
triglycerides: 130,
|
|
148
|
-
hba1c: 5.4,
|
|
149
|
-
fasting_glucose: 95,
|
|
150
|
-
creatinine: 0.9,
|
|
151
|
-
albumin: 4.2,
|
|
152
|
-
alt: 25,
|
|
153
|
-
ast: 22,
|
|
154
|
-
wbc: 7.0,
|
|
155
|
-
hemoglobin: 14.5,
|
|
156
|
-
platelets: 250,
|
|
157
|
-
mcv: 90,
|
|
158
|
-
rdw: 13,
|
|
159
|
-
};
|
|
160
|
-
|
|
161
|
-
const result = validateExtraction(raw);
|
|
162
|
-
expect(result.confidence).toBeGreaterThan(0.7);
|
|
163
|
-
});
|
|
164
|
-
|
|
165
|
-
it('should return 0 confidence for empty extraction', () => {
|
|
166
|
-
const result = validateExtraction({});
|
|
167
|
-
expect(result.confidence).toBe(0);
|
|
168
|
-
expect(result.accepted).toBe(0);
|
|
169
|
-
});
|
|
170
|
-
|
|
171
|
-
it('should reduce confidence when HDL > total cholesterol', () => {
|
|
172
|
-
const consistent: RawBiomarkers = {
|
|
173
|
-
total_cholesterol: 200,
|
|
174
|
-
hdl: 55,
|
|
175
|
-
ldl_c: 120,
|
|
176
|
-
hba1c: 5.4,
|
|
177
|
-
creatinine: 0.9,
|
|
178
|
-
};
|
|
179
|
-
const inconsistent: RawBiomarkers = {
|
|
180
|
-
total_cholesterol: 100,
|
|
181
|
-
hdl: 150, // impossible: HDL > total
|
|
182
|
-
ldl_c: 120,
|
|
183
|
-
hba1c: 5.4,
|
|
184
|
-
creatinine: 0.9,
|
|
185
|
-
};
|
|
186
|
-
|
|
187
|
-
const c1 = validateExtraction(consistent).confidence;
|
|
188
|
-
// HDL=150 is within range [5,150] so it passes range check but fails consistency
|
|
189
|
-
// However total_cholesterol=100 makes HDL > total
|
|
190
|
-
const c2 = validateExtraction(inconsistent).confidence;
|
|
191
|
-
expect(c2).toBeLessThan(c1);
|
|
192
|
-
});
|
|
193
|
-
|
|
194
|
-
it('should handle all target biomarkers', () => {
|
|
195
|
-
const raw: RawBiomarkers = {
|
|
196
|
-
weight_kg: 75,
|
|
197
|
-
height_cm: 175,
|
|
198
|
-
bmi: 24.5,
|
|
199
|
-
body_fat_percent: 18,
|
|
200
|
-
systolic_bp: 120,
|
|
201
|
-
diastolic_bp: 75,
|
|
202
|
-
heart_rate: 68,
|
|
203
|
-
total_cholesterol: 195,
|
|
204
|
-
ldl_c: 110,
|
|
205
|
-
hdl: 58,
|
|
206
|
-
triglycerides: 120,
|
|
207
|
-
apoB: 85,
|
|
208
|
-
hba1c: 5.3,
|
|
209
|
-
fasting_glucose: 88,
|
|
210
|
-
fasting_insulin: 5.5,
|
|
211
|
-
uric_acid: 5.2,
|
|
212
|
-
creatinine: 0.95,
|
|
213
|
-
bun: 15,
|
|
214
|
-
egfr: 95,
|
|
215
|
-
alt: 22,
|
|
216
|
-
ast: 20,
|
|
217
|
-
alp: 70,
|
|
218
|
-
ggt: 25,
|
|
219
|
-
bilirubin_total: 0.7,
|
|
220
|
-
albumin: 4.3,
|
|
221
|
-
hs_crp: 0.8,
|
|
222
|
-
esr: 8,
|
|
223
|
-
cortisol: 12,
|
|
224
|
-
testosterone: 550,
|
|
225
|
-
estradiol: 25,
|
|
226
|
-
tsh: 1.8,
|
|
227
|
-
wbc: 6.5,
|
|
228
|
-
rbc: 4.8,
|
|
229
|
-
hemoglobin: 14.8,
|
|
230
|
-
hematocrit: 44,
|
|
231
|
-
platelets: 240,
|
|
232
|
-
mcv: 88,
|
|
233
|
-
rdw: 12.8,
|
|
234
|
-
lymphocyte_percent: 32,
|
|
235
|
-
vitamin_d: 48,
|
|
236
|
-
};
|
|
237
|
-
|
|
238
|
-
const result = validateExtraction(raw);
|
|
239
|
-
expect(result.accepted).toBe(40);
|
|
240
|
-
expect(result.rejected).toBe(0);
|
|
241
|
-
expect(result.confidence).toBeGreaterThan(0.8);
|
|
242
|
-
});
|
|
243
|
-
});
|
package/src/index.ts
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import type { ExtractionResult, ExtendedLLMProvider } from '@ottolab/shared';
|
|
2
|
-
import { detectLab } from './lab-detector.js';
|
|
3
|
-
import { extractFromPdf, extractFromCsv } from './llm-extractor.js';
|
|
4
|
-
import { validateExtraction } from './validator.js';
|
|
5
|
-
|
|
6
|
-
/**
 * Input accepted by the extraction pipeline.
 *
 * Supply either `pdf` or `csv`. When both are present the pipeline handles the
 * PDF and ignores the CSV; an empty string counts as "not provided".
 */
export interface ParseInput {
  /** Base64-encoded PDF data */
  pdf?: string;
  /** Raw CSV text */
  csv?: string;
}
|
|
12
|
-
|
|
13
|
-
/**
 * Main extraction pipeline.
 *
 * PDF flow (taken when `input.pdf` is a non-empty string):
 *   1. Lab detection via `detectLab`
 *   2. Structured extraction via `extractFromPdf`, using the detected lab
 *   3. Validation via `validateExtraction`
 *
 * CSV flow (taken when there is no PDF and `input.csv` is a non-empty string):
 *   1. Parsing via `extractFromCsv` (no LLM involved)
 *   2. Validation via `validateExtraction`
 *
 * The returned `confidence` is the validation confidence; the confidence
 * reported by lab detection is not part of the result.
 *
 * @param input    PDF (base64) or CSV text to parse; the PDF wins if both are set.
 * @param provider LLM provider, required for the PDF flow only.
 * @returns Validated biomarkers plus source lab, source language and confidence.
 *          CSV input always reports lab `'unknown'` and language `'en'`.
 * @throws Error when a PDF is supplied without a provider, or when neither
 *         `pdf` nor `csv` is supplied.
 */
export async function runExtractionPipeline(
  input: ParseInput,
  provider?: ExtendedLLMProvider,
): Promise<ExtractionResult> {
  if (input.pdf) {
    if (!provider) {
      throw new Error('LLM provider required for PDF extraction');
    }

    // Step 1: Detect lab source
    const detection = await detectLab(input.pdf, provider);

    // Step 2: Extract biomarkers with lab-specific prompt
    const raw = await extractFromPdf(input.pdf, detection.lab, provider);

    // Step 3: Validate and transform
    const validation = validateExtraction(raw);

    return {
      biomarkers: validation.biomarkers,
      sourceLab: detection.lab,
      sourceLanguage: detection.language,
      confidence: validation.confidence,
    };
  }

  if (input.csv) {
    // CSV extraction (no LLM needed)
    const raw = extractFromCsv(input.csv);
    const validation = validateExtraction(raw);

    return {
      biomarkers: validation.biomarkers,
      sourceLab: 'unknown',
      sourceLanguage: 'en',
      confidence: validation.confidence,
    };
  }

  throw new Error('Either pdf (base64) or csv text must be provided');
}
|
|
64
|
-
|
|
65
|
-
export { detectLab } from './lab-detector.js';
|
|
66
|
-
export { extractFromPdf, extractFromCsv } from './llm-extractor.js';
|
|
67
|
-
export { validateExtraction } from './validator.js';
|
|
68
|
-
export type { RawBiomarkers } from './llm-extractor.js';
|
|
69
|
-
export type { ValidationResult } from './validator.js';
|
package/src/lab-detector.ts
DELETED
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
import type { SourceLab, ExtendedLLMProvider } from '@ottolab/shared';
|
|
2
|
-
import { LAB_DETECTION_PROMPT } from './prompts/base.js';
|
|
3
|
-
|
|
4
|
-
/** Outcome of classifying which laboratory produced a report. */
export interface LabDetectionResult {
  /** Detected source laboratory; `'unknown'` when it could not be determined. */
  lab: SourceLab;
  /** Language of the report as reported by the classifier; `detectLab` falls back to `'en'`. */
  language: string;
  /** Classifier confidence; `detectLab` clamps it to the range [0, 1]. */
  confidence: number;
}
|
|
9
|
-
|
|
10
|
-
/**
 * Detect the source laboratory of a PDF using LLM classification.
 *
 * The PDF document is sent to the provider together with
 * `LAB_DETECTION_PROMPT`, and the JSON answer is normalised:
 *   - `lab` outside the known set becomes `'unknown'`;
 *   - `language` that is missing, empty or not a string becomes `'en'`;
 *   - `confidence` is converted to a number and clamped to [0, 1]; anything
 *     that does not convert to a number becomes 0.
 *
 * Detection is best-effort: any failure (provider error, unparsable answer)
 * yields `{ lab: 'unknown', language: 'en', confidence: 0 }` instead of
 * throwing.
 *
 * @param pdfBase64 Base64-encoded PDF data.
 * @param provider  LLM provider with multimodal chat support.
 * @returns The normalised detection result.
 */
export async function detectLab(
  pdfBase64: string,
  provider: ExtendedLLMProvider,
): Promise<LabDetectionResult> {
  try {
    const response = await provider.chatMultimodal(
      [
        { type: 'document', data: pdfBase64, mimeType: 'application/pdf' },
        { type: 'text', text: LAB_DETECTION_PROMPT },
      ],
      { temperature: 0, maxTokens: 256, responseFormat: 'json' },
    );

    // The declared type is only a hint: the payload is model output, so every
    // field is re-checked below before it is returned.
    const parsed = parseJsonResponse<LabDetectionResult>(response);

    // Validate lab value
    const validLabs: SourceLab[] = ['quest', 'labcorp', 'international', 'unknown'];
    const lab: SourceLab = validLabs.includes(parsed.lab) ? parsed.lab : 'unknown';

    const language =
      typeof parsed.language === 'string' && parsed.language !== '' ? parsed.language : 'en';

    // Number() keeps numeric strings working; NaN must not leak to callers.
    const numericConfidence = Number(parsed.confidence);
    const confidence = Number.isNaN(numericConfidence)
      ? 0
      : Math.min(Math.max(numericConfidence, 0), 1);

    return { lab, language, confidence };
  } catch {
    // Deliberate best-effort fallback: detection failures must not abort extraction.
    return { lab: 'unknown', language: 'en', confidence: 0 };
  }
}
|
|
44
|
-
|
|
45
|
-
/**
 * Extract and parse the JSON object embedded in an LLM text response.
 *
 * Surrounding markdown code fences are stripped first, then everything from
 * the first `{` to the last `}` is handed to `JSON.parse`. The result is cast
 * to `T` without validation, so callers must still check the fields they use.
 *
 * @param text Raw response text.
 * @returns The parsed object, typed as `T`.
 * @throws Error when the text contains no `{ ... }` span (including the case
 *         where the only `}` comes before the first `{`).
 * @throws SyntaxError when the extracted span is not valid JSON.
 */
function parseJsonResponse<T>(text: string): T {
  // Strip markdown code fences if present
  let cleaned = text.trim();
  if (cleaned.startsWith('```')) {
    cleaned = cleaned.replace(/^```(?:json)?\s*/, '').replace(/\s*```$/, '');
  }

  // Find JSON object boundaries
  const start = cleaned.indexOf('{');
  const end = cleaned.lastIndexOf('}');
  // `end < start` also covers a missing `}` (-1) and a `}` that precedes the
  // first `{`, which would otherwise slice to an empty string.
  if (start === -1 || end < start) {
    throw new Error('No JSON object found in response');
  }

  return JSON.parse(cleaned.slice(start, end + 1)) as T;
}
|