@ottolab/extraction 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,191 @@
1
/**
 * Clinical ranges for validation — generous bounds to avoid false rejections.
 * Values outside these ranges are likely extraction errors.
 *
 * Each entry is an inclusive `[low, high]` pair keyed by the raw extraction key.
 * NOTE(review): bounds are presumably expressed in the unit listed for the same
 * key in BIOMARKER_MAP — confirm before changing either table.
 */
const CLINICAL_RANGES = {
    weight_kg: [20, 300],
    height_cm: [100, 250],
    bmi: [10, 60],
    body_fat_percent: [2, 60],
    systolic_bp: [60, 250],
    diastolic_bp: [30, 150],
    heart_rate: [30, 220],
    total_cholesterol: [50, 500],
    ldl_c: [10, 400],
    hdl: [5, 150],
    triglycerides: [10, 2000],
    apoB: [10, 300],
    hba1c: [3, 15],
    fasting_glucose: [30, 500],
    fasting_insulin: [0.5, 300],
    uric_acid: [0.5, 20],
    creatinine: [0.1, 15],
    bun: [2, 100],
    egfr: [5, 150],
    alt: [1, 2000],
    ast: [1, 2000],
    alp: [10, 1000],
    ggt: [1, 2000],
    bilirubin_total: [0.05, 30],
    albumin: [1, 6],
    hs_crp: [0.01, 300],
    esr: [0, 140],
    cortisol: [0.5, 60],
    testosterone: [1, 2000],
    estradiol: [1, 5000],
    tsh: [0.01, 100],
    wbc: [1, 50],
    rbc: [1, 10],
    hemoglobin: [3, 25],
    hematocrit: [15, 65],
    platelets: [10, 1000],
    mcv: [50, 130],
    rdw: [9, 25],
    lymphocyte_percent: [2, 70],
    vitamin_d: [3, 200],
};
/**
 * Map from raw extraction keys to BiomarkerSet field names and display metadata.
 *
 * `field` is the property name written into the validated result, `name` is the
 * human-readable label and `unit` the unit string attached to the value.
 */
const BIOMARKER_MAP = {
    weight_kg: { field: 'weight', name: 'Weight', unit: 'kg' },
    height_cm: { field: 'height', name: 'Height', unit: 'cm' },
    bmi: { field: 'bmi', name: 'BMI', unit: 'kg/m2' },
    body_fat_percent: { field: 'bodyFatPercent', name: 'Body Fat', unit: '%' },
    systolic_bp: { field: 'bloodPressureSystolic', name: 'Systolic BP', unit: 'mmHg' },
    diastolic_bp: { field: 'bloodPressureDiastolic', name: 'Diastolic BP', unit: 'mmHg' },
    heart_rate: { field: 'heartRate', name: 'Heart Rate', unit: 'bpm' },
    total_cholesterol: { field: 'totalCholesterol', name: 'Total Cholesterol', unit: 'mg/dL' },
    ldl_c: { field: 'ldlC', name: 'LDL-C', unit: 'mg/dL' },
    hdl: { field: 'hdl', name: 'HDL', unit: 'mg/dL' },
    triglycerides: { field: 'triglycerides', name: 'Triglycerides', unit: 'mg/dL' },
    apoB: { field: 'apoB', name: 'ApoB', unit: 'mg/dL' },
    hba1c: { field: 'hba1c', name: 'HbA1c', unit: '%' },
    fasting_glucose: { field: 'fastingGlucose', name: 'Fasting Glucose', unit: 'mg/dL' },
    fasting_insulin: { field: 'fastingInsulin', name: 'Fasting Insulin', unit: 'uIU/mL' },
    uric_acid: { field: 'uricAcid', name: 'Uric Acid', unit: 'mg/dL' },
    creatinine: { field: 'creatinine', name: 'Creatinine', unit: 'mg/dL' },
    bun: { field: 'bun', name: 'BUN', unit: 'mg/dL' },
    egfr: { field: 'egfr', name: 'eGFR', unit: 'mL/min/1.73m2' },
    alt: { field: 'alt', name: 'ALT', unit: 'U/L' },
    ast: { field: 'ast', name: 'AST', unit: 'U/L' },
    alp: { field: 'alp', name: 'ALP', unit: 'U/L' },
    ggt: { field: 'ggt', name: 'GGT', unit: 'U/L' },
    bilirubin_total: { field: 'bilirubinTotal', name: 'Bilirubin (Total)', unit: 'mg/dL' },
    albumin: { field: 'albumin', name: 'Albumin', unit: 'g/dL' },
    hs_crp: { field: 'hsCrp', name: 'hs-CRP', unit: 'mg/L' },
    esr: { field: 'esr', name: 'ESR', unit: 'mm/hr' },
    cortisol: { field: 'cortisol', name: 'Cortisol', unit: 'ug/dL' },
    testosterone: { field: 'testosterone', name: 'Testosterone', unit: 'ng/dL' },
    estradiol: { field: 'estradiol', name: 'Estradiol', unit: 'pg/mL' },
    tsh: { field: 'tsh', name: 'TSH', unit: 'mIU/L' },
    wbc: { field: 'wbc', name: 'WBC', unit: '10^3/uL' },
    rbc: { field: 'rbc', name: 'RBC', unit: '10^6/uL' },
    hemoglobin: { field: 'hemoglobin', name: 'Hemoglobin', unit: 'g/dL' },
    hematocrit: { field: 'hematocrit', name: 'Hematocrit', unit: '%' },
    platelets: { field: 'platelets', name: 'Platelets', unit: '10^3/uL' },
    mcv: { field: 'mcv', name: 'MCV', unit: 'fL' },
    rdw: { field: 'rdw', name: 'RDW', unit: '%' },
    lymphocyte_percent: { field: 'lymphocytePercent', name: 'Lymphocyte %', unit: '%' },
    vitamin_d: { field: 'vitaminD', name: 'Vitamin D', unit: 'ng/mL' },
};
/**
 * Own-property membership test for the lookup tables.
 *
 * Plain `table[key]` also resolves members inherited from Object.prototype
 * ("constructor", "toString", "__proto__", ...), which would let such raw keys
 * masquerade as known biomarkers.
 */
function hasOwnKey(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key);
}
/**
 * Validate and transform raw LLM extraction output to typed BiomarkerSet.
 *
 * Steps:
 * 1. Skip `gender` and every key that is not a known biomarker
 * 2. Filter nulls/undefined and qualitative string values
 * 3. Type-check numeric values (must be a finite number)
 * 4. Range-check against generous clinical bounds (inclusive)
 * 5. Map to BiomarkerSet fields
 * 6. Compute extraction confidence score
 *
 * @param raw Object of raw extraction keys to values (numbers, strings, null, ...).
 * @returns `{ biomarkers, accepted, rejected, confidence, rejections }` where
 *   `biomarkers` is keyed by BiomarkerSet field name, `accepted`/`rejected` are
 *   counts, `confidence` is a 0-1 score and `rejections` lists
 *   `{ key, value, reason }` for every rejected value.
 */
export function validateExtraction(raw) {
    const biomarkers = {};
    const rejections = [];
    let accepted = 0;
    let rejected = 0;
    for (const [key, value] of Object.entries(raw)) {
        // Skip gender and non-biomarker fields
        if (key === 'gender')
            continue;
        // Unknown keys are ignored up front, whatever their value: they are not
        // biomarkers, so they must neither be accepted nor counted as rejections.
        // Own-property check keeps inherited names such as "constructor" out.
        if (!hasOwnKey(BIOMARKER_MAP, key))
            continue;
        // Skip null/undefined
        if (value === null || value === undefined)
            continue;
        // Skip string values (qualitative results like "Negative")
        if (typeof value === 'string')
            continue;
        // Must be a number at this point
        if (typeof value !== 'number' || !Number.isFinite(value)) {
            rejections.push({ key, value, reason: 'not a finite number' });
            rejected++;
            continue;
        }
        // Check clinical ranges (bounds are inclusive; a key without a range is not range-checked)
        const range = hasOwnKey(CLINICAL_RANGES, key) ? CLINICAL_RANGES[key] : undefined;
        if (range && (value < range[0] || value > range[1])) {
            rejections.push({
                key,
                value,
                reason: `outside clinical range [${range[0]}, ${range[1]}]`,
            });
            rejected++;
            continue;
        }
        // Map to BiomarkerSet
        const mapping = BIOMARKER_MAP[key];
        const biomarkerValue = {
            name: mapping.name,
            value,
            unit: mapping.unit,
        };
        biomarkers[mapping.field] = biomarkerValue;
        accepted++;
    }
    // Confidence based on yield ratio and cross-checks
    const confidence = computeConfidence(biomarkers, accepted, rejected);
    return { biomarkers, accepted, rejected, confidence, rejections };
}
/**
 * Compute extraction confidence score (0-1), rounded to two decimals.
 *
 * Factors:
 * - Yield (weight 0.5): accepted / 15, capped at 1
 * - Rejection rate (weight 0.25): 1 - rejected / (accepted + rejected)
 * - Cross-biomarker consistency (weight 0.25): starts at 1 and loses points
 *   for each implausible combination
 *
 * @param biomarkers Validated biomarkers keyed by BiomarkerSet field name.
 * @param accepted Number of accepted values.
 * @param rejected Number of rejected values.
 * @returns 0 when nothing was accepted, otherwise the weighted score.
 */
function computeConfidence(biomarkers, accepted, rejected) {
    if (accepted === 0)
        return 0;
    // Base confidence from yield (diminishing returns above 15 markers)
    const yieldScore = Math.min(accepted / 15, 1.0);
    // Penalty for rejections
    const total = accepted + rejected;
    const rejectionPenalty = total > 0 ? rejected / total : 0;
    // Cross-biomarker consistency checks
    let consistencyScore = 1.0;
    // Check: if both ALT and AST exist, AST above 5×ALT is treated as suspicious
    if (biomarkers.alt && biomarkers.ast) {
        if (biomarkers.ast.value > biomarkers.alt.value * 5) {
            consistencyScore -= 0.1;
        }
    }
    // Check: HDL should be less than total cholesterol
    if (biomarkers.hdl && biomarkers.totalCholesterol) {
        if (biomarkers.hdl.value > biomarkers.totalCholesterol.value) {
            consistencyScore -= 0.15;
        }
    }
    // Check: LDL should be less than total cholesterol
    if (biomarkers.ldlC && biomarkers.totalCholesterol) {
        if (biomarkers.ldlC.value > biomarkers.totalCholesterol.value) {
            consistencyScore -= 0.15;
        }
    }
    const raw = yieldScore * 0.5 + (1 - rejectionPenalty) * 0.25 + Math.max(consistencyScore, 0) * 0.25;
    // Clamp to [0, 1] and round to two decimals
    return Math.round(Math.min(Math.max(raw, 0), 1) * 100) / 100;
}
189
+ // Export for testing
190
+ export { CLINICAL_RANGES, BIOMARKER_MAP };
191
+ //# sourceMappingURL=validator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validator.js","sourceRoot":"","sources":["../src/validator.ts"],"names":[],"mappings":"AAGA;;;GAGG;AACH,MAAM,eAAe,GAAqC;IACxD,SAAS,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACpB,SAAS,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC;IACrB,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;IACb,gBAAgB,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACzB,WAAW,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACtB,YAAY,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACvB,UAAU,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACrB,iBAAiB,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IAC5B,KAAK,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IAChB,GAAG,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;IACb,aAAa,EAAE,CAAC,EAAE,EAAE,IAAI,CAAC;IACzB,IAAI,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACf,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACd,eAAe,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IAC1B,eAAe,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC;IAC3B,SAAS,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC;IACpB,UAAU,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC;IACrB,GAAG,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;IACb,IAAI,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;IACd,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACd,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACd,GAAG,EAAE,CAAC,EAAE,EAAE,IAAI,CAAC;IACf,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACd,eAAe,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;IAC3B,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;IACf,MAAM,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC;IACnB,GAAG,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;IACb,QAAQ,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC;IACnB,YAAY,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACvB,SAAS,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;IACpB,GAAG,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC;IAChB,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACZ,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACZ,UAAU,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACnB,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;IACpB,SAAS,EAAE,CAAC,EAAE,EAAE,IAAI,CAAC;IACrB,GAAG,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC;IACd,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IACZ,kBAAkB,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC;IAC3B,SAAS,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC;CACpB,CAAC;AAEF;;GAEG;AACH,MAAM,aAAa,GAA8E;IAC/F,SAAS,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE;IAC1D,SAAS,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE;IAC1D,GAAG,EAAE,EAAE,K
AAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,gBAAgB,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,GAAG,EAAE;IAC1E,WAAW,EAAE,EAAE,KAAK,EAAE,uBAAuB,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,EAAE;IAClF,YAAY,EAAE,EAAE,KAAK,EAAE,wBAAwB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,MAAM,EAAE;IACrF,UAAU,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,EAAE;IACnE,iBAAiB,EAAE,EAAE,KAAK,EAAE,kBAAkB,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE;IAC1F,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE;IACtD,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,aAAa,EAAE,EAAE,KAAK,EAAE,eAAe,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,OAAO,EAAE;IAC/E,IAAI,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE;IACpD,KAAK,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE;IACnD,eAAe,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,OAAO,EAAE;IACpF,eAAe,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,QAAQ,EAAE;IACrF,SAAS,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE;IAClE,UAAU,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,OAAO,EAAE;IACtE,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,IAAI,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,eAAe,EAAE;IAC5D,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE;IAC/C,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE;IAC/C,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE;IAC/C,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE;IAC/C,eAAe,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE;IACtF,OAAO,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE;IAC5D,MAAM,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE;IACxD,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,K
AAK,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,QAAQ,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,OAAO,EAAE;IAChE,YAAY,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE;IAC5E,SAAS,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE;IACnE,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE;IACjD,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE;IACnD,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE;IACnD,UAAU,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,EAAE;IACrE,UAAU,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,GAAG,EAAE;IAClE,SAAS,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,SAAS,EAAE;IACrE,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE;IAC9C,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE;IAC7C,kBAAkB,EAAE,EAAE,KAAK,EAAE,mBAAmB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,GAAG,EAAE;IACnF,SAAS,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE;CACnE,CAAC;AAUF;;;;;;;;;GASG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAAkB;IACnD,MAAM,UAAU,GAAiB,EAAE,CAAC;IACpC,MAAM,UAAU,GAAsD,EAAE,CAAC;IACzE,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QAC/C,uCAAuC;QACvC,IAAI,GAAG,KAAK,QAAQ;YAAE,SAAS;QAE/B,sBAAsB;QACtB,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS;YAAE,SAAS;QAEpD,2DAA2D;QAC3D,IAAI,OAAO,KAAK,KAAK,QAAQ;YAAE,SAAS;QAExC,iCAAiC;QACjC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YACzD,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,CAAC;YAC/D,QAAQ,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,wBAAwB;QACxB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,KAAK,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACpD,UAAU,CAAC,IAAI,CAAC;gBACd,GAAG;gBACH,KAAK;gBACL,MAAM,EAAE,2BAA2B,KAAK,CAAC,CAAC,
CAAC,KAAK,KAAK,CAAC,CAAC,CAAC,GAAG;aAC5D,CAAC,CAAC;YACH,QAAQ,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,sBAAsB;QACtB,MAAM,OAAO,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,CAAC,OAAO;YAAE,SAAS,CAAC,oBAAoB;QAE5C,MAAM,cAAc,GAAmB;YACrC,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,KAAK;YACL,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAED,UAA6C,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,cAAc,CAAC;QAC/E,QAAQ,EAAE,CAAC;IACb,CAAC;IAED,mDAAmD;IACnD,MAAM,UAAU,GAAG,iBAAiB,CAAC,UAAU,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAErE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC;AACpE,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,iBAAiB,CAAC,UAAwB,EAAE,QAAgB,EAAE,QAAgB;IACrF,IAAI,QAAQ,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAE7B,oEAAoE;IACpE,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;IAEhD,yBAAyB;IACzB,MAAM,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,MAAM,gBAAgB,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAE1D,qCAAqC;IACrC,IAAI,gBAAgB,GAAG,GAAG,CAAC;IAE3B,+EAA+E;IAC/E,IAAI,UAAU,CAAC,GAAG,IAAI,UAAU,CAAC,GAAG,EAAE,CAAC;QACrC,IAAI,UAAU,CAAC,GAAG,CAAC,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;YACpD,gBAAgB,IAAI,GAAG,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,mDAAmD;IACnD,IAAI,UAAU,CAAC,GAAG,IAAI,UAAU,CAAC,gBAAgB,EAAE,CAAC;QAClD,IAAI,UAAU,CAAC,GAAG,CAAC,KAAK,GAAG,UAAU,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC;YAC7D,gBAAgB,IAAI,IAAI,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,mDAAmD;IACnD,IAAI,UAAU,CAAC,IAAI,IAAI,UAAU,CAAC,gBAAgB,EAAE,CAAC;QACnD,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC;YAC9D,gBAAgB,IAAI,IAAI,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,MAAM,GAAG,GACP,UAAU,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,gBAAgB,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC;IAC1F,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;AAC/D,CAAC;AAED,qBAAqB;AACrB,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,CAAC"}
package/package.json CHANGED
@@ -1,10 +1,15 @@
1
1
  {
2
2
  "name": "@ottolab/extraction",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "license": "MIT",
5
5
  "repository": "https://github.com/hokev/Otto",
6
- "publishConfig": { "access": "public" },
6
+ "publishConfig": {
7
+ "access": "public"
8
+ },
7
9
  "type": "module",
10
+ "files": [
11
+ "dist"
12
+ ],
8
13
  "exports": {
9
14
  ".": {
10
15
  "import": {
@@ -16,6 +21,7 @@
16
21
  },
17
22
  "scripts": {
18
23
  "build": "tsc --build",
24
+ "prepublishOnly": "npm run build",
19
25
  "lint": "eslint src/",
20
26
  "test": "vitest run"
21
27
  },
@@ -1,73 +0,0 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { extractFromCsv } from '../src/llm-extractor.js';
3
-
4
- describe('extractFromCsv', () => {
5
- it('should parse simple CSV with standard headers', () => {
6
- const csv = `Total Cholesterol,LDL,HDL,Triglycerides,HbA1c
7
- 200,120,55,130,5.4`;
8
-
9
- const result = extractFromCsv(csv);
10
- expect(result.total_cholesterol).toBe(200);
11
- expect(result.ldl_c).toBe(120);
12
- expect(result.hdl).toBe(55);
13
- expect(result.triglycerides).toBe(130);
14
- expect(result.hba1c).toBe(5.4);
15
- });
16
-
17
- it('should normalize common header aliases', () => {
18
- const csv = `LDL-C,HDL-C,A1C,Glucose,SGPT
19
- 110,58,5.2,92,25`;
20
-
21
- const result = extractFromCsv(csv);
22
- expect(result.ldl_c).toBe(110);
23
- expect(result.hdl).toBe(58);
24
- expect(result.hba1c).toBe(5.2);
25
- expect(result.fasting_glucose).toBe(92);
26
- expect(result.alt).toBe(25);
27
- });
28
-
29
- it('should handle empty values', () => {
30
- const csv = `Total Cholesterol,LDL,HDL
31
- 200,,55`;
32
-
33
- const result = extractFromCsv(csv);
34
- expect(result.total_cholesterol).toBe(200);
35
- expect(result.hdl).toBe(55);
36
- expect(result.ldl_c).toBeUndefined();
37
- });
38
-
39
- it('should throw on single-line CSV (no data row)', () => {
40
- expect(() => extractFromCsv('Total Cholesterol,LDL,HDL')).toThrow(
41
- 'at least a header row and a data row',
42
- );
43
- });
44
-
45
- it('should handle case-insensitive headers', () => {
46
- const csv = `CREATININE,BUN,eGFR
47
- 0.9,15,95`;
48
-
49
- const result = extractFromCsv(csv);
50
- expect(result.creatinine).toBe(0.9);
51
- expect(result.bun).toBe(15);
52
- expect(result.egfr).toBe(95);
53
- });
54
-
55
- it('should preserve non-numeric values as strings', () => {
56
- const csv = `Albumin,Notes
57
- 4.2,Normal`;
58
-
59
- const result = extractFromCsv(csv);
60
- expect(result.albumin).toBe(4.2);
61
- expect(result.notes).toBe('Normal');
62
- });
63
-
64
- it('should handle whitespace in headers and values', () => {
65
- const csv = ` Total Cholesterol , LDL , HDL
66
- 200 , 120 , 55 `;
67
-
68
- const result = extractFromCsv(csv);
69
- expect(result.total_cholesterol).toBe(200);
70
- expect(result.ldl_c).toBe(120);
71
- expect(result.hdl).toBe(55);
72
- });
73
- });
@@ -1,243 +0,0 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { validateExtraction, CLINICAL_RANGES, BIOMARKER_MAP } from '../src/validator.js';
3
- import type { RawBiomarkers } from '../src/llm-extractor.js';
4
-
5
- describe('CLINICAL_RANGES', () => {
6
- it('should have ranges for all mapped biomarkers', () => {
7
- for (const key of Object.keys(BIOMARKER_MAP)) {
8
- expect(CLINICAL_RANGES[key]).toBeDefined();
9
- }
10
- });
11
-
12
- it('should have low < high for all ranges', () => {
13
- for (const [key, [low, high]] of Object.entries(CLINICAL_RANGES)) {
14
- expect(low).toBeLessThan(high);
15
- expect(low).toBeTypeOf('number');
16
- expect(high).toBeTypeOf('number');
17
- // Verify key exists
18
- expect(key).toBeTruthy();
19
- }
20
- });
21
- });
22
-
23
- describe('validateExtraction', () => {
24
- it('should accept valid biomarkers within range', () => {
25
- const raw: RawBiomarkers = {
26
- total_cholesterol: 200,
27
- ldl_c: 120,
28
- hdl: 55,
29
- triglycerides: 130,
30
- hba1c: 5.4,
31
- fasting_glucose: 95,
32
- creatinine: 0.9,
33
- albumin: 4.2,
34
- };
35
-
36
- const result = validateExtraction(raw);
37
- expect(result.accepted).toBe(8);
38
- expect(result.rejected).toBe(0);
39
- expect(result.biomarkers.totalCholesterol?.value).toBe(200);
40
- expect(result.biomarkers.ldlC?.value).toBe(120);
41
- expect(result.biomarkers.hdl?.value).toBe(55);
42
- expect(result.biomarkers.albumin?.value).toBe(4.2);
43
- });
44
-
45
- it('should reject values outside clinical ranges', () => {
46
- const raw: RawBiomarkers = {
47
- total_cholesterol: 9999, // way too high
48
- hdl: -5, // negative
49
- hba1c: 5.4, // valid
50
- };
51
-
52
- const result = validateExtraction(raw);
53
- expect(result.accepted).toBe(1); // only hba1c
54
- expect(result.rejected).toBe(2);
55
- expect(result.biomarkers.hba1c?.value).toBe(5.4);
56
- expect(result.biomarkers.totalCholesterol).toBeUndefined();
57
- expect(result.biomarkers.hdl).toBeUndefined();
58
- });
59
-
60
- it('should skip null and undefined values', () => {
61
- const raw: RawBiomarkers = {
62
- total_cholesterol: null,
63
- ldl_c: 100,
64
- hdl: null,
65
- };
66
-
67
- const result = validateExtraction(raw);
68
- expect(result.accepted).toBe(1);
69
- expect(result.biomarkers.ldlC?.value).toBe(100);
70
- });
71
-
72
- it('should skip string values (qualitative results)', () => {
73
- const raw: RawBiomarkers = {
74
- total_cholesterol: 200,
75
- some_qualitative: 'Negative',
76
- };
77
-
78
- const result = validateExtraction(raw);
79
- expect(result.accepted).toBe(1);
80
- expect(result.rejected).toBe(0);
81
- });
82
-
83
- it('should skip gender field', () => {
84
- const raw: RawBiomarkers = {
85
- gender: 'm',
86
- total_cholesterol: 200,
87
- };
88
-
89
- const result = validateExtraction(raw);
90
- expect(result.accepted).toBe(1);
91
- });
92
-
93
- it('should reject non-finite numbers', () => {
94
- const raw: RawBiomarkers = {
95
- total_cholesterol: NaN,
96
- ldl_c: Infinity,
97
- hdl: 55,
98
- };
99
-
100
- const result = validateExtraction(raw);
101
- expect(result.accepted).toBe(1);
102
- expect(result.rejected).toBe(2);
103
- });
104
-
105
- it('should map to correct BiomarkerSet fields', () => {
106
- const raw: RawBiomarkers = {
107
- hs_crp: 1.5,
108
- vitamin_d: 45,
109
- lymphocyte_percent: 30,
110
- bilirubin_total: 0.8,
111
- };
112
-
113
- const result = validateExtraction(raw);
114
- expect(result.biomarkers.hsCrp?.value).toBe(1.5);
115
- expect(result.biomarkers.hsCrp?.unit).toBe('mg/L');
116
- expect(result.biomarkers.vitaminD?.value).toBe(45);
117
- expect(result.biomarkers.lymphocytePercent?.value).toBe(30);
118
- expect(result.biomarkers.bilirubinTotal?.value).toBe(0.8);
119
- });
120
-
121
- it('should include unit and name in BiomarkerValue', () => {
122
- const raw: RawBiomarkers = { alt: 25 };
123
- const result = validateExtraction(raw);
124
- expect(result.biomarkers.alt).toEqual({
125
- name: 'ALT',
126
- value: 25,
127
- unit: 'U/L',
128
- });
129
- });
130
-
131
- it('should record rejections with reasons', () => {
132
- const raw: RawBiomarkers = {
133
- total_cholesterol: 9999,
134
- };
135
-
136
- const result = validateExtraction(raw);
137
- expect(result.rejections.length).toBe(1);
138
- expect(result.rejections[0].key).toBe('total_cholesterol');
139
- expect(result.rejections[0].reason).toContain('outside clinical range');
140
- });
141
-
142
- it('should compute reasonable confidence for good extraction', () => {
143
- const raw: RawBiomarkers = {
144
- total_cholesterol: 200,
145
- ldl_c: 120,
146
- hdl: 55,
147
- triglycerides: 130,
148
- hba1c: 5.4,
149
- fasting_glucose: 95,
150
- creatinine: 0.9,
151
- albumin: 4.2,
152
- alt: 25,
153
- ast: 22,
154
- wbc: 7.0,
155
- hemoglobin: 14.5,
156
- platelets: 250,
157
- mcv: 90,
158
- rdw: 13,
159
- };
160
-
161
- const result = validateExtraction(raw);
162
- expect(result.confidence).toBeGreaterThan(0.7);
163
- });
164
-
165
- it('should return 0 confidence for empty extraction', () => {
166
- const result = validateExtraction({});
167
- expect(result.confidence).toBe(0);
168
- expect(result.accepted).toBe(0);
169
- });
170
-
171
- it('should reduce confidence when HDL > total cholesterol', () => {
172
- const consistent: RawBiomarkers = {
173
- total_cholesterol: 200,
174
- hdl: 55,
175
- ldl_c: 120,
176
- hba1c: 5.4,
177
- creatinine: 0.9,
178
- };
179
- const inconsistent: RawBiomarkers = {
180
- total_cholesterol: 100,
181
- hdl: 150, // impossible: HDL > total
182
- ldl_c: 120,
183
- hba1c: 5.4,
184
- creatinine: 0.9,
185
- };
186
-
187
- const c1 = validateExtraction(consistent).confidence;
188
- // HDL=150 is within range [5,150] so it passes range check but fails consistency
189
- // However total_cholesterol=100 makes HDL > total
190
- const c2 = validateExtraction(inconsistent).confidence;
191
- expect(c2).toBeLessThan(c1);
192
- });
193
-
194
- it('should handle all target biomarkers', () => {
195
- const raw: RawBiomarkers = {
196
- weight_kg: 75,
197
- height_cm: 175,
198
- bmi: 24.5,
199
- body_fat_percent: 18,
200
- systolic_bp: 120,
201
- diastolic_bp: 75,
202
- heart_rate: 68,
203
- total_cholesterol: 195,
204
- ldl_c: 110,
205
- hdl: 58,
206
- triglycerides: 120,
207
- apoB: 85,
208
- hba1c: 5.3,
209
- fasting_glucose: 88,
210
- fasting_insulin: 5.5,
211
- uric_acid: 5.2,
212
- creatinine: 0.95,
213
- bun: 15,
214
- egfr: 95,
215
- alt: 22,
216
- ast: 20,
217
- alp: 70,
218
- ggt: 25,
219
- bilirubin_total: 0.7,
220
- albumin: 4.3,
221
- hs_crp: 0.8,
222
- esr: 8,
223
- cortisol: 12,
224
- testosterone: 550,
225
- estradiol: 25,
226
- tsh: 1.8,
227
- wbc: 6.5,
228
- rbc: 4.8,
229
- hemoglobin: 14.8,
230
- hematocrit: 44,
231
- platelets: 240,
232
- mcv: 88,
233
- rdw: 12.8,
234
- lymphocyte_percent: 32,
235
- vitamin_d: 48,
236
- };
237
-
238
- const result = validateExtraction(raw);
239
- expect(result.accepted).toBe(40);
240
- expect(result.rejected).toBe(0);
241
- expect(result.confidence).toBeGreaterThan(0.8);
242
- });
243
- });
package/src/index.ts DELETED
@@ -1,69 +0,0 @@
1
- import type { ExtractionResult, ExtendedLLMProvider } from '@ottolab/shared';
2
- import { detectLab } from './lab-detector.js';
3
- import { extractFromPdf, extractFromCsv } from './llm-extractor.js';
4
- import { validateExtraction } from './validator.js';
5
-
6
- export interface ParseInput {
7
- /** Base64-encoded PDF data */
8
- pdf?: string;
9
- /** Raw CSV text */
10
- csv?: string;
11
- }
12
-
13
/**
 * Run the extraction pipeline on a PDF or a CSV input.
 *
 * PDF input (takes precedence when both are supplied):
 *   1. `detectLab` classifies the source lab and language
 *   2. `extractFromPdf` extracts raw biomarkers using the detected lab
 *   3. `validateExtraction` validates the raw values and scores confidence
 *
 * CSV input (no LLM provider needed):
 *   1. `extractFromCsv` parses the CSV text
 *   2. `validateExtraction` validates the raw values and scores confidence
 *
 * @param input - Holds either a base64-encoded PDF (`pdf`) or raw CSV text (`csv`).
 * @param provider - LLM provider; required only for the PDF path.
 * @returns The validated biomarkers together with source lab, language and confidence.
 * @throws Error when a PDF is given without a provider, or when neither `pdf`
 *   nor `csv` is provided (empty strings count as not provided).
 */
export async function runExtractionPipeline(
  input: ParseInput,
  provider?: ExtendedLLMProvider,
): Promise<ExtractionResult> {
  const { pdf, csv } = input;

  if (pdf) {
    if (!provider) {
      throw new Error('LLM provider required for PDF extraction');
    }

    // Lab detection first: its result selects the lab-specific extraction prompt.
    const { lab, language } = await detectLab(pdf, provider);
    const extracted = await extractFromPdf(pdf, lab, provider);
    const { biomarkers, confidence } = validateExtraction(extracted);

    return { biomarkers, sourceLab: lab, sourceLanguage: language, confidence };
  }

  if (csv) {
    // CSV carries no lab/language information, so fixed defaults are reported.
    const { biomarkers, confidence } = validateExtraction(extractFromCsv(csv));

    return { biomarkers, sourceLab: 'unknown', sourceLanguage: 'en', confidence };
  }

  throw new Error('Either pdf (base64) or csv text must be provided');
}
64
-
65
- export { detectLab } from './lab-detector.js';
66
- export { extractFromPdf, extractFromCsv } from './llm-extractor.js';
67
- export { validateExtraction } from './validator.js';
68
- export type { RawBiomarkers } from './llm-extractor.js';
69
- export type { ValidationResult } from './validator.js';
@@ -1,60 +0,0 @@
1
- import type { SourceLab, ExtendedLLMProvider } from '@ottolab/shared';
2
- import { LAB_DETECTION_PROMPT } from './prompts/base.js';
3
-
4
- export interface LabDetectionResult {
5
- lab: SourceLab;
6
- language: string;
7
- confidence: number;
8
- }
9
-
10
/**
 * Detect the source laboratory of a lab-report PDF using LLM classification.
 *
 * Sends the document together with `LAB_DETECTION_PROMPT` to
 * `provider.chatMultimodal` and parses the JSON reply.
 *
 * This is deliberately best-effort: any failure (provider error, reply without
 * a JSON object, invalid JSON) yields the fallback
 * `{ lab: 'unknown', language: 'en', confidence: 0 }` instead of throwing.
 *
 * @param pdfBase64 - PDF payload passed to the provider as `application/pdf` data.
 * @param provider - LLM provider exposing `chatMultimodal`.
 * @returns The detected lab (coerced to `'unknown'` when not a recognised value),
 *   the reported language (default `'en'`) and a confidence clamped to [0, 1].
 */
export async function detectLab(
  pdfBase64: string,
  provider: ExtendedLLMProvider,
): Promise<LabDetectionResult> {
  try {
    const response = await provider.chatMultimodal(
      [
        { type: 'document', data: pdfBase64, mimeType: 'application/pdf' },
        { type: 'text', text: LAB_DETECTION_PROMPT },
      ],
      { temperature: 0, maxTokens: 256, responseFormat: 'json' },
    );

    // The reply is untrusted LLM output: the declared type is only a hope,
    // so every field is re-validated below.
    const parsed = parseJsonResponse<LabDetectionResult>(response);

    // Validate lab value
    const validLabs: SourceLab[] = ['quest', 'labcorp', 'international', 'unknown'];
    const lab: SourceLab = validLabs.includes(parsed.lab) ? parsed.lab : 'unknown';

    // Only a non-empty string is accepted as a language.
    const language =
      typeof parsed.language === 'string' && parsed.language !== '' ? parsed.language : 'en';

    // A missing or non-numeric confidence (e.g. "high") would otherwise
    // propagate as NaN through Math.min/Math.max; treat it as 0.
    const numericConfidence = Number(parsed.confidence);
    const confidence = Number.isFinite(numericConfidence)
      ? Math.min(Math.max(numericConfidence, 0), 1)
      : 0;

    return { lab, language, confidence };
  } catch {
    return { lab: 'unknown', language: 'en', confidence: 0 };
  }
}
44
-
45
/**
 * Extract and parse the JSON object embedded in an LLM text response.
 *
 * Leading/trailing whitespace and a surrounding markdown code fence
 * (``` or ```json) are stripped, then the span from the first `{` to the last
 * `}` is handed to `JSON.parse`. The result is cast to `T` without validation,
 * so callers must check the fields they rely on.
 *
 * @param text - Raw response text.
 * @returns The parsed object, typed as `T`.
 * @throws Error with message "No JSON object found in response" when the text
 *   contains no `{ ... }` span; `SyntaxError` from `JSON.parse` when the span
 *   is not valid JSON.
 */
function parseJsonResponse<T>(text: string): T {
  // Strip markdown code fences if present
  let cleaned = text.trim();
  if (cleaned.startsWith('```')) {
    cleaned = cleaned.replace(/^```(?:json)?\s*/, '').replace(/\s*```$/, '');
  }

  // Find JSON object boundaries
  const start = cleaned.indexOf('{');
  const end = cleaned.lastIndexOf('}');
  // `end < start` covers a missing `}` (-1) as well as a stray `}` that only
  // occurs before the first `{`, which would otherwise produce an empty slice.
  if (start === -1 || end < start) {
    throw new Error('No JSON object found in response');
  }

  return JSON.parse(cleaned.slice(start, end + 1)) as T;
}