fhirsmith 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,313 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * parse-icd9.js
4
+ * Parses an ICD-9-CM text file into a FHIR R4 CodeSystem resource.
5
+ *
6
+ * Usage:
7
+ * node parse-icd9.js <input-file> [output-file]
8
+ *
9
+ * File format observed:
10
+ *
11
+ * GROUP HEADERS (two forms):
12
+ * [Roman numeral. ]DISPLAY (NNN-NNN) <- top-level chapter
13
+ * DISPLAY (NNN-NNN) <- sub-chapter / block
14
+ *
15
+ * ICD-9 CONCEPT HEADERS:
16
+ * NNN[.D...]<whitespace>DISPLAY
17
+ * (code is left-aligned; display follows after one or more spaces/tabs)
18
+ *
19
+ * CONTINUATION / ANNOTATION LINES:
20
+ * Lines that are neither a group header nor a code header and are not
21
+ * blank. They belong to the most recently opened concept.
22
+ * Keyword prefixes Includes: Excludes: Note: start a named section.
23
+ * Subsequent indented lines continue that section.
24
+ */
25
+
26
+ 'use strict';
27
+
28
+ const fs = require('fs');
29
+ const path = require('path');
30
+
31
+ // ── helpers ──────────────────────────────────────────────────────────────────
32
+
33
/**
 * Collapse every run of spaces/tabs into one space and strip surrounding
 * whitespace.
 *
 * @param {string} str - Raw text.
 * @returns {string} The tidied text.
 */
function normalise(str) {
  const collapsed = str.split(/[ \t]+/).join(' ');
  return collapsed.trim();
}
36
+
37
// Group header pattern, applied to an already-trimmed line.
//   - an optional "<roman numeral>. " prefix is skipped (not captured)
//   - capture 1: the display text (lazy, so it stops before the final range)
//   - capture 2: the parenthesised range "LO-HI"; each end is an optional
//     capital letter followed by 1-3 digits, e.g. (001-139), (V01-V91),
//     (E800-E999)
const GROUP_HDR = /^(?:[IVXLCDM]+\.\s+)?(.*?)\s+\(([A-Z]?\d{1,3}-[A-Z]?\d{1,3})\)\s*$/;

// Concept header pattern, anchored at the very start of the line.
//   - capture 1: the code - optional capital letter, 2-3 digits, optional
//     ".digits" suffix
//   - capture 2: the display text, which begins at the first non-blank
//     character after the whitespace that follows the code
const CODE_HDR = /^([A-Z]?\d{2,3}(?:\.\d+)?)\s+(\S.*)/;
43
+
44
/**
 * Tell whether a line, once trimmed, matches the group-header pattern.
 *
 * @param {string} line - Source line; surrounding whitespace is ignored.
 * @returns {boolean} True when the trimmed line matches GROUP_HDR.
 */
function isGroupHeader(line) {
  const candidate = line.trim();
  return GROUP_HDR.exec(candidate) !== null;
}
47
+
48
/**
 * Tell whether a raw (untrimmed) line is a concept header.
 *
 * The line is tested as-is, so a code preceded by any whitespace is not
 * treated as a header.
 *
 * @param {string} line - Raw source line.
 * @returns {boolean} True when the line starts with a code and matches CODE_HDR.
 */
function isCodeHeader(line) {
  // Cheap first-character screen before the full pattern.
  const startsLikeCode = /^[A-Z]?\d/.test(line);
  if (!startsLikeCode) return false;
  return CODE_HDR.test(line);
}
52
+
53
/**
 * Split a group-header line into its range code and display text.
 *
 * @param {string} line - Candidate header line; it is trimmed before matching.
 * @returns {{code: string, display: string} | null} The range (e.g. "001-139")
 *   as `code` and the normalised title as `display`, or null when the line
 *   does not match GROUP_HDR.
 */
function parseGroupHeader(line) {
  const match = GROUP_HDR.exec(line.trim());
  if (match === null) return null;

  const [, title, range] = match;
  return { code: range, display: normalise(title) };
}
58
+
59
/**
 * Split a concept-header line into its code and display text.
 *
 * @param {string} line - Raw header line (matched untrimmed).
 * @returns {{code: string, display: string} | null} The code and the
 *   normalised display text, or null when the line does not match CODE_HDR.
 */
function parseCodeHeader(line) {
  const match = CODE_HDR.exec(line);
  if (match === null) return null;

  const [, rawCode, rawDisplay] = match;
  return { code: rawCode.trim(), display: normalise(rawDisplay) };
}
64
+
65
+ // ── block collection ──────────────────────────────────────────────────────────
66
+
67
/**
 * Group the input lines into raw blocks, one per recognised header.
 *
 * Each block is { type: 'group' | 'icd9', lines: string[] }. lines[0] is the
 * header (trimmed for group blocks, raw for icd9 blocks); the remaining
 * entries are the untouched body lines that followed it.
 *
 * Blank lines are skipped without closing the open block. Non-header lines
 * seen before the first header are discarded.
 *
 * NOTE(review): the group test runs on the trimmed line, so indentation never
 * prevents a line from opening a group block - confirm that body lines ending
 * in "(NNN-NNN)" cannot occur in the input.
 *
 * @param {string[]} lines - Lines of the source file.
 * @returns {Array<{type: string, lines: string[]}>} Blocks in file order.
 */
function collectBlocks(lines) {
  const blocks = [];
  let open = null;

  // Close whatever block is open and start a new one with the given header.
  const startBlock = (type, headerLine) => {
    if (open) blocks.push(open);
    open = { type, lines: [headerLine] };
  };

  for (const raw of lines) {
    const trimmed = raw.trim();
    if (!trimmed) continue; // blank line: the open block stays open

    if (isGroupHeader(trimmed)) {
      startBlock('group', trimmed);
      continue;
    }
    if (isCodeHeader(raw)) {
      startBlock('icd9', raw);
      continue;
    }

    // Continuation / annotation: belongs to the open block, if there is one.
    if (open) open.lines.push(raw);
  }

  if (open) blocks.push(open);
  return blocks;
}
101
+
102
+ // ── block -> record ────────────────────────────────────────────────────────────
103
+
104
/**
 * Sort a block's body lines into its named sections.
 *
 * Lines are normalised and empty ones dropped. A line starting with
 * "Include(s):", "Exclude(s):" or "Note:" (case-insensitive) switches the
 * active section; any text after the keyword goes into that section. Every
 * other line is appended to the active section, which starts as
 * "description".
 *
 * @param {string[]} bodyLines - Body lines of a block (header excluded).
 * @returns {{description: (string|undefined), includes: (string|undefined),
 *   excludes: (string|undefined), note: (string|undefined)}} Each section as
 *   one space-joined string, or undefined when it has no text.
 */
function parseSections(bodyLines) {
  // Keyword prefix (with trailing whitespace) for each named section.
  const sectionPrefixes = [
    ['includes', /^includes?:\s*/i],
    ['excludes', /^excludes?:\s*/i],
    ['note', /^note:\s*/i],
  ];

  const buckets = { description: [], includes: [], excludes: [], note: [] };
  let active = 'description';

  for (const rawLine of bodyLines) {
    const line = normalise(rawLine);
    if (!line) continue;

    const hit = sectionPrefixes.find(([, prefix]) => prefix.test(line));
    if (!hit) {
      buckets[active].push(line);
      continue;
    }

    const [key, prefix] = hit;
    active = key;
    const remainder = line.replace(prefix, '').trim();
    if (remainder) buckets[key].push(remainder);
  }

  const toText = (parts) => {
    const text = parts.join(' ').replace(/\s+/g, ' ').trim();
    return text || undefined;
  };

  return {
    description: toText(buckets.description),
    includes: toText(buckets.includes),
    excludes: toText(buckets.excludes),
    note: toText(buckets.note),
  };
}
140
+
141
/**
 * Convert one raw block into a flat record.
 *
 * The first line is parsed as a group header or a concept header according
 * to block.type; the remaining lines are split into sections.
 *
 * @param {{type: string, lines: string[]}} block - Block from collectBlocks.
 * @returns {object | null} { code, display, isGroup, description, includes,
 *   excludes, note }, or null when the header line cannot be parsed.
 */
function parseBlock(block) {
  const [headerLine, ...bodyLines] = block.lines;
  const isGroup = block.type === 'group';

  const header = isGroup
    ? parseGroupHeader(headerLine)
    : parseCodeHeader(headerLine);
  if (!header) return null;

  const { code, display } = header;
  return { code, display, isGroup, ...parseSections(bodyLines) };
}
159
+
160
+ // ── hierarchy ─────────────────────────────────────────────────────────────────
161
+
162
/**
 * Find the best (narrowest) parent for a code among the codes already seen.
 * Parents are expected to precede their children in `seenCodes`.
 *
 * Resolution order:
 *   1. Range code ("LO-HI"): the narrowest other seen range that contains it.
 *   2. Decimal code ("002.11"): the longest seen prefix of the code, down to
 *      the part before the dot ("002.1", then "002").
 *   3. Otherwise (including a decimal code with no seen prefix): the
 *      narrowest seen range that contains the code's integer part.
 *
 * A range only counts as containing a code when the letter prefixes agree:
 * "E800" may sit under "E800-E807" but never under "800-804", and "V01"
 * never under "001-009". Comparing the numeric parts alone would file the
 * letter-prefixed codes under unrelated numeric ranges.
 *
 * @param {string} code - Code or range whose parent is wanted.
 * @param {string[]} seenCodes - Codes/ranges encountered so far, in file order.
 * @returns {string | null} The parent's code, or null when there is none.
 */
function findParent(code, seenCodes) {
  // Captures: 1 = low-end letter, 2 = low digits, 3 = high-end letter, 4 = high digits.
  const RANGE = /^([A-Z]?)(\d{1,3})-([A-Z]?)(\d{1,3})$/;

  // Narrowest seen range (other than `self`) whose endpoints carry the given
  // letter prefixes and whose numeric bounds enclose [lo, hi]. On equal
  // spans the range seen first wins.
  const narrowestRange = (loPrefix, hiPrefix, lo, hi, self) => {
    let best = null;
    let bestSpan = Infinity;
    for (const candidate of seenCodes) {
      if (candidate === self) continue;
      const m = RANGE.exec(candidate);
      if (!m) continue;
      if (m[1] !== loPrefix || m[3] !== hiPrefix) continue;
      const pLo = parseInt(m[2], 10);
      const pHi = parseInt(m[4], 10);
      if (pLo > lo || pHi < hi) continue;
      const span = pHi - pLo;
      if (span < bestSpan) {
        best = candidate;
        bestSpan = span;
      }
    }
    return best;
  };

  // 1. Range code.
  const own = RANGE.exec(code);
  if (own) {
    return narrowestRange(own[1], own[3], parseInt(own[2], 10), parseInt(own[4], 10), code);
  }

  // 2. Decimal code: try successively shorter prefixes; the last candidate
  //    (len === dotIdx) is the part before the dot.
  const dotIdx = code.indexOf('.');
  if (dotIdx !== -1) {
    for (let len = code.length - 1; len >= dotIdx; len--) {
      const candidate = code.substring(0, len);
      if (seenCodes.includes(candidate)) return candidate;
    }
  }

  // 3. Fall back to the narrowest enclosing range with the same letter prefix.
  //    parseInt stops at the dot, so "002.1" is looked up as 2.
  const prefix = /^[A-Z]/.test(code) ? code[0] : '';
  const num = parseInt(code.slice(prefix.length), 10);
  return narrowestRange(prefix, prefix, num, num, null);
}
211
+
212
+ // ── FHIR CodeSystem builder ───────────────────────────────────────────────────
213
+
214
/**
 * Assemble a FHIR CodeSystem object from the flat list of parsed records.
 *
 * Each record's parent is resolved with findParent against the codes that
 * came before it; records without a parent become top-level concepts and the
 * rest are nested under their parent in input order.
 *
 * NOTE(review): records are indexed by code, so when two records share a
 * code the later one supplies the content for both positions - confirm the
 * input never repeats a code.
 *
 * @param {Array<object>} records - Records produced by parseBlock
 *   ({ code, display, isGroup, description?, includes?, excludes?, note? }).
 * @returns {object} A CodeSystem resource as a plain object.
 */
function buildFhirCodeSystem(records) {
  const recordByCode = new Map();
  for (const rec of records) recordByCode.set(rec.code, rec);

  // Resolve each record's parent using only the codes that precede it.
  const parentByCode = new Map();
  const seen = [];
  for (const { code } of records) {
    parentByCode.set(code, findParent(code, seen));
    seen.push(code);
  }

  // Invert the parent relation, keeping children in input order.
  const childCodes = new Map();
  for (const { code } of records) {
    const parent = parentByCode.get(code);
    if (!parent) continue;
    const siblings = childCodes.get(parent);
    if (siblings) {
      siblings.push(code);
    } else {
      childCodes.set(parent, [code]);
    }
  }

  const textProperties = ['description', 'includes', 'excludes', 'note'];

  // Recursively build the concept (with nested children) for one code.
  const toConcept = (code) => {
    const rec = recordByCode.get(code);
    const concept = { code: rec.code, display: rec.display };

    const property = [];
    if (rec.isGroup) property.push({ code: 'notSelectable', valueBoolean: true });
    for (const key of textProperties) {
      if (rec[key]) property.push({ code: key, valueString: rec[key] });
    }
    if (property.length > 0) concept.property = property;

    const kids = childCodes.get(code);
    if (kids && kids.length > 0) concept.concept = kids.map((kid) => toConcept(kid));

    return concept;
  };

  const rootConcepts = records
    .filter((rec) => !parentByCode.get(rec.code))
    .map((rec) => toConcept(rec.code));

  const stringPropertyDefs = [
    ['description', 'Additional descriptive text'],
    ['includes', 'Inclusion notes'],
    ['excludes', 'Exclusion notes'],
    ['note', 'Additional notes'],
  ].map(([code, description]) => ({ code, description, type: 'string' }));

  return {
    resourceType: 'CodeSystem',
    id: 'icd-9-cm',
    url: 'http://hl7.org/fhir/sid/icd-9-cm',
    version: '2015',
    name: 'ICD9CM',
    title: 'International Classification of Diseases, 9th Revision, Clinical Modification',
    status: 'active',
    content: 'complete',
    hierarchyMeaning: 'is-a',
    property: [
      {
        code: 'notSelectable',
        uri: 'http://hl7.org/fhir/concept-properties#notSelectable',
        description: 'Grouping code not intended for direct coding',
        type: 'boolean',
      },
      ...stringPropertyDefs,
    ],
    concept: rootConcepts,
  };
}
278
+
279
+ // ── main ──────────────────────────────────────────────────────────────────────
280
+
281
/**
 * Count every concept in a concept forest, nested children included.
 *
 * @param {Array<object> | undefined} concepts - Concepts whose optional
 *   `concept` property holds their children.
 * @returns {number} Total number of concepts; 0 for a missing list.
 */
function countAll(concepts) {
  if (!concepts) return 0;

  let total = 0;
  concepts.forEach((node) => {
    total += 1 + countAll(node.concept);
  });
  return total;
}
285
+
286
+ function main() {
287
+ const [,, inputFile, outputFile] = process.argv;
288
+ if (!inputFile) {
289
+ console.error('Usage: node parse-icd9.js <input-file> [output-file]');
290
+ process.exit(1);
291
+ }
292
+
293
+ const outFile = outputFile || path.join(path.dirname(inputFile), 'icd9-cm.json');
294
+
295
+ const text = fs.readFileSync(inputFile, 'utf8');
296
+ const lines = text.split(/\r?\n/);
297
+ console.log(`Read ${lines.length} lines`);
298
+
299
+ const blocks = collectBlocks(lines);
300
+ console.log(`Collected ${blocks.length} blocks`);
301
+
302
+ const records = blocks.map(parseBlock).filter(Boolean);
303
+ const groups = records.filter(r => r.isGroup).length;
304
+ console.log(`Parsed ${records.length} records (${groups} groups, ${records.length - groups} codes)`);
305
+
306
+ const cs = buildFhirCodeSystem(records);
307
+ console.log(`CodeSystem has ${countAll(cs.concept)} concepts`);
308
+
309
+ fs.writeFileSync(outFile, JSON.stringify(cs, null, 2), 'utf8');
310
+ console.log(`Written -> ${outFile}`);
311
+ }
312
+
313
+ main();