@precisa-saude/fhir-ocr-utils 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +224 -0
- package/package.json +4 -1
package/dist/cli.js
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/cli/index.ts
|
|
4
|
+
import { parseArgs } from "util";
|
|
5
|
+
import { exitWithError } from "@precisa-saude/fhir/cli-utils";
|
|
6
|
+
|
|
7
|
+
// src/cli/commands/codes.ts
|
|
8
|
+
import { getInput, outputJson, outputText } from "@precisa-saude/fhir/cli-utils";
|
|
9
|
+
|
|
10
|
+
// src/anchor.ts
|
|
11
|
+
import {
|
|
12
|
+
generateFilteredLLMReference,
|
|
13
|
+
getAllSearchPatterns
|
|
14
|
+
} from "@precisa-saude/fhir";
|
|
15
|
+
/**
 * Canonicalizes text for accent- and case-insensitive comparison.
 *
 * Steps, in order: decompose to NFD, drop combining marks in the
 * U+0300–U+036F range, lower-case, then collapse every whitespace run into a
 * single space. Leading and trailing whitespace is collapsed, not trimmed.
 *
 * @param {string} text - Raw text to canonicalize.
 * @returns {string} The canonicalized text.
 */
function normalize(text) {
  const decomposed = text.normalize("NFD");
  const withoutMarks = decomposed.replace(/[\u0300-\u036f]/g, "");
  const lowered = withoutMarks.toLowerCase();
  return lowered.replace(/\s+/g, " ");
}
|
|
18
|
+
/**
 * Lower-case abbreviations that the text scanner accepts even when a
 * normalized name is shorter than three characters.
 *
 * NOTE(review): the scanner only consults this set for names shorter than
 * three characters, so entries of three or more characters currently have no
 * effect there — confirm whether a different length threshold was intended.
 */
const UNAMBIGUOUS_SHORT_NAMES = /* @__PURE__ */ new Set([
  "hdl", "ldl", "lh", "tsh", "crp", "pcr", "ggt", "alt", "ast",
  "bun", "wbc", "rbc", "mcv", "mch", "rdw", "mpv", "psa", "fsh",
  "hba1c", "egfr", "acr", "esr", "vhs", "bmc", "bmd", "vat", "dxa",
  "dmo", "cmo", "ffm", "lbm", "mlg", "tav"
]);
|
|
53
|
+
// Module-level caches, filled lazily on first use; both start out as null.
let cachedPatterns = null;
let cachedNormalized = null;

/**
 * Returns the search patterns produced by getAllSearchPatterns().
 *
 * The result is stored in `cachedPatterns`; the underlying call is repeated
 * only while the cached value is still falsy.
 *
 * @returns {*} Whatever getAllSearchPatterns() returned (iterated as a list of
 *   pattern objects elsewhere in this file).
 */
function getPatterns() {
  cachedPatterns ||= getAllSearchPatterns();
  return cachedPatterns;
}
|
|
61
|
+
/**
 * Builds, once, a lookup from normalized biomarker name to the patterns that
 * declare that name, and returns the cached Map on later calls.
 *
 * Each Map value is an array of `{ code, loinc?, original }` entries, where
 * `original` is the name before normalization and `loinc` is present only
 * when the pattern carries a truthy `loinc`. Several patterns may share one
 * normalized name, so entries accumulate in pattern order.
 *
 * @returns {Map<string, Array<{code: *, loinc?: *, original: string}>>}
 */
function getNormalizedPatterns() {
  if (cachedNormalized) {
    return cachedNormalized;
  }
  const index = /* @__PURE__ */ new Map();
  for (const { code, loinc, names } of getPatterns()) {
    for (const original of names) {
      const key = normalize(original);
      const bucket = index.get(key) || [];
      bucket.push(loinc ? { code, loinc, original } : { code, original });
      index.set(key, bucket);
    }
  }
  cachedNormalized = index;
  return cachedNormalized;
}
|
|
81
|
+
/**
 * Escapes regular-expression metacharacters so `value` is matched literally
 * when embedded in a RegExp source string.
 *
 * @param {string} value - Text to embed in a pattern.
 * @returns {string} `value` with every metacharacter backslash-escaped.
 */
function escapeRegExp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Scans OCR text for known biomarker names and reports which codes appear.
 *
 * The text is normalized first, so every reported `position` is an index into
 * the normalized text, not into `ocrText`. Names shorter than three
 * characters are ignored unless listed in UNAMBIGUOUS_SHORT_NAMES. Names of
 * up to four characters must appear as a standalone token (not embedded in a
 * longer run of word characters); longer names match as plain substrings.
 * Each code is reported at most once, for the first name that hits in the
 * pattern map's iteration order.
 *
 * @param {string} ocrText - Raw text to scan.
 * @returns {{
 *   filteredReference: *,
 *   matches: Array<{code: *, confidence: number, loinc: *, matchedName: string, position: number}>,
 *   stats: {matchedCount: number, scanTimeMs: number, totalPatterns: number}
 * }} `filteredReference` is the result of generateFilteredLLMReference()
 *   for the matched codes; `confidence` is always 1.
 */
function findBiomarkersInText(ocrText) {
  const startTime = Date.now();
  const normalizedText = normalize(ocrText);
  const matchedCodes = /* @__PURE__ */ new Set();
  const matches = [];
  for (const [normalizedName, entries] of getNormalizedPatterns()) {
    if (normalizedName.length < 3 && !UNAMBIGUOUS_SHORT_NAMES.has(normalizedName)) {
      continue;
    }
    let position;
    if (normalizedName.length <= 4) {
      // The name is data, not a pattern: escape it so metacharacters cannot
      // throw or over-match. The lookarounds behave like \b for alphanumeric
      // names and still work when a name starts or ends with punctuation.
      const standalone = new RegExp(`(?<!\\w)${escapeRegExp(normalizedName)}(?!\\w)`);
      position = normalizedText.search(standalone);
    } else {
      position = normalizedText.indexOf(normalizedName);
    }
    if (position === -1) {
      continue;
    }
    for (const entry of entries) {
      if (matchedCodes.has(entry.code)) {
        continue;
      }
      matchedCodes.add(entry.code);
      matches.push({
        code: entry.code,
        confidence: 1,
        loinc: entry.loinc,
        matchedName: entry.original,
        position
      });
    }
  }
  const scanTimeMs = Date.now() - startTime;
  return {
    filteredReference: generateFilteredLLMReference(Array.from(matchedCodes)),
    matches,
    stats: {
      matchedCount: matches.length,
      scanTimeMs,
      totalPatterns: getPatterns().length
    }
  };
}
|
|
128
|
+
/**
 * Extracts the biomarker codes from a scan result, in match order.
 *
 * @param {{matches: Array<{code: *}>}} result - Object with a `matches` array.
 * @returns {Array<*>} The `code` of every match.
 */
function getMatchedCodes(result) {
  const { matches } = result;
  return matches.map((match) => match.code);
}
|
|
131
|
+
|
|
132
|
+
// src/cli/commands/codes.ts
|
|
133
|
+
/**
 * `codes` command: reads the input, scans it for biomarkers, and prints only
 * the matched codes.
 *
 * With `json` set, the code array goes to outputJson(). Otherwise the codes
 * are printed one per line followed by a total, or a "nothing found" message
 * when the list is empty.
 *
 * @param {string[]} args - Positional arguments; `args[0]` is handed to getInput().
 * @param {boolean} json - Whether to emit JSON instead of text.
 * @returns {Promise<void>}
 */
async function codes(args, json) {
  const input = await getInput(args[0]);
  const matched = getMatchedCodes(findBiomarkersInText(input));
  if (json) {
    outputJson(matched);
    return;
  }
  if (matched.length === 0) {
    outputText("Nenhum biomarcador encontrado no texto.");
    return;
  }
  outputText(matched.join("\n"));
  outputText(`\nTotal: ${matched.length} c\xF3digos encontrados`);
}
|
|
149
|
+
|
|
150
|
+
// src/cli/commands/find.ts
|
|
151
|
+
import { formatTable, getInput as getInput2, outputJson as outputJson2, outputText as outputText2 } from "@precisa-saude/fhir/cli-utils";
|
|
152
|
+
/**
 * `find` command: reads the input, scans it for biomarkers, and prints the
 * full match details.
 *
 * With `json` set, the whole scan result goes to outputJson2(). Otherwise a
 * table (code, LOINC, matched name, confidence, position) is printed followed
 * by a summary line, or a "nothing found" message when there are no matches.
 * A nullish `loinc` is shown as an em dash.
 *
 * @param {string[]} args - Positional arguments; `args[0]` is handed to getInput2().
 * @param {boolean} json - Whether to emit JSON instead of text.
 * @returns {Promise<void>}
 */
async function find(args, json) {
  const input = await getInput2(args[0]);
  const result = findBiomarkersInText(input);
  if (json) {
    outputJson2(result);
    return;
  }
  const { matches } = result;
  if (matches.length === 0) {
    outputText2("Nenhum biomarcador encontrado no texto.");
    return;
  }
  const rows = matches.map(({ code, loinc, matchedName, confidence, position }) => [
    code,
    loinc ?? "\u2014",
    matchedName,
    confidence.toFixed(2),
    String(position)
  ]);
  const header = ["C\xF3digo", "LOINC", "Match", "Confian\xE7a", "Posi\xE7\xE3o"];
  outputText2(formatTable(header, rows));
  const { matchedCount, totalPatterns, scanTimeMs } = result.stats;
  outputText2(`\nEncontrados: ${matchedCount} de ${totalPatterns} padr\xF5es (${scanTimeMs}ms)`);
}
|
|
176
|
+
|
|
177
|
+
// src/cli/index.ts
|
|
178
|
+
/**
 * Usage text written to stdout for --help or when no command is given.
 * Built line by line; the trailing empty element yields the final newline.
 *
 * NOTE(review): the command/flag rows use single spaces between the name and
 * its description; confirm whether column alignment was intended.
 */
const HELP = [
  "fhir-ocr \u2014 CLI do @precisa-saude/fhir-ocr-utils",
  "",
  "Uso: fhir-ocr <comando> [op\xE7\xF5es]",
  "",
  "Comandos:",
  "find [arquivo] Encontrar biomarcadores em texto OCR",
  "codes [arquivo] Extrair c\xF3digos de biomarcadores encontrados no texto",
  "",
  "Flags globais:",
  "--json Sa\xEDda em formato JSON",
  "--help, -h Mostrar ajuda",
  "--version, -v Mostrar vers\xE3o",
  "",
  "L\xEA de stdin quando nenhum arquivo \xE9 fornecido.",
  ""
].join("\n");
|
|
193
|
+
/**
 * Dispatch table from CLI command name to its async handler.
 * Every handler is invoked as `handler(positionalArgs, jsonFlag)`.
 */
const COMMANDS = { codes, find };
|
|
197
|
+
/**
 * CLI entry point: parses the process arguments and dispatches to a command.
 *
 * Order of precedence: `--version` prints the version and returns; `--help`
 * or a missing command prints HELP and returns; otherwise the first
 * positional selects a handler from COMMANDS, which receives the remaining
 * positionals and the `--json` flag. An unknown command is reported through
 * exitWithError().
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { positionals, values } = parseArgs({
    allowPositionals: true,
    options: {
      help: { default: false, short: "h", type: "boolean" },
      json: { default: false, type: "boolean" },
      version: { default: false, short: "v", type: "boolean" }
    },
    strict: false
  });
  if (values.version) {
    process.stdout.write("0.5.0\n");
    return;
  }
  const [command, ...rest] = positionals;
  if (values.help || !command) {
    process.stdout.write(HELP);
    return;
  }
  // Own properties only: a plain-object lookup would also resolve inherited
  // members, so `fhir-ocr toString` or `fhir-ocr constructor` would run an
  // Object.prototype function instead of being rejected as unknown.
  const handler = Object.hasOwn(COMMANDS, command) ? COMMANDS[command] : undefined;
  if (typeof handler !== "function") {
    exitWithError(`Comando desconhecido: ${command}\nUse --help para ver os comandos dispon\xEDveis.`);
    // Never fall through to calling a missing handler, even if
    // exitWithError() were to return.
    return;
  }
  await handler(rest, Boolean(values.json));
}
|
|
224
|
+
// Run the CLI. Any rejection is reported through exitWithError(); values that
// are not Error instances (null, strings, plain objects) are stringified so
// the handler itself cannot throw on a missing `.message`.
main().catch((err) => exitWithError(err instanceof Error ? err.message : String(err)));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@precisa-saude/fhir-ocr-utils",
|
|
3
|
-
"version": "0.4.0",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Utilitários de ancoragem OCR para extração de biomarcadores de PDFs de resultados laboratoriais",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"fhir",
|
|
@@ -34,6 +34,9 @@
|
|
|
34
34
|
"main": "./dist/index.cjs",
|
|
35
35
|
"module": "./dist/index.js",
|
|
36
36
|
"types": "./dist/index.d.ts",
|
|
37
|
+
"bin": {
|
|
38
|
+
"fhir-ocr": "./dist/cli.js"
|
|
39
|
+
},
|
|
37
40
|
"files": [
|
|
38
41
|
"dist"
|
|
39
42
|
],
|