@precisa-saude/fhir-ocr-utils 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/cli.js +224 -0
  2. package/package.json +4 -1
package/dist/cli.js ADDED
@@ -0,0 +1,224 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/cli/index.ts
4
+ import { parseArgs } from "util";
5
+ import { exitWithError } from "@precisa-saude/fhir/cli-utils";
6
+
7
+ // src/cli/commands/codes.ts
8
+ import { getInput, outputJson, outputText } from "@precisa-saude/fhir/cli-utils";
9
+
10
+ // src/anchor.ts
11
+ import {
12
+ generateFilteredLLMReference,
13
+ getAllSearchPatterns
14
+ } from "@precisa-saude/fhir";
15
/**
 * Produces the canonical form used for name matching.
 *
 * The text is decomposed (NFD), combining marks in U+0300–U+036F are removed,
 * the result is lower-cased, and every run of whitespace is collapsed into a
 * single space. Leading and trailing whitespace is collapsed, not trimmed.
 *
 * @param text Text to canonicalise.
 * @returns The canonicalised text.
 */
function normalize(text) {
  const decomposed = text.normalize("NFD");
  const withoutMarks = decomposed.replace(/[\u0300-\u036f]/g, "");
  const lowered = withoutMarks.toLowerCase();
  return lowered.replace(/\s+/g, " ");
}
18
// Allow-list of abbreviations, stored in normalized (lower-case) form.
// findBiomarkersInText consults it for names shorter than three characters:
// such a name is scanned for only when it appears here.
var UNAMBIGUOUS_SHORT_NAMES = /* @__PURE__ */ new Set([
  "hdl", "ldl", "lh", "tsh", "crp", "pcr", "ggt", "alt", "ast", "bun", "wbc",
  "rbc", "mcv", "mch", "rdw", "mpv", "psa", "fsh", "hba1c", "egfr", "acr", "esr",
  "vhs", "bmc", "bmd", "vat", "dxa", "dmo", "cmo", "ffm", "lbm", "mlg", "tav"
]);
53
// Module-level caches, filled on first use and reused for the process lifetime.
var cachedPatterns = null; // result of getAllSearchPatterns()
var cachedNormalized = null; // Map built by getNormalizedPatterns()

/**
 * Returns the search patterns, calling getAllSearchPatterns() only while no
 * truthy result has been cached yet.
 *
 * @returns The cached value returned by getAllSearchPatterns().
 */
function getPatterns() {
  if (cachedPatterns) {
    return cachedPatterns;
  }
  cachedPatterns = getAllSearchPatterns();
  return cachedPatterns;
}
61
/**
 * Returns a lookup from normalized name to the pattern entries that carry that
 * name, building it on first call and caching it afterwards.
 *
 * Each entry holds the pattern's `code`, its `loinc` when that value is
 * truthy, and the `original` (un-normalized) name. Several patterns may share
 * one normalized name, so every key maps to an array of entries.
 *
 * @returns Map keyed by normalized name.
 */
function getNormalizedPatterns() {
  if (cachedNormalized) {
    return cachedNormalized;
  }
  const index = /* @__PURE__ */ new Map();
  for (const { code, loinc, names } of getPatterns()) {
    for (const alias of names) {
      const key = normalize(alias);
      // `loinc` is left out entirely (not set to undefined) when it is falsy.
      const entry = loinc ? { code, loinc, original: alias } : { code, original: alias };
      const bucket = index.get(key);
      if (bucket) {
        bucket.push(entry);
      } else {
        index.set(key, [entry]);
      }
    }
  }
  cachedNormalized = index;
  return cachedNormalized;
}
81
/**
 * Escapes every regular-expression metacharacter in `literal` so that the
 * result, embedded in a pattern, matches that literal text only.
 */
function escapeRegExp(literal) {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Scans OCR text for known biomarker names and reports every biomarker code
 * whose name occurs in it.
 *
 * The text and the names are compared in normalized form (see `normalize`).
 * Names of up to four characters must match between word boundaries; longer
 * names match as plain substrings. Only the first occurrence of a name is
 * located, and each code is reported once: for the first of its names that
 * hits, in the iteration order of the normalized pattern map.
 *
 * @param ocrText Raw text to scan.
 * @returns An object with:
 *   - `matches`: one record per matched code (`code`, `confidence` fixed at 1,
 *     `loinc` when the pattern has one, `matchedName`, and `position`, which
 *     is an index into the normalized text rather than into `ocrText`);
 *   - `filteredReference`: the result of generateFilteredLLMReference for the
 *     matched codes;
 *   - `stats`: `matchedCount`, `scanTimeMs` and `totalPatterns`.
 */
function findBiomarkersInText(ocrText) {
  const startTime = Date.now();
  const normalizedText = normalize(ocrText);
  const matchedCodes = /* @__PURE__ */ new Set();
  const matches = [];
  const normalizedPatterns = getNormalizedPatterns();
  for (const [normalizedName, entries] of normalizedPatterns) {
    // NOTE(review): the allow-list is only consulted for names shorter than
    // three characters, although most of its entries are three or more
    // characters long — confirm the threshold is the intended one.
    if (normalizedName.length < 3 && !UNAMBIGUOUS_SHORT_NAMES.has(normalizedName)) {
      continue;
    }
    let position = -1;
    if (normalizedName.length <= 4) {
      // Short names are matched between word boundaries so they do not hit
      // inside longer words. The name is escaped first: an unescaped
      // metacharacter would either throw a SyntaxError while compiling the
      // pattern or be interpreted as regex syntax and match the wrong text.
      const wholeWord = new RegExp(`\\b${escapeRegExp(normalizedName)}\\b`);
      const match = wholeWord.exec(normalizedText);
      if (match) {
        position = match.index;
      }
    } else {
      position = normalizedText.indexOf(normalizedName);
    }
    if (position === -1) {
      continue;
    }
    for (const entry of entries) {
      // A code reachable through several names is reported only once.
      if (matchedCodes.has(entry.code)) {
        continue;
      }
      matchedCodes.add(entry.code);
      matches.push({
        code: entry.code,
        confidence: 1,
        loinc: entry.loinc,
        matchedName: entry.original,
        position
      });
    }
  }
  const scanTimeMs = Date.now() - startTime;
  const matchedCodesArray = Array.from(matchedCodes);
  return {
    filteredReference: generateFilteredLLMReference(matchedCodesArray),
    matches,
    stats: {
      matchedCount: matches.length,
      scanTimeMs,
      totalPatterns: getPatterns().length
    }
  };
}
128
/**
 * Extracts the biomarker code of every match in a scan result.
 *
 * @param result Object whose `matches` array holds records with a `code`.
 * @returns The codes, in the same order as `result.matches`.
 */
function getMatchedCodes(result) {
  const { matches } = result;
  return matches.map(({ code }) => code);
}
131
+
132
+ // src/cli/commands/codes.ts
133
/**
 * `codes` sub-command: reads the input named by the first argument, scans it
 * for biomarkers and prints the matched codes.
 *
 * With `json` set, the codes are emitted through outputJson (an empty array
 * when nothing matched). Otherwise they are printed one per line followed by
 * a total, or a "nothing found" message when there are none.
 *
 * @param args Positional arguments after the command name; only `args[0]` is used.
 * @param json Whether to emit JSON instead of text.
 */
async function codes(args, json) {
  const ocrText = await getInput(args[0]);
  const found = getMatchedCodes(findBiomarkersInText(ocrText));
  if (json) {
    outputJson(found);
  } else if (found.length === 0) {
    outputText("Nenhum biomarcador encontrado no texto.");
  } else {
    outputText(found.join("\n"));
    outputText(`
Total: ${found.length} c\xF3digos encontrados`);
  }
}
149
+
150
+ // src/cli/commands/find.ts
151
+ import { formatTable, getInput as getInput2, outputJson as outputJson2, outputText as outputText2 } from "@precisa-saude/fhir/cli-utils";
152
/**
 * `find` sub-command: reads the input named by the first argument, scans it
 * for biomarkers and prints the full match details.
 *
 * With `json` set, the whole scan result is emitted through outputJson2.
 * Otherwise the matches are rendered as a table (code, LOINC or an em dash
 * when absent, matched name, confidence with two decimals, position) followed
 * by a summary line, or a "nothing found" message when there are no matches.
 *
 * @param args Positional arguments after the command name; only `args[0]` is used.
 * @param json Whether to emit JSON instead of text.
 */
async function find(args, json) {
  const scan = findBiomarkersInText(await getInput2(args[0]));
  if (json) {
    outputJson2(scan);
    return;
  }
  const { matches, stats } = scan;
  if (matches.length === 0) {
    outputText2("Nenhum biomarcador encontrado no texto.");
    return;
  }
  const tableRows = matches.map(({ code, loinc, matchedName, confidence, position }) => [
    code,
    loinc ?? "\u2014",
    matchedName,
    confidence.toFixed(2),
    String(position)
  ]);
  const tableHeader = ["C\xF3digo", "LOINC", "Match", "Confian\xE7a", "Posi\xE7\xE3o"];
  outputText2(formatTable(tableHeader, tableRows));
  outputText2(
    `
Encontrados: ${stats.matchedCount} de ${stats.totalPatterns} padr\xF5es (${stats.scanTimeMs}ms)`
  );
}
176
+
177
+ // src/cli/index.ts
178
// Usage text; main() writes it to stdout when --help/-h is given or when no command is supplied.
+ var HELP = `fhir-ocr \u2014 CLI do @precisa-saude/fhir-ocr-utils
179
+
180
+ Uso: fhir-ocr <comando> [op\xE7\xF5es]
181
+
182
+ Comandos:
183
+ find [arquivo] Encontrar biomarcadores em texto OCR
184
+ codes [arquivo] Extrair c\xF3digos de biomarcadores encontrados no texto
185
+
186
+ Flags globais:
187
+ --json Sa\xEDda em formato JSON
188
+ --help, -h Mostrar ajuda
189
+ --version, -v Mostrar vers\xE3o
190
+
191
+ L\xEA de stdin quando nenhum arquivo \xE9 fornecido.
192
+ `;
193
// Dispatch table used by main(): sub-command name typed on the command line -> handler.
var COMMANDS = { codes, find };
197
/**
 * CLI entry point: parses the command line and dispatches to a sub-command.
 *
 * Handling order:
 *   1. `--version` / `-v` prints the version and returns.
 *   2. `--help` / `-h`, or no positional command, prints the usage text.
 *   3. Otherwise the first positional selects a handler from COMMANDS, which
 *      receives the remaining positionals and the `--json` flag.
 *
 * An unrecognised command is reported through exitWithError.
 */
async function main() {
  const { positionals, values } = parseArgs({
    allowPositionals: true,
    options: {
      help: { default: false, short: "h", type: "boolean" },
      json: { default: false, type: "boolean" },
      version: { default: false, short: "v", type: "boolean" }
    },
    // Non-strict: unknown flags are tolerated instead of throwing.
    strict: false
  });
  if (values.version) {
    process.stdout.write("0.5.0\n");
    return;
  }
  const [command, ...rest] = positionals;
  if (values.help || !command) {
    process.stdout.write(HELP);
    return;
  }
  // Look the command up as an OWN property only. A plain `COMMANDS[command]`
  // also resolves inherited members, so input such as "toString",
  // "constructor" or "__proto__" would be treated as a known command.
  const handler = Object.prototype.hasOwnProperty.call(COMMANDS, command) ? COMMANDS[command] : void 0;
  if (typeof handler !== "function") {
    exitWithError(`Comando desconhecido: ${command}\nUse --help para ver os comandos dispon\xEDveis.`);
    // Defensive: never fall through to the call below, even if exitWithError
    // were to return instead of terminating the process.
    return;
  }
  await handler(rest, Boolean(values.json));
}
// A rejection is not guaranteed to be an Error instance; fall back to its
// string form so that a thrown non-Error value is still reported.
main().catch((err) => exitWithError(err instanceof Error ? err.message : String(err)));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@precisa-saude/fhir-ocr-utils",
3
- "version": "0.4.1",
3
+ "version": "0.5.0",
4
4
  "description": "Utilitários de ancoragem OCR para extração de biomarcadores de PDFs de resultados laboratoriais",
5
5
  "keywords": [
6
6
  "fhir",
@@ -34,6 +34,9 @@
34
34
  "main": "./dist/index.cjs",
35
35
  "module": "./dist/index.js",
36
36
  "types": "./dist/index.d.ts",
37
+ "bin": {
38
+ "fhir-ocr": "./dist/cli.js"
39
+ },
37
40
  "files": [
38
41
  "dist"
39
42
  ],