@precisa-saude/fhir-ocr-utils 0.1.1 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.map +1 -1
- package/package.json +5 -2
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["/
|
|
1
|
+
{"version":3,"sources":["/home/runner/work/fhir-brasil/fhir-brasil/packages/ocr-utils/dist/index.cjs","../src/anchor.ts"],"names":[],"mappings":"AAAA;ACQA;AAEE;AACA;AAAA,2CACK;AA0BP,SAAS,SAAA,CAAU,IAAA,EAAsB;AACvC,EAAA,OAAO,IAAA,CACJ,SAAA,CAAU,KAAK,CAAA,CACf,OAAA,CAAQ,kBAAA,EAAoB,EAAE,CAAA,CAC9B,WAAA,CAAY,CAAA,CACZ,OAAA,CAAQ,MAAA,EAAQ,GAAG,CAAA;AACxB;AAEA,IAAM,wBAAA,kBAA0B,IAAI,GAAA,CAAI;AAAA,EACtC,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,IAAA;AAAA,EAAM,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAC9D,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,OAAA;AAAA,EAAS,MAAA;AAAA,EACjE,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO,KAAA;AAAA,EAC/D,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO;AAChB,CAAC,CAAA;AAID,IAAI,eAAA,EAAkD,IAAA;AACtD,IAAI,iBAAA,EAAuD,IAAA;AAE3D,SAAS,WAAA,CAAA,EAAwC;AAC/C,EAAA,GAAA,CAAI,CAAC,cAAA,EAAgB;AACnB,IAAA,eAAA,EAAiB,wCAAA,CAAqB;AAAA,EACxC;AACA,EAAA,OAAO,cAAA;AACT;AAEA,SAAS,qBAAA,CAAA,EAAqD;AAC5D,EAAA,GAAA,CAAI,CAAC,gBAAA,EAAkB;AACrB,IAAA,MAAM,SAAA,EAAW,WAAA,CAAY,CAAA;AAC7B,IAAA,MAAM,IAAA,kBAAM,IAAI,GAAA,CAA4B,CAAA;AAC5C,IAAA,IAAA,CAAA,MAAW,QAAA,GAAW,QAAA,EAAU;AAC9B,MAAA,IAAA,CAAA,MAAW,KAAA,GAAQ,OAAA,CAAQ,KAAA,EAAO;AAChC,QAAA,MAAM,WAAA,EAAa,SAAA,CAAU,IAAI,CAAA;AACjC,QAAA,MAAM,SAAA,EAAW,GAAA,CAAI,GAAA,CAAI,UAAU,EAAA,GAAK,CAAC,CAAA;AACzC,QAAA,QAAA,CAAS,IAAA,CAAK;AAAA,UACZ,IAAA,EAAM,OAAA,CAAQ,IAAA;AAAA,UACd,GAAI,OAAA,CAAQ,MAAA,GAAS,EAAE,KAAA,EAAO,OAAA,CAAQ,MAAM,CAAA;AAAA,UAC5C,QAAA,EAAU;AAAA,QACZ,CAAC,CAAA;AACD,QAAA,GAAA,CAAI,GAAA,CAAI,UAAA,EAAY,QAAQ,CAAA;AAAA,MAC9B;AAAA,IACF;AACA,IAAA,iBAAA,EAAmB,GAAA;AAAA,EACrB;AACA,EAAA,OAAO,gBAAA;AACT;AAOO,SAAS,oBAAA,CAAqB,OAAA,EAA+B;AAClE,EAAA,MAAM,UAAA,EAAY,IAAA,CAAK,GAAA,CAAI,CAAA;AAC3B,EAAA,MAAM,eAAA,EAAiB,SAAA,CAAU,OAAO,CAAA;AACxC,EAAA,MAAM,aAAA,kBAAe,IAAI,GAAA,CAAY,CAAA;AACrC,EAAA,MAAM,QAAA,EAAyB,CAAC,CAAA;AAChC,EAAA,MAAM,mBAAA,EAAqB,qBAAA,CAAsB,CAAA;AAEjD,EAAA,IAAA,CAAA,MAAW,CAAC,cAAA,EAAgB,OAAO,EAAA,GAAK,kBAAA,EAAoB;AAC1D,IAAA,GAAA,CAAI,cAAA,CAAe,OAAA,EAAS,EAAA,GAAK,CAAC,uBAAA,CAAwB,GAAA,CAAI,cAAc,CAAA,EAAG;AAC7E,MAAA,QAAA;AAAA,IACF;AAEA,IAAA,IAAI,SAAA,EAAW,CAAA,CAAA;AACf,IAAA,GAAA,CAAI,cAAA,CAAe,OAAA,GAAU,CAAA,EAAG;AAC9B,MAAA,MAAM,MAAA,EAAQ,IAAI,MAAA,CAAO,CAAA,GAAA,EAAM,cAAc,CAAA,GAAA,CAAK,CAAA;AAClD,MAAA,MAAM,MAAA,EAAQ,KAAA,CAAM,IAAA,CAAK,cAAc,CAAA;AACvC,MAAA,GAAA,CAAI,KAAA,EAAO;AACT,QAAA,SAAA,EAAW,KAAA,CAAM,KAAA;AAAA,MACnB;AAAA,IACF,EAAA,KAAO;AACL,MAAA,SAAA,EAAW,cAAA,CAAe,OAAA,CAAQ,cAAc,CAAA;AAAA,IAClD;AAEA,IAAA,GAAA,CAAI,SAAA,IAAa,CAAA,CAAA,EAAI;AACnB,MAAA,IAAA,CAAA,MAAW,MAAA,GAAS,OAAA,EAAS;AAC3B,QAAA,GAAA,CAAI,CAAC,YAAA,CAAa,GAAA,CAAI,KAAA,CAAM,IAAI,CAAA,EAAG;AACjC,UAAA,YAAA,CAAa,GAAA,CAAI,KAAA,CAAM,IAAI,CAAA;AAC3B,UAAA,OAAA,CAAQ,IAAA,CAAK;AAAA,YACX,IAAA,EAAM,KAAA,CAAM,IAAA;AAAA,YACZ,UAAA,EAAY,CAAA;AAAA,YACZ,KAAA,EAAO,KAAA,CAAM,KAAA;AAAA,YACb,WAAA,EAAa,KAAA,CAAM,QAAA;AAAA,YACnB;AAAA,UACF,CAAC,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,MAAM,WAAA,EAAa,IAAA,CAAK,GAAA,CAAI,EAAA,EAAI,SAAA;AAChC,EAAA,MAAM,kBAAA,EAAoB,KAAA,CAAM,IAAA,CAAK,YAAY,CAAA;AAEjD,EAAA,OAAO;AAAA,IACL,iBAAA,EAAmB,gDAAA,iBAA8C,CAAA;AAAA,IACjE,OAAA;AAAA,IACA,KAAA,EAAO;AAAA,MACL,YAAA,EAAc,OAAA,CAAQ,MAAA;AAAA,MACtB,UAAA;AAAA,MACA,aAAA,EAAe,WAAA,CAAY,CAAA,CAAE;AAAA,IAC/B;AAAA,EACF,CAAA;AACF;AAKO,SAAS,eAAA,CAAgB,MAAA,EAAgC;AAC9D,EAAA,OAAO,MAAA,CAAO,OAAA,CAAQ,GAAA,CAAI,CAAC,CAAA,EAAA,GAAM,CAAA,CAAE,IAAI,CAAA;AACzC;AD5BA;AACE;AACA;AACF,+FAAC","file":"/home/runner/work/fhir-brasil/fhir-brasil/packages/ocr-utils/dist/index.cjs","sourcesContent":[null,"/**\n * OCR Anchor — Biomarker text anchoring\n *\n * Scans OCR text for biomarker names BEFORE sending to LLM.\n * This prevents hallucination by constraining what biomarkers\n * the LLM is allowed to extract.\n */\n\nimport {\n type BiomarkerSearchPattern,\n generateFilteredLLMReference,\n getAllSearchPatterns,\n} from '@precisa-saude/fhir';\n\nexport interface AnchorMatch {\n code: string;\n confidence: number;\n loinc?: string;\n matchedName: string;\n position: number;\n}\n\nexport interface AnchorResult {\n filteredReference: string;\n matches: AnchorMatch[];\n stats: {\n totalPatterns: number;\n matchedCount: number;\n scanTimeMs: number;\n };\n}\n\n/**\n * Normalize text for comparison:\n * - Removes diacritics (ã→a, ç→c, é→e)\n * - Converts to lowercase\n * - Normalizes whitespace\n */\nfunction normalize(text: string): string {\n return text\n .normalize('NFD')\n .replace(/[\\u0300-\\u036f]/g, '')\n .toLowerCase()\n .replace(/\\s+/g, ' ');\n}\n\nconst UNAMBIGUOUS_SHORT_NAMES = new Set([\n 'hdl', 'ldl', 'lh', 'tsh', 'crp', 'pcr', 'ggt', 'alt', 'ast', 'bun',\n 'wbc', 'rbc', 'mcv', 'mch', 'rdw', 'mpv', 'psa', 'fsh', 'hba1c', 'egfr',\n 'acr', 'esr', 'vhs', 'bmc', 'bmd', 'vat', 'dxa', 'dmo', 'cmo', 'ffm',\n 'lbm', 'mlg', 'tav',\n]);\n\ntype PatternEntry = { original: string; code: string; loinc?: string };\n\nlet cachedPatterns: BiomarkerSearchPattern[] | null = null;\nlet cachedNormalized: Map<string, PatternEntry[]> | null = null;\n\nfunction getPatterns(): BiomarkerSearchPattern[] {\n if (!cachedPatterns) {\n cachedPatterns = getAllSearchPatterns();\n }\n return cachedPatterns;\n}\n\nfunction getNormalizedPatterns(): Map<string, PatternEntry[]> {\n if (!cachedNormalized) {\n const patterns = getPatterns();\n const map = new Map<string, PatternEntry[]>();\n for (const pattern of patterns) {\n for (const name of pattern.names) {\n const normalized = normalize(name);\n const existing = map.get(normalized) || [];\n existing.push({\n code: pattern.code,\n ...(pattern.loinc && { loinc: pattern.loinc }),\n original: name,\n });\n map.set(normalized, existing);\n }\n }\n cachedNormalized = map;\n }\n return cachedNormalized;\n}\n\n/**\n * Find all biomarker names present in OCR text.\n * Uses exact string matching on normalized text.\n * Returns unique matches (same biomarker won't be matched twice).\n */\nexport function findBiomarkersInText(ocrText: string): AnchorResult {\n const startTime = Date.now();\n const normalizedText = normalize(ocrText);\n const matchedCodes = new Set<string>();\n const matches: AnchorMatch[] = [];\n const normalizedPatterns = getNormalizedPatterns();\n\n for (const [normalizedName, entries] of normalizedPatterns) {\n if (normalizedName.length < 3 && !UNAMBIGUOUS_SHORT_NAMES.has(normalizedName)) {\n continue;\n }\n\n let position = -1;\n if (normalizedName.length <= 4) {\n const regex = new RegExp(`\\\\b${normalizedName}\\\\b`);\n const match = regex.exec(normalizedText);\n if (match) {\n position = match.index;\n }\n } else {\n position = normalizedText.indexOf(normalizedName);\n }\n\n if (position !== -1) {\n for (const entry of entries) {\n if (!matchedCodes.has(entry.code)) {\n matchedCodes.add(entry.code);\n matches.push({\n code: entry.code,\n confidence: 1.0,\n loinc: entry.loinc,\n matchedName: entry.original,\n position,\n });\n }\n }\n }\n }\n\n const scanTimeMs = Date.now() - startTime;\n const matchedCodesArray = Array.from(matchedCodes);\n\n return {\n filteredReference: generateFilteredLLMReference(matchedCodesArray),\n matches,\n stats: {\n matchedCount: matches.length,\n scanTimeMs,\n totalPatterns: getPatterns().length,\n },\n };\n}\n\n/**\n * Get the list of matched biomarker codes from an anchor result.\n */\nexport function getMatchedCodes(result: AnchorResult): string[] {\n return result.matches.map((m) => m.code);\n}\n"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@precisa-saude/fhir-ocr-utils",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.8",
|
|
4
|
+
"sideEffects": false,
|
|
5
|
+
"homepage": "https://github.com/Precisa-Saude/fhir-brasil/tree/main/packages/ocr-utils#readme",
|
|
6
|
+
"bugs": "https://github.com/Precisa-Saude/fhir-brasil/issues",
|
|
4
7
|
"description": "Utilitários de ancoragem OCR para extração de biomarcadores de PDFs de resultados laboratoriais",
|
|
5
8
|
"type": "module",
|
|
6
9
|
"main": "./dist/index.cjs",
|
|
@@ -31,7 +34,7 @@
|
|
|
31
34
|
"license": "Apache-2.0",
|
|
32
35
|
"repository": {
|
|
33
36
|
"type": "git",
|
|
34
|
-
"url": "https://github.com/
|
|
37
|
+
"url": "https://github.com/Precisa-Saude/fhir-brasil.git",
|
|
35
38
|
"directory": "packages/ocr-utils"
|
|
36
39
|
},
|
|
37
40
|
"publishConfig": {
|