@llamaindex/liteparse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +339 -0
- package/dist/cli/parse.d.ts +4 -0
- package/dist/cli/parse.d.ts.map +1 -0
- package/dist/cli/parse.js +401 -0
- package/dist/cli/parse.js.map +1 -0
- package/dist/src/conversion/convertToPdf.d.ts +47 -0
- package/dist/src/conversion/convertToPdf.d.ts.map +1 -0
- package/dist/src/conversion/convertToPdf.js +337 -0
- package/dist/src/conversion/convertToPdf.js.map +1 -0
- package/dist/src/conversion/convertToPdf.test.d.ts +2 -0
- package/dist/src/conversion/convertToPdf.test.d.ts.map +1 -0
- package/dist/src/conversion/convertToPdf.test.js +208 -0
- package/dist/src/conversion/convertToPdf.test.js.map +1 -0
- package/dist/src/core/config.d.ts +4 -0
- package/dist/src/core/config.d.ts.map +1 -0
- package/dist/src/core/config.js +25 -0
- package/dist/src/core/config.js.map +1 -0
- package/dist/src/core/config.test.d.ts +2 -0
- package/dist/src/core/config.test.d.ts.map +1 -0
- package/dist/src/core/config.test.js +21 -0
- package/dist/src/core/config.test.js.map +1 -0
- package/dist/src/core/parser.d.ts +83 -0
- package/dist/src/core/parser.d.ts.map +1 -0
- package/dist/src/core/parser.js +333 -0
- package/dist/src/core/parser.js.map +1 -0
- package/dist/src/core/parser.test.d.ts +2 -0
- package/dist/src/core/parser.test.d.ts.map +1 -0
- package/dist/src/core/parser.test.js +537 -0
- package/dist/src/core/parser.test.js.map +1 -0
- package/dist/src/core/types.d.ts +287 -0
- package/dist/src/core/types.d.ts.map +1 -0
- package/dist/src/core/types.js +2 -0
- package/dist/src/core/types.js.map +1 -0
- package/dist/src/engines/ocr/http-simple.d.ts +19 -0
- package/dist/src/engines/ocr/http-simple.d.ts.map +1 -0
- package/dist/src/engines/ocr/http-simple.js +63 -0
- package/dist/src/engines/ocr/http-simple.js.map +1 -0
- package/dist/src/engines/ocr/http-simple.test.d.ts +2 -0
- package/dist/src/engines/ocr/http-simple.test.d.ts.map +1 -0
- package/dist/src/engines/ocr/http-simple.test.js +108 -0
- package/dist/src/engines/ocr/http-simple.test.js.map +1 -0
- package/dist/src/engines/ocr/interface.d.ts +15 -0
- package/dist/src/engines/ocr/interface.d.ts.map +1 -0
- package/dist/src/engines/ocr/interface.js +2 -0
- package/dist/src/engines/ocr/interface.js.map +1 -0
- package/dist/src/engines/ocr/tesseract.d.ts +19 -0
- package/dist/src/engines/ocr/tesseract.d.ts.map +1 -0
- package/dist/src/engines/ocr/tesseract.js +112 -0
- package/dist/src/engines/ocr/tesseract.js.map +1 -0
- package/dist/src/engines/ocr/tesseract.test.d.ts +2 -0
- package/dist/src/engines/ocr/tesseract.test.d.ts.map +1 -0
- package/dist/src/engines/ocr/tesseract.test.js +84 -0
- package/dist/src/engines/ocr/tesseract.test.js.map +1 -0
- package/dist/src/engines/pdf/interface.d.ts +79 -0
- package/dist/src/engines/pdf/interface.d.ts.map +1 -0
- package/dist/src/engines/pdf/interface.js +2 -0
- package/dist/src/engines/pdf/interface.js.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.d.ts +11 -0
- package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.js +64 -0
- package/dist/src/engines/pdf/pdfium-renderer.js.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +2 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.js +76 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.js.map +1 -0
- package/dist/src/engines/pdf/pdfjs.d.ts +13 -0
- package/dist/src/engines/pdf/pdfjs.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfjs.js +538 -0
- package/dist/src/engines/pdf/pdfjs.js.map +1 -0
- package/dist/src/engines/pdf/pdfjs.test.d.ts +2 -0
- package/dist/src/engines/pdf/pdfjs.test.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfjs.test.js +220 -0
- package/dist/src/engines/pdf/pdfjs.test.js.map +1 -0
- package/dist/src/engines/pdf/pdfjsImporter.d.ts +5 -0
- package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfjsImporter.js +9 -0
- package/dist/src/engines/pdf/pdfjsImporter.js.map +1 -0
- package/dist/src/index.d.ts +3 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +5 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/lib.d.ts +17 -0
- package/dist/src/lib.d.ts.map +1 -0
- package/dist/src/lib.js +16 -0
- package/dist/src/lib.js.map +1 -0
- package/dist/src/output/json.d.ts +10 -0
- package/dist/src/output/json.d.ts.map +1 -0
- package/dist/src/output/json.js +31 -0
- package/dist/src/output/json.js.map +1 -0
- package/dist/src/output/json.test.d.ts +2 -0
- package/dist/src/output/json.test.d.ts.map +1 -0
- package/dist/src/output/json.test.js +136 -0
- package/dist/src/output/json.test.js.map +1 -0
- package/dist/src/output/text.d.ts +10 -0
- package/dist/src/output/text.d.ts.map +1 -0
- package/dist/src/output/text.js +17 -0
- package/dist/src/output/text.js.map +1 -0
- package/dist/src/output/text.test.d.ts +2 -0
- package/dist/src/output/text.test.d.ts.map +1 -0
- package/dist/src/output/text.test.js +65 -0
- package/dist/src/output/text.test.js.map +1 -0
- package/dist/src/processing/bbox.d.ts +20 -0
- package/dist/src/processing/bbox.d.ts.map +1 -0
- package/dist/src/processing/bbox.js +258 -0
- package/dist/src/processing/bbox.js.map +1 -0
- package/dist/src/processing/bbox.test.d.ts +2 -0
- package/dist/src/processing/bbox.test.d.ts.map +1 -0
- package/dist/src/processing/bbox.test.js +334 -0
- package/dist/src/processing/bbox.test.js.map +1 -0
- package/dist/src/processing/cleanText.d.ts +6 -0
- package/dist/src/processing/cleanText.d.ts.map +1 -0
- package/dist/src/processing/cleanText.js +73 -0
- package/dist/src/processing/cleanText.js.map +1 -0
- package/dist/src/processing/cleanText.test.d.ts +2 -0
- package/dist/src/processing/cleanText.test.d.ts.map +1 -0
- package/dist/src/processing/cleanText.test.js +46 -0
- package/dist/src/processing/cleanText.test.js.map +1 -0
- package/dist/src/processing/grid.d.ts +7 -0
- package/dist/src/processing/grid.d.ts.map +1 -0
- package/dist/src/processing/grid.js +13 -0
- package/dist/src/processing/grid.js.map +1 -0
- package/dist/src/processing/gridProjection.d.ts +18 -0
- package/dist/src/processing/gridProjection.d.ts.map +1 -0
- package/dist/src/processing/gridProjection.js +1392 -0
- package/dist/src/processing/gridProjection.js.map +1 -0
- package/dist/src/processing/gridProjection.test.d.ts +2 -0
- package/dist/src/processing/gridProjection.test.d.ts.map +1 -0
- package/dist/src/processing/gridProjection.test.js +464 -0
- package/dist/src/processing/gridProjection.test.js.map +1 -0
- package/dist/src/processing/markupUtils.d.ts +7 -0
- package/dist/src/processing/markupUtils.d.ts.map +1 -0
- package/dist/src/processing/markupUtils.js +25 -0
- package/dist/src/processing/markupUtils.js.map +1 -0
- package/dist/src/processing/markupUtils.test.d.ts +2 -0
- package/dist/src/processing/markupUtils.test.d.ts.map +1 -0
- package/dist/src/processing/markupUtils.test.js +26 -0
- package/dist/src/processing/markupUtils.test.js.map +1 -0
- package/dist/src/processing/ocrUtils.d.ts +24 -0
- package/dist/src/processing/ocrUtils.d.ts.map +1 -0
- package/dist/src/processing/ocrUtils.js +79 -0
- package/dist/src/processing/ocrUtils.js.map +1 -0
- package/dist/src/processing/octUtils.test.d.ts +2 -0
- package/dist/src/processing/octUtils.test.d.ts.map +1 -0
- package/dist/src/processing/octUtils.test.js +72 -0
- package/dist/src/processing/octUtils.test.js.map +1 -0
- package/dist/src/processing/textUtils.d.ts +20 -0
- package/dist/src/processing/textUtils.d.ts.map +1 -0
- package/dist/src/processing/textUtils.js +142 -0
- package/dist/src/processing/textUtils.js.map +1 -0
- package/dist/src/processing/textUtils.test.d.ts +2 -0
- package/dist/src/processing/textUtils.test.d.ts.map +1 -0
- package/dist/src/processing/textUtils.test.js +45 -0
- package/dist/src/processing/textUtils.test.js.map +1 -0
- package/dist/src/vendor/pdfjs/LICENSE +177 -0
- package/dist/src/vendor/pdfjs/README.md +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/LICENSE +36 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/dist/src/vendor/pdfjs/pdf.mjs +19481 -0
- package/dist/src/vendor/pdfjs/pdf.mjs.map +1 -0
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
- package/dist/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
- package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
- package/package.json +89 -0
- package/src/vendor/pdfjs/LICENSE +177 -0
- package/src/vendor/pdfjs/README.md +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
- package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/LICENSE +36 -0
- package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/src/vendor/pdfjs/pdf.mjs +19481 -0
- package/src/vendor/pdfjs/pdf.mjs.map +1 -0
- package/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
- package/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
- package/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
- package/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markupUtils.js","sourceRoot":"","sources":["../../../src/processing/markupUtils.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,MAAkB,EAAE,IAAY;IAC9D,IAAI,MAAM,GAAG,IAAI,CAAC;IAElB,6BAA6B;IAC7B,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,MAAM,GAAG,KAAK,MAAM,IAAI,CAAC;IAC3B,CAAC;IAED,6BAA6B;IAC7B,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,MAAM,GAAG,KAAK,MAAM,IAAI,CAAC;IAC3B,CAAC;IAED,yDAAyD;IACzD,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,MAAM,GAAG,KAAK,MAAM,IAAI,CAAC;IAC3B,CAAC;IAED,6BAA6B;IAC7B,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,MAAM,GAAG,KAAK,MAAM,IAAI,CAAC;IAC3B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markupUtils.test.d.ts","sourceRoot":"","sources":["../../../src/processing/markupUtils.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { applyMarkupTags } from "./markupUtils.js";
|
|
3
|
+
const text = "hello";

// One row per markup flag: [test label, markup data, expected output].
const markupCases = [
  ["strikeout", { strikeout: true }, `~~${text}~~`],
  ["underline", { underline: true }, `__${text}__`],
  ["squiggly", { squiggly: true }, `__${text}__`],
  ["highlight", { highlight: "yes" }, `==${text}==`],
  ["none", {}, text],
];

describe("test markupUtils", () => {
  for (const [label, data, expected] of markupCases) {
    it(`test ${label}`, () => {
      expect(applyMarkupTags(data, text)).toBe(expected);
    });
  }
});
|
|
26
|
+
//# sourceMappingURL=markupUtils.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markupUtils.test.js","sourceRoot":"","sources":["../../../src/processing/markupUtils.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAE9C,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAEnD,MAAM,IAAI,GAAG,OAAO,CAAC;AAErB,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,EAAE,CAAC,gBAAgB,EAAE,GAAG,EAAE;QACxB,MAAM,IAAI,GAAe,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;QAC7C,MAAM,CAAC,eAAe,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gBAAgB,EAAE,GAAG,EAAE;QACxB,MAAM,IAAI,GAAe,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;QAC7C,MAAM,CAAC,eAAe,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,eAAe,EAAE,GAAG,EAAE;QACvB,MAAM,IAAI,GAAe,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;QAC5C,MAAM,CAAC,eAAe,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gBAAgB,EAAE,GAAG,EAAE;QACxB,MAAM,IAAI,GAAe,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;QAC9C,MAAM,CAAC,eAAe,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,WAAW,EAAE,GAAG,EAAE;QACnB,MAAM,IAAI,GAAe,EAAE,CAAC;QAC5B,MAAM,CAAC,eAAe,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { Image, EasyOcrResultLine } from "../engines/pdf/interface.js";
|
|
2
|
+
/**
 * A single text block produced from one OCR result line
 * (see parseImageOcrBlocks).
 */
export interface OcrBlock {
    /** Recognised text of the block. */
    c: string;
    /** Left edge in page space: OCR x rescaled by the image/coords width ratio, offset by coords.x. */
    x: number;
    /** OCR x divided by the image scale factor, rounded to an integer. */
    rx: number;
    /** Top edge in page space: OCR y rescaled by the image/coords height ratio, offset by coords.y. */
    y: number;
    /** OCR y divided by the image scale factor, rounded to an integer. */
    ry: number;
    /** Width in page space (absolute x distance between opposite corners, rescaled). */
    w: number;
    /** Width divided by the image scale factor, rounded to an integer. */
    rw: number;
    /** Height in page space (absolute y distance between opposite corners, rescaled). */
    h: number;
    /** Height divided by the image scale factor, rounded to an integer. */
    rh: number;
    /** Confidence as supplied by the OCR line; passed through unchanged, so it may be a number or an unparsed string. */
    confidence: string | number;
    /** Set to true for blocks built by parseImageOcrBlocks. */
    fromOcr: boolean;
}
|
|
15
|
+
/**
|
|
16
|
+
* Parse OCR blocks from image with OCR data
|
|
17
|
+
* Converts OCR bounding boxes from image space to page space
|
|
18
|
+
*/
|
|
19
|
+
export declare function parseImageOcrBlocks(image: Image): OcrBlock[];
|
|
20
|
+
/**
|
|
21
|
+
* Parse EasyOCR stdout result into structured format
|
|
22
|
+
*/
|
|
23
|
+
export declare function easyOcrResultLinesToList(stdOutResult?: string): EasyOcrResultLine[];
|
|
24
|
+
//# sourceMappingURL=ocrUtils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ocrUtils.d.ts","sourceRoot":"","sources":["../../../src/processing/ocrUtils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,iBAAiB,EAAE,MAAM,6BAA6B,CAAC;AAEvE,MAAM,WAAW,QAAQ;IACvB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,EAAE,EAAE,MAAM,CAAC;IACX,CAAC,EAAE,MAAM,CAAC;IACV,EAAE,EAAE,MAAM,CAAC;IACX,CAAC,EAAE,MAAM,CAAC;IACV,EAAE,EAAE,MAAM,CAAC;IACX,CAAC,EAAE,MAAM,CAAC;IACV,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,GAAG,MAAM,CAAC;IAC5B,OAAO,EAAE,OAAO,CAAC;CAClB;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,KAAK,GAAG,QAAQ,EAAE,CAqC5D;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,iBAAiB,EAAE,CA8CnF"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
 * Parse OCR blocks from image with OCR data.
 * Converts OCR bounding boxes from image space to page space.
 *
 * @param {object} image - Image whose `ocrRaw` holds lines of the form
 *   `[[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], text, confidence]`. Also read:
 *   `width`, `height`, optional `coords` ({ x, y, w, h }) and optional
 *   `scaleFactor` (treated as 1 when falsy).
 * @returns {object[]} One block per OCR line with page-space `x`/`y`/`w`/`h`,
 *   scale-factor-normalised rounded `rx`/`ry`/`rw`/`rh`, the text in `c`, the
 *   untouched `confidence`, and `fromOcr: true`. Empty when there is no image
 *   or no OCR data.
 */
export function parseImageOcrBlocks(image) {
  if (!image?.ocrRaw?.length) {
    return [];
  }

  // A missing or zero dimension yields a NaN/0 ratio, which would turn every
  // coordinate into NaN/Infinity; fall back to a 1:1 mapping in that case.
  const usableRatio = (ratio) => (Number.isNaN(ratio) || ratio === 0 ? 1 : ratio);

  // Ratio to convert from OCR image coordinates to page coordinates.
  const coords = image.coords || { x: 0, y: 0, w: image.width, h: image.height };
  const xRatio = usableRatio(image.width / coords.w);
  const yRatio = usableRatio(image.height / coords.h);

  // Loop-invariant, so compute once instead of four times per OCR line.
  const scaleFactor = image.scaleFactor || 1;

  const blocks = [];
  for (const line of image.ocrRaw) {
    // Corners 0 and 2 are opposite each other, so they span the bounding box.
    const [x1, y1] = line[0][0];
    const [x2, y2] = line[0][2];
    const width = Math.abs(x2 - x1);
    const height = Math.abs(y2 - y1);

    blocks.push({
      c: line[1],
      x: x1 / xRatio + coords.x,
      rx: Math.round(x1 / scaleFactor),
      y: y1 / yRatio + coords.y,
      ry: Math.round(y1 / scaleFactor),
      w: width / xRatio,
      rw: Math.round(width / scaleFactor),
      h: height / yRatio,
      rh: Math.round(height / scaleFactor),
      confidence: line[2],
      fromOcr: true,
    });
  }
  return blocks;
}
|
|
38
|
+
/**
 * Parse EasyOCR stdout result into structured format.
 *
 * Each non-empty line is expected to look like
 * `([[x1, y1], [x2, y2], [x3, y3], [x4, y4]], 'text', confidence)`.
 * Lines that do not match are skipped.
 *
 * @param {string} [stdOutResult] - Raw stdout text; LF or CRLF line endings.
 * @returns {Array} One `[[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], text, confidence]`
 *   entry per matching line. Coordinates are converted with `Number()`;
 *   `confidence` is kept as the matched string.
 */
export function easyOcrResultLinesToList(stdOutResult) {
  if (!stdOutResult?.length) {
    return [];
  }

  // The closing quote is tied to the opening one (\9) so text containing the
  // other quote character followed by ", " is not cut short.
  const linePattern =
    /\[\[(.*?), (.*?)\], \[(.*?), (.*?)\], \[(.*?), (.*?)\], \[(.*?), (.*?)\]\], (['"])(.*?)\9, (.*?)\)$/;

  const blocks = [];
  for (const rawLine of stdOutResult.split("\n")) {
    // trimEnd() removes the "\r" left by CRLF output (and any trailing
    // whitespace), which would otherwise defeat the `\)$` anchor and drop
    // the line silently.
    const line = rawLine.trimEnd();
    if (!line.trim()) {
      continue;
    }

    const ocrMatch = line.match(linePattern);
    if (!ocrMatch) {
      continue;
    }

    const [x1, y1, x2, y2, x3, y3, x4, y4] = ocrMatch.slice(1, 9).map(Number);
    const text = ocrMatch[10];
    const confidence = ocrMatch[11];

    blocks.push([
      [
        [x1, y1],
        [x2, y2],
        [x3, y3],
        [x4, y4],
      ],
      text,
      confidence,
    ]);
  }
  return blocks;
}
|
|
79
|
+
//# sourceMappingURL=ocrUtils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ocrUtils.js","sourceRoot":"","sources":["../../../src/processing/ocrUtils.ts"],"names":[],"mappings":"AAgBA;;;GAGG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAY;IAC9C,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;QACpC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAe,EAAE,CAAC;IAE9B,4EAA4E;IAC5E,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC;IAC/E,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC;IAEvC,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QAChC,8EAA8E;QAC9E,MAAM,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,+BAA+B;QAC5D,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAE3B,yCAAyC;QACzC,MAAM,KAAK,GAAa;YACtB,CAAC,EAAE,IAAI;YACP,CAAC,EAAE,EAAE,GAAG,MAAM,GAAG,MAAM,CAAC,CAAC;YACzB,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC;YAC7C,CAAC,EAAE,EAAE,GAAG,MAAM,GAAG,MAAM,CAAC,CAAC;YACzB,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC;YAC7C,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,MAAM;YAC7B,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC;YAC5D,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,MAAM;YAC7B,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC;YAC5D,UAAU;YACV,OAAO,EAAE,IAAI;SACd,CAAC;QACF,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACrB,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,wBAAwB,CAAC,YAAqB;IAC5D,IAAI,CAAC,YAAY,EAAE,MAAM,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAwB,EAAE,CAAC;IAEvC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI
,CAAC,IAAI,EAAE,EAAE,CAAC;YACjB,SAAS;QACX,CAAC;QAED,oFAAoF;QACpF,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CACzB,qGAAqG,CACtG,CAAC;QAEF,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,SAAS;QACX,CAAC;QAED,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACzB,MAAM,UAAU,GAAG,QAAQ,CAAC,EAAE,CAAC,CAAC;QAEhC,MAAM,CAAC,IAAI,CAAC;YACV;gBACE,CAAC,EAAE,EAAE,EAAE,CAAC;gBACR,CAAC,EAAE,EAAE,EAAE,CAAC;gBACR,CAAC,EAAE,EAAE,EAAE,CAAC;gBACR,CAAC,EAAE,EAAE,EAAE,CAAC;aACT;YACD,IAAI;YACJ,UAAU;SACX,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"octUtils.test.d.ts","sourceRoot":"","sources":["../../../src/processing/octUtils.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { parseImageOcrBlocks, easyOcrResultLinesToList } from "./ocrUtils";
|
|
3
|
+
// Image fixture: a 200x100 OCR image placed on the page at (10, 20) with
// size 100x50, i.e. a 2:1 image-to-page ratio on both axes.
const mockImage = {
  x: 0,
  y: 0,
  width: 200,
  height: 100,
  scaleFactor: 2,
  coords: { x: 10, y: 20, w: 100, h: 50 },
  ocrRaw: [
    [
      [
        [0, 0],
        [40, 0],
        [40, 20],
        [0, 20],
      ],
      "Hello",
      0.95,
    ],
  ],
};

const expectedBlock = {
  c: "Hello",
  x: 0 / 2 + 10, // = 10
  rx: Math.round(0 / 2), // = 0
  y: 0 / 2 + 20, // = 20
  ry: Math.round(0 / 2), // = 0
  w: 40 / 2, // = 20
  rw: Math.round(40 / 2), // = 20
  h: 20 / 2, // = 10
  rh: Math.round(20 / 2), // = 10
  confidence: 0.95,
  fromOcr: true,
};

const mockStdOut = "([[0, 10], [40, 10], [40, 30], [0, 30]], 'Hello', 0.95)";

const expectedResult = [
  [
    [
      [0, 10],
      [40, 10],
      [40, 30],
      [0, 30],
    ],
    "Hello",
    "0.95", // note: string, since ocrMatch[10] is not parsed with Number()
  ],
];

describe("test ocrUtils", () => {
  it("test parseImageOcrBlocks success", () => {
    const result = parseImageOcrBlocks(mockImage);
    expect(result.length).toBe(1);
    expect(result[0]).toStrictEqual(expectedBlock);
  });

  it("test parseImageOcrBlocks failure", () => {
    const imageNoRawOcr = {
      ...mockImage,
      ocrRaw: [],
    };
    const result = parseImageOcrBlocks(imageNoRawOcr);
    expect(result.length).toBe(0);
  });

  it("test EasyOcrResultLine success", () => {
    const result = easyOcrResultLinesToList(mockStdOut);
    expect(result).toStrictEqual(expectedResult);
  });

  // Previously shared its title with the test above, which made failures
  // ambiguous in the test report.
  it("test EasyOcrResultLine empty input", () => {
    const result = easyOcrResultLinesToList("");
    expect(result.length).toBe(0);
  });
});
|
|
72
|
+
//# sourceMappingURL=octUtils.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"octUtils.test.js","sourceRoot":"","sources":["../../../src/processing/octUtils.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,mBAAmB,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAG3E,MAAM,SAAS,GAAU;IACvB,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,KAAK,EAAE,GAAG;IACV,MAAM,EAAE,GAAG;IACX,WAAW,EAAE,CAAC;IACd,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE;IACvC,MAAM,EAAE;QACN;YACE;gBACE,CAAC,CAAC,EAAE,CAAC,CAAC;gBACN,CAAC,EAAE,EAAE,CAAC,CAAC;gBACP,CAAC,EAAE,EAAE,EAAE,CAAC;gBACR,CAAC,CAAC,EAAE,EAAE,CAAC;aACR;YACD,OAAO;YACP,IAAI;SACgB;KACvB;CACF,CAAC;AAEF,MAAM,aAAa,GAAG;IACpB,CAAC,EAAE,OAAO;IACV,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,OAAO;IACtB,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM;IAC7B,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,OAAO;IACtB,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM;IAC7B,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,OAAO;IAClB,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,OAAO;IAC/B,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,OAAO;IAClB,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,OAAO;IAC/B,UAAU,EAAE,IAAI;IAChB,OAAO,EAAE,IAAI;CACd,CAAC;AAEF,MAAM,UAAU,GAAG,yDAAyD,CAAC;AAE7E,MAAM,cAAc,GAAwB;IAC1C;QACE;YACE,CAAC,CAAC,EAAE,EAAE,CAAC;YACP,CAAC,EAAE,EAAE,EAAE,CAAC;YACR,CAAC,EAAE,EAAE,EAAE,CAAC;YACR,CAAC,CAAC,EAAE,EAAE,CAAC;SACR;QACD,OAAO;QACP,MAAM,EAAE,+DAA+D;KACnD;CACvB,CAAC;AAEF,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,MAAM,GAAG,mBAAmB,CAAC,SAAS,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,aAAa,GAAG;YACpB,GAAG,SAAS;YACZ,MAAM,EAAE,EAAE;SACX,CAAC;QACF,MAAM,MAAM,GAAG,mBAAmB,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;
QACxC,MAAM,MAAM,GAAG,wBAAwB,CAAC,UAAU,CAAC,CAAC;QACpD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,MAAM,GAAG,wBAAwB,CAAC,EAAE,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Clean common OCR artifacts from table documents.
|
|
3
|
+
* OCR often misreads vertical table border lines as bracket-like characters.
|
|
4
|
+
* This is especially common with numbers adjacent to table cell borders.
|
|
5
|
+
*
|
|
6
|
+
* Examples:
|
|
7
|
+
* - "44520]" → "44520" (vertical line misread as ])
|
|
8
|
+
* - "|123" → "123" (vertical line misread as |)
|
|
9
|
+
* - "0.3|" → "0.3" (vertical line misread as |)
|
|
10
|
+
*/
|
|
11
|
+
export declare function cleanOcrTableArtifacts(text: string): string;
|
|
12
|
+
/**
|
|
13
|
+
* Convert string to subscript unicode characters
|
|
14
|
+
*/
|
|
15
|
+
export declare function strToSubscriptString(str: string): string;
|
|
16
|
+
/**
|
|
17
|
+
* Convert string to superscript unicode characters
|
|
18
|
+
*/
|
|
19
|
+
export declare function strToPostScript(str: string): string;
|
|
20
|
+
//# sourceMappingURL=textUtils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"textUtils.d.ts","sourceRoot":"","sources":["../../../src/processing/textUtils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAsB3D;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAuCxD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAqEnD"}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
 * Clean common OCR artifacts from table documents.
 * OCR often misreads vertical table border lines as bracket-like characters.
 * This is especially common with numbers adjacent to table cell borders.
 *
 * Examples:
 * - "44520]" → "44520" (vertical line misread as ])
 * - "|123" → "123" (vertical line misread as |)
 * - "0.3|" → "0.3" (vertical line misread as |)
 *
 * Leading/trailing runs of `| [ ] ( ) { }` are stripped only when what remains
 * looks numeric; otherwise the trimmed input is returned untouched, so real
 * content such as "(note)" keeps its brackets.
 *
 * NOTE(review): a fully parenthesised number such as "(123)" is also reduced
 * to "123"; confirm this is intended for tables that use accounting-style
 * negatives.
 *
 * @param {string} text - Raw cell text.
 * @returns {string} The trimmed text, with border artifacts removed when the
 *   core content is numeric-ish.
 */
export function cleanOcrTableArtifacts(text) {
  // Characters commonly misread from vertical table borders; they typically
  // appear at the start or end of cell content.
  const borderArtifacts = /^[|[\](){}]+|[|[\](){}]+$/g;
  // Numeric-ish core content: digits with commas/periods/spaces, an optional
  // leading * + or -, an optional trailing %, or the markers Z/A, N/A, Z, -.
  const numericPattern = /^[*+-]?[\d,.\s]+[%]?$|^[*]?-?[\d,.\s]+$|^[ZN]\/A$|^[Z-]$/;

  const cleaned = text.trim();
  const core = cleaned.replace(borderArtifacts, "").trim();

  // Use the stripped version only when it looks like a number; otherwise the
  // brackets are assumed to be real content.
  if (core.length > 0 && numericPattern.test(core)) {
    return core;
  }
  return cleaned;
}
|
|
30
|
+
/**
 * Render a string with Unicode subscript characters.
 *
 * The conversion is all-or-nothing: if any character has no subscript
 * counterpart in the table below, the input is returned unchanged.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The subscript form, or `str` itself when it cannot be
 *   converted completely.
 */
export function strToSubscriptString(str) {
  const subscriptOf = new Map([
    ["0", "₀"],
    ["1", "₁"],
    ["2", "₂"],
    ["3", "₃"],
    ["4", "₄"],
    ["5", "₅"],
    ["6", "₆"],
    ["7", "₇"],
    ["8", "₈"],
    ["9", "₉"],
    ["+", "₊"],
    ["-", "₋"],
    ["a", "ₐ"],
    ["e", "ₑ"],
    ["o", "ₒ"],
    ["x", "ₓ"],
    ["ə", "ₔ"],
    ["h", "ₕ"],
    ["k", "ₖ"],
    ["l", "ₗ"],
    ["m", "ₘ"],
    ["n", "ₙ"],
    ["p", "ₚ"],
    ["r", "ᵣ"],
    ["s", "ₛ"],
    ["t", "ₜ"],
  ]);

  const glyphs = [];
  for (let pos = 0; pos < str.length; pos += 1) {
    const glyph = subscriptOf.get(str[pos]);
    if (!glyph) {
      // Unsupported character: give up and hand back the original text.
      return str;
    }
    glyphs.push(glyph);
  }
  return glyphs.join("");
}
|
|
71
|
+
/**
 * Render a string with Unicode superscript characters.
 *
 * The conversion is all-or-nothing: if any character has no superscript
 * counterpart in the table below, the input is returned unchanged.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The superscript form, or `str` itself when it cannot be
 *   converted completely.
 */
export function strToPostScript(str) {
  const superscriptOf = new Map([
    ["0", "⁰"],
    ["1", "¹"],
    ["2", "²"],
    ["3", "³"],
    ["4", "⁴"],
    ["5", "⁵"],
    ["6", "⁶"],
    ["7", "⁷"],
    ["8", "⁸"],
    ["9", "⁹"],
    ["+", "⁺"],
    ["-", "⁻"],
    ["a", "ᵃ"],
    ["b", "ᵇ"],
    ["c", "ᶜ"],
    ["d", "ᵈ"],
    ["e", "ᵉ"],
    ["f", "ᶠ"],
    ["g", "ᵍ"],
    ["h", "ʰ"],
    ["i", "ⁱ"],
    ["j", "ʲ"],
    ["k", "ᵏ"],
    ["l", "ˡ"],
    ["m", "ᵐ"],
    ["n", "ⁿ"],
    ["o", "ᵒ"],
    ["p", "ᵖ"],
    ["r", "ʳ"],
    ["s", "ˢ"],
    ["t", "ᵗ"],
    ["u", "ᵘ"],
    ["v", "ᵛ"],
    ["w", "ʷ"],
    ["x", "ˣ"],
    ["y", "ʸ"],
    ["z", "ᶻ"],
    ["A", "ᴬ"],
    ["B", "ᴮ"],
    ["D", "ᴰ"],
    ["E", "ᴱ"],
    ["G", "ᴳ"],
    ["H", "ᴴ"],
    ["I", "ᴵ"],
    ["J", "ᴶ"],
    ["K", "ᴷ"],
    ["L", "ᴸ"],
    ["M", "ᴹ"],
    ["N", "ᴺ"],
    ["O", "ᴼ"],
    ["P", "ᴾ"],
    ["R", "ᴿ"],
    ["T", "ᵀ"],
    ["U", "ᵁ"],
    ["V", "ⱽ"],
    ["W", "ᵂ"],
  ]);

  const glyphs = [];
  for (let pos = 0; pos < str.length; pos += 1) {
    const glyph = superscriptOf.get(str[pos]);
    if (!glyph) {
      // Unsupported character: give up and hand back the original text.
      return str;
    }
    glyphs.push(glyph);
  }
  return glyphs.join("");
}
|
|
142
|
+
//# sourceMappingURL=textUtils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"textUtils.js","sourceRoot":"","sources":["../../../src/processing/textUtils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAY;IACjD,0DAA0D;IAC1D,6DAA6D;IAC7D,MAAM,eAAe,GAAG,4BAA4B,CAAC;IAErD,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,mEAAmE;IACnE,+EAA+E;IAC/E,MAAM,gBAAgB,GAAG,OAAO,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;IAE9D,2EAA2E;IAC3E,4DAA4D;IAC5D,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,2GAA2G;QAC3G,MAAM,cAAc,GAAG,0DAA0D,CAAC;QAClF,IAAI,cAAc,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;YACjD,OAAO,gBAAgB,CAAC,IAAI,EAAE,CAAC;QACjC,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAAC,GAAW;IAC9C,MAAM,GAAG,GAA8B;QACrC,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;KACP,CAAC;IAEF,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACjB,OAAO,GAAG,CAAC;QACb,CAAC;QACD,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACxB,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,MAAM,IAAI,GAA8B;QACtC,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,GAAG;QACR,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QAC
N,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;QACN,CAAC,EAAE,GAAG;KACP,CAAC;IAEF,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClB,OAAO,GAAG,CAAC;QACb,CAAC;QACD,OAAO,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"textUtils.test.d.ts","sourceRoot":"","sources":["../../../src/processing/textUtils.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { strToPostScript, strToSubscriptString, cleanOcrTableArtifacts } from "./textUtils";
|
|
3
|
+
describe("test processing textUtils", () => {
  // Asserts that every [input, expected] pair round-trips through the cleaner.
  const expectCleaned = (pairs) => {
    for (const [input, expected] of pairs) {
      expect(cleanOcrTableArtifacts(input)).toBe(expected);
    }
  };

  it("test strToSubscriptString full conversion", () => {
    expect(strToSubscriptString("hello123")).toBe("ₕₑₗₗₒ₁₂₃");
  });

  it("test strToSubscriptString fail conversion", () => {
    // "!" has no subscript form, so the input comes back unchanged.
    expect(strToSubscriptString("hello123!")).toBe("hello123!");
  });

  it("test strToPostScript full conversion", () => {
    expect(strToPostScript("hello123")).toBe("ʰᵉˡˡᵒ¹²³");
  });

  it("test strToPostScript fail conversion", () => {
    // "!" has no superscript form, so the input comes back unchanged.
    expect(strToPostScript("hello123!")).toBe("hello123!");
  });

  it("test cleanOcrTableArtifacts removes trailing bracket from number", () => {
    expectCleaned([
      ["44520]", "44520"],
      ["9,674]", "9,674"],
      ["0.3|", "0.3"],
      ["63,790|", "63,790"],
    ]);
  });

  it("test cleanOcrTableArtifacts removes leading bracket from number", () => {
    expectCleaned([
      ["|123", "123"],
      ["[456", "456"],
    ]);
  });

  it("test cleanOcrTableArtifacts preserves brackets in non-numeric text", () => {
    expectCleaned([
      ["(note)", "(note)"],
      ["[ref]", "[ref]"],
      ["hello]", "hello]"],
    ]);
  });

  it("test cleanOcrTableArtifacts handles special numeric patterns", () => {
    expectCleaned([
      ["*-123", "*-123"],
      ["Z", "Z"],
      ["N/A", "N/A"],
    ]);
  });
});
|
|
45
|
+
//# sourceMappingURL=textUtils.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"textUtils.test.js","sourceRoot":"","sources":["../../../src/processing/textUtils.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAE5F,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,WAAW,GAAG,UAAU,CAAC;QAC/B,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,CAAC;QACnD,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,WAAW,GAAG,WAAW,CAAC;QAChC,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,CAAC;QACnD,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,WAAW,GAAG,UAAU,CAAC;QAC/B,MAAM,QAAQ,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC;QAC9C,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,WAAW,GAAG,WAAW,CAAC;QAChC,MAAM,QAAQ,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC;QAC9C,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kEAAkE,EAAE,GAAG,EAAE;QAC1E,MAAM,CAAC,sBAAsB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACvD,MAAM,CAAC,sBAAsB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACvD,MAAM,CAAC,sBAAsB,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnD,MAAM,CAAC,sBAAsB,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iEAAiE,EAAE,GAAG,EAAE;QACzE,MAAM,CAAC,sBAAsB,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnD,MAAM,CAAC,sBAAsB,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oEAAoE,EAAE,GAAG,EAAE;QAC5E,MAAM,CAAC,sBAAsB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACxD,MAAM,CAAC,sBAAsB,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtD,MAAM,CAAC,sBAAsB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8DAA8D,EAAE,GAAG,EAAE;QACtE,MAAM,CAAC,sBAAsB,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CA
AC,CAAC;QACtD,MAAM,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9C,MAAM,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|