@llamaindex/liteparse 1.5.2 → 2.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -373
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +87 -0
- package/dist/cli.js.map +1 -0
- package/dist/lib.d.ts +58 -0
- package/dist/lib.d.ts.map +1 -0
- package/dist/lib.js +88 -0
- package/dist/lib.js.map +1 -0
- package/dist/native.d.ts +54 -0
- package/dist/native.d.ts.map +1 -0
- package/dist/native.js +70 -0
- package/dist/native.js.map +1 -0
- package/libpdfium.so +0 -0
- package/liteparse.linux-x64-gnu.node +0 -0
- package/package.json +36 -50
- package/LICENSE +0 -201
- package/dist/cli/parse.d.ts +0 -4
- package/dist/cli/parse.d.ts.map +0 -1
- package/dist/cli/parse.js +0 -450
- package/dist/cli/parse.js.map +0 -1
- package/dist/package.json +0 -90
- package/dist/src/conversion/convertToPdf.d.ts +0 -65
- package/dist/src/conversion/convertToPdf.d.ts.map +0 -1
- package/dist/src/conversion/convertToPdf.js +0 -405
- package/dist/src/conversion/convertToPdf.js.map +0 -1
- package/dist/src/conversion/convertToPdf.test.d.ts +0 -2
- package/dist/src/conversion/convertToPdf.test.d.ts.map +0 -1
- package/dist/src/conversion/convertToPdf.test.js +0 -327
- package/dist/src/conversion/convertToPdf.test.js.map +0 -1
- package/dist/src/core/config.d.ts +0 -4
- package/dist/src/core/config.d.ts.map +0 -1
- package/dist/src/core/config.js +0 -26
- package/dist/src/core/config.js.map +0 -1
- package/dist/src/core/config.test.d.ts +0 -2
- package/dist/src/core/config.test.d.ts.map +0 -1
- package/dist/src/core/config.test.js +0 -21
- package/dist/src/core/config.test.js.map +0 -1
- package/dist/src/core/parser.d.ts +0 -92
- package/dist/src/core/parser.d.ts.map +0 -1
- package/dist/src/core/parser.js +0 -401
- package/dist/src/core/parser.js.map +0 -1
- package/dist/src/core/parser.test.d.ts +0 -2
- package/dist/src/core/parser.test.d.ts.map +0 -1
- package/dist/src/core/parser.test.js +0 -541
- package/dist/src/core/parser.test.js.map +0 -1
- package/dist/src/core/types.d.ts +0 -370
- package/dist/src/core/types.d.ts.map +0 -1
- package/dist/src/core/types.js +0 -2
- package/dist/src/core/types.js.map +0 -1
- package/dist/src/engines/ocr/http-simple.d.ts +0 -19
- package/dist/src/engines/ocr/http-simple.d.ts.map +0 -1
- package/dist/src/engines/ocr/http-simple.js +0 -69
- package/dist/src/engines/ocr/http-simple.js.map +0 -1
- package/dist/src/engines/ocr/http-simple.test.d.ts +0 -2
- package/dist/src/engines/ocr/http-simple.test.d.ts.map +0 -1
- package/dist/src/engines/ocr/http-simple.test.js +0 -108
- package/dist/src/engines/ocr/http-simple.test.js.map +0 -1
- package/dist/src/engines/ocr/interface.d.ts +0 -15
- package/dist/src/engines/ocr/interface.d.ts.map +0 -1
- package/dist/src/engines/ocr/interface.js +0 -2
- package/dist/src/engines/ocr/interface.js.map +0 -1
- package/dist/src/engines/ocr/tesseract.d.ts +0 -20
- package/dist/src/engines/ocr/tesseract.d.ts.map +0 -1
- package/dist/src/engines/ocr/tesseract.js +0 -161
- package/dist/src/engines/ocr/tesseract.js.map +0 -1
- package/dist/src/engines/ocr/tesseract.test.d.ts +0 -2
- package/dist/src/engines/ocr/tesseract.test.d.ts.map +0 -1
- package/dist/src/engines/ocr/tesseract.test.js +0 -94
- package/dist/src/engines/ocr/tesseract.test.js.map +0 -1
- package/dist/src/engines/pdf/interface.d.ts +0 -84
- package/dist/src/engines/pdf/interface.d.ts.map +0 -1
- package/dist/src/engines/pdf/interface.js +0 -2
- package/dist/src/engines/pdf/interface.js.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.d.ts +0 -31
- package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.js +0 -145
- package/dist/src/engines/pdf/pdfium-renderer.js.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +0 -2
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.test.js +0 -109
- package/dist/src/engines/pdf/pdfium-renderer.test.js.map +0 -1
- package/dist/src/engines/pdf/pdfjs.d.ts +0 -14
- package/dist/src/engines/pdf/pdfjs.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjs.js +0 -799
- package/dist/src/engines/pdf/pdfjs.js.map +0 -1
- package/dist/src/engines/pdf/pdfjs.test.d.ts +0 -2
- package/dist/src/engines/pdf/pdfjs.test.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjs.test.js +0 -225
- package/dist/src/engines/pdf/pdfjs.test.js.map +0 -1
- package/dist/src/engines/pdf/pdfjsImporter.d.ts +0 -5
- package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjsImporter.js +0 -45
- package/dist/src/engines/pdf/pdfjsImporter.js.map +0 -1
- package/dist/src/index.d.ts +0 -3
- package/dist/src/index.d.ts.map +0 -1
- package/dist/src/index.js +0 -5
- package/dist/src/index.js.map +0 -1
- package/dist/src/lib.d.ts +0 -19
- package/dist/src/lib.d.ts.map +0 -1
- package/dist/src/lib.js +0 -17
- package/dist/src/lib.js.map +0 -1
- package/dist/src/output/json.d.ts +0 -10
- package/dist/src/output/json.d.ts.map +0 -1
- package/dist/src/output/json.js +0 -32
- package/dist/src/output/json.js.map +0 -1
- package/dist/src/output/json.test.d.ts +0 -2
- package/dist/src/output/json.test.d.ts.map +0 -1
- package/dist/src/output/json.test.js +0 -199
- package/dist/src/output/json.test.js.map +0 -1
- package/dist/src/output/text.d.ts +0 -10
- package/dist/src/output/text.d.ts.map +0 -1
- package/dist/src/output/text.js +0 -17
- package/dist/src/output/text.js.map +0 -1
- package/dist/src/output/text.test.d.ts +0 -2
- package/dist/src/output/text.test.d.ts.map +0 -1
- package/dist/src/output/text.test.js +0 -65
- package/dist/src/output/text.test.js.map +0 -1
- package/dist/src/processing/bbox.d.ts +0 -20
- package/dist/src/processing/bbox.d.ts.map +0 -1
- package/dist/src/processing/bbox.js +0 -258
- package/dist/src/processing/bbox.js.map +0 -1
- package/dist/src/processing/bbox.test.d.ts +0 -2
- package/dist/src/processing/bbox.test.d.ts.map +0 -1
- package/dist/src/processing/bbox.test.js +0 -334
- package/dist/src/processing/bbox.test.js.map +0 -1
- package/dist/src/processing/cleanText.d.ts +0 -6
- package/dist/src/processing/cleanText.d.ts.map +0 -1
- package/dist/src/processing/cleanText.js +0 -73
- package/dist/src/processing/cleanText.js.map +0 -1
- package/dist/src/processing/cleanText.test.d.ts +0 -2
- package/dist/src/processing/cleanText.test.d.ts.map +0 -1
- package/dist/src/processing/cleanText.test.js +0 -46
- package/dist/src/processing/cleanText.test.js.map +0 -1
- package/dist/src/processing/grid.d.ts +0 -7
- package/dist/src/processing/grid.d.ts.map +0 -1
- package/dist/src/processing/grid.js +0 -13
- package/dist/src/processing/grid.js.map +0 -1
- package/dist/src/processing/gridDebugLogger.d.ts +0 -206
- package/dist/src/processing/gridDebugLogger.d.ts.map +0 -1
- package/dist/src/processing/gridDebugLogger.js +0 -446
- package/dist/src/processing/gridDebugLogger.js.map +0 -1
- package/dist/src/processing/gridProjection.d.ts +0 -19
- package/dist/src/processing/gridProjection.d.ts.map +0 -1
- package/dist/src/processing/gridProjection.js +0 -1813
- package/dist/src/processing/gridProjection.js.map +0 -1
- package/dist/src/processing/gridProjection.test.d.ts +0 -2
- package/dist/src/processing/gridProjection.test.d.ts.map +0 -1
- package/dist/src/processing/gridProjection.test.js +0 -495
- package/dist/src/processing/gridProjection.test.js.map +0 -1
- package/dist/src/processing/gridVisualizer.d.ts +0 -14
- package/dist/src/processing/gridVisualizer.d.ts.map +0 -1
- package/dist/src/processing/gridVisualizer.js +0 -166
- package/dist/src/processing/gridVisualizer.js.map +0 -1
- package/dist/src/processing/markupUtils.d.ts +0 -7
- package/dist/src/processing/markupUtils.d.ts.map +0 -1
- package/dist/src/processing/markupUtils.js +0 -25
- package/dist/src/processing/markupUtils.js.map +0 -1
- package/dist/src/processing/markupUtils.test.d.ts +0 -2
- package/dist/src/processing/markupUtils.test.d.ts.map +0 -1
- package/dist/src/processing/markupUtils.test.js +0 -26
- package/dist/src/processing/markupUtils.test.js.map +0 -1
- package/dist/src/processing/ocrUtils.d.ts +0 -24
- package/dist/src/processing/ocrUtils.d.ts.map +0 -1
- package/dist/src/processing/ocrUtils.js +0 -79
- package/dist/src/processing/ocrUtils.js.map +0 -1
- package/dist/src/processing/octUtils.test.d.ts +0 -2
- package/dist/src/processing/octUtils.test.d.ts.map +0 -1
- package/dist/src/processing/octUtils.test.js +0 -72
- package/dist/src/processing/octUtils.test.js.map +0 -1
- package/dist/src/processing/searchItems.d.ts +0 -26
- package/dist/src/processing/searchItems.d.ts.map +0 -1
- package/dist/src/processing/searchItems.js +0 -93
- package/dist/src/processing/searchItems.js.map +0 -1
- package/dist/src/processing/searchItems.test.d.ts +0 -2
- package/dist/src/processing/searchItems.test.d.ts.map +0 -1
- package/dist/src/processing/searchItems.test.js +0 -84
- package/dist/src/processing/searchItems.test.js.map +0 -1
- package/dist/src/processing/textUtils.d.ts +0 -20
- package/dist/src/processing/textUtils.d.ts.map +0 -1
- package/dist/src/processing/textUtils.js +0 -142
- package/dist/src/processing/textUtils.js.map +0 -1
- package/dist/src/processing/textUtils.test.d.ts +0 -2
- package/dist/src/processing/textUtils.test.d.ts.map +0 -1
- package/dist/src/processing/textUtils.test.js +0 -45
- package/dist/src/processing/textUtils.test.js.map +0 -1
- package/dist/src/vendor/pdfjs/LICENSE +0 -177
- package/dist/src/vendor/pdfjs/README.md +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
- package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/LICENSE +0 -36
- package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/dist/src/vendor/pdfjs/jbig2.wasm +0 -0
- package/dist/src/vendor/pdfjs/openjpeg.wasm +0 -0
- package/dist/src/vendor/pdfjs/pdf.mjs +0 -33603
- package/dist/src/vendor/pdfjs/pdf.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
- package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/qcms_bg.wasm +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
- package/src/vendor/pdfjs/LICENSE +0 -177
- package/src/vendor/pdfjs/README.md +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
- package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
- package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
- package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/LICENSE +0 -36
- package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/src/vendor/pdfjs/jbig2.wasm +0 -0
- package/src/vendor/pdfjs/openjpeg.wasm +0 -0
- package/src/vendor/pdfjs/pdf.mjs +0 -33603
- package/src/vendor/pdfjs/pdf.mjs.map +0 -1
- package/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
- package/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
- package/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
- package/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
- package/src/vendor/pdfjs/qcms_bg.wasm +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
- package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
import { createWorker, createScheduler } from "tesseract.js";
|
|
2
|
-
export class TesseractEngine {
|
|
3
|
-
name = "tesseract";
|
|
4
|
-
scheduler;
|
|
5
|
-
workers = [];
|
|
6
|
-
currentLanguage;
|
|
7
|
-
concurrency;
|
|
8
|
-
tessdataPath;
|
|
9
|
-
constructor(concurrency = 4, tessdataPath) {
|
|
10
|
-
this.concurrency = concurrency;
|
|
11
|
-
// Use explicit path, then TESSDATA_PREFIX env var, then let tesseract.js default (CDN)
|
|
12
|
-
this.tessdataPath = tessdataPath || process.env.TESSDATA_PREFIX || undefined;
|
|
13
|
-
}
|
|
14
|
-
async initialize(language = "eng") {
|
|
15
|
-
if (this.scheduler && this.currentLanguage === language) {
|
|
16
|
-
return; // Already initialized for this language
|
|
17
|
-
}
|
|
18
|
-
// Clean up existing scheduler and workers if language changed
|
|
19
|
-
await this.terminate();
|
|
20
|
-
// Create scheduler
|
|
21
|
-
this.scheduler = createScheduler();
|
|
22
|
-
// Build worker options for local tessdata support
|
|
23
|
-
const workerOptions = {};
|
|
24
|
-
if (this.tessdataPath) {
|
|
25
|
-
workerOptions.langPath = this.tessdataPath;
|
|
26
|
-
workerOptions.cachePath = this.tessdataPath;
|
|
27
|
-
workerOptions.gzip = false; // Pre-cached files are not gzipped
|
|
28
|
-
}
|
|
29
|
-
workerOptions.errorHandler = () => {
|
|
30
|
-
// Let createWorker reject so LiteParse can convert the failure into
|
|
31
|
-
// an actionable initialization error instead of crashing the process.
|
|
32
|
-
};
|
|
33
|
-
// Create worker pool
|
|
34
|
-
for (let i = 0; i < this.concurrency; i++) {
|
|
35
|
-
let worker;
|
|
36
|
-
try {
|
|
37
|
-
worker = await createWorker(language, 1, Object.keys(workerOptions).length > 0 ? workerOptions : undefined);
|
|
38
|
-
}
|
|
39
|
-
catch (error) {
|
|
40
|
-
// Clean up any workers already created
|
|
41
|
-
await this.terminate();
|
|
42
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
43
|
-
// Provide actionable guidance for common failures
|
|
44
|
-
if (message.includes("fetch") ||
|
|
45
|
-
message.includes("network") ||
|
|
46
|
-
message.includes("ENOTFOUND") ||
|
|
47
|
-
message.includes("ERR_INVALID_URL")) {
|
|
48
|
-
throw new Error(`Tesseract failed to download language data for "${language}". ` +
|
|
49
|
-
`This usually means the machine has no internet access. ` +
|
|
50
|
-
`To fix this, either:\n` +
|
|
51
|
-
` 1. Set the TESSDATA_PREFIX env var to a directory containing ${language}.traineddata\n` +
|
|
52
|
-
` 2. Use --ocr-server-url to use an external OCR server instead\n` +
|
|
53
|
-
` 3. Use --no-ocr to disable OCR entirely`, {
|
|
54
|
-
cause: error,
|
|
55
|
-
});
|
|
56
|
-
}
|
|
57
|
-
if (message.includes("traineddata") ||
|
|
58
|
-
message.includes("TESSDATA") ||
|
|
59
|
-
message.includes("loading language")) {
|
|
60
|
-
throw new Error(`Tesseract failed to load language data for "${language}": ${message}\n` +
|
|
61
|
-
`Ensure ${language}.traineddata exists in your tessdata directory and set ` +
|
|
62
|
-
`the TESSDATA_PREFIX env var accordingly.`, {
|
|
63
|
-
cause: error,
|
|
64
|
-
});
|
|
65
|
-
}
|
|
66
|
-
throw new Error(`Tesseract OCR initialization failed: ${message}`, { cause: error });
|
|
67
|
-
}
|
|
68
|
-
if (!worker) {
|
|
69
|
-
await this.terminate();
|
|
70
|
-
throw new Error("Tesseract worker not initialized");
|
|
71
|
-
}
|
|
72
|
-
this.workers.push(worker);
|
|
73
|
-
this.scheduler.addWorker(worker);
|
|
74
|
-
}
|
|
75
|
-
this.currentLanguage = language;
|
|
76
|
-
}
|
|
77
|
-
async recognize(image, options) {
|
|
78
|
-
// Handle language - tesseract.js uses language codes like 'eng', 'fra', 'deu'
|
|
79
|
-
const language = this.normalizeLanguage(Array.isArray(options.language) ? options.language[0] : options.language);
|
|
80
|
-
// Initialize scheduler if needed
|
|
81
|
-
await this.initialize(language);
|
|
82
|
-
if (!this.scheduler) {
|
|
83
|
-
throw new Error("Tesseract scheduler not initialized");
|
|
84
|
-
}
|
|
85
|
-
try {
|
|
86
|
-
// Recognize text from image using scheduler
|
|
87
|
-
// tesseract.js accepts string (path/URL) or Buffer/Uint8Array
|
|
88
|
-
// In tesseract.js v6+, we need to enable blocks output to get word-level data
|
|
89
|
-
const { data: { blocks }, } = await this.scheduler.addJob("recognize", image, options.correctRotation ? { rotateAuto: true } : {}, { blocks: true });
|
|
90
|
-
// Extract words from hierarchical blocks structure: blocks → paragraphs → lines → words
|
|
91
|
-
const results = [];
|
|
92
|
-
for (const block of blocks || []) {
|
|
93
|
-
for (const paragraph of block.paragraphs || []) {
|
|
94
|
-
for (const line of paragraph.lines || []) {
|
|
95
|
-
for (const word of line.words || []) {
|
|
96
|
-
results.push({
|
|
97
|
-
text: word.text,
|
|
98
|
-
bbox: [word.bbox.x0, word.bbox.y0, word.bbox.x1, word.bbox.y1],
|
|
99
|
-
confidence: word.confidence / 100, // Tesseract returns 0-100, we want 0-1
|
|
100
|
-
});
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
// Filter out low confidence results (below 30%)
|
|
106
|
-
return results.filter((r) => r.confidence > 0.3);
|
|
107
|
-
}
|
|
108
|
-
catch (error) {
|
|
109
|
-
const label = typeof image === "string" ? image : "<buffer>";
|
|
110
|
-
console.error(`\nTesseract OCR error for ${label}:`, error);
|
|
111
|
-
return [];
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
async recognizeBatch(images, options) {
|
|
115
|
-
// Handle language
|
|
116
|
-
const language = this.normalizeLanguage(Array.isArray(options.language) ? options.language[0] : options.language);
|
|
117
|
-
// Initialize scheduler if needed
|
|
118
|
-
await this.initialize(language);
|
|
119
|
-
if (!this.scheduler) {
|
|
120
|
-
throw new Error("Tesseract scheduler not initialized");
|
|
121
|
-
}
|
|
122
|
-
// Process all images in parallel - scheduler handles distribution
|
|
123
|
-
const jobs = images.map((image) => this.recognize(image, options));
|
|
124
|
-
return Promise.all(jobs);
|
|
125
|
-
}
|
|
126
|
-
async terminate() {
|
|
127
|
-
if (this.scheduler) {
|
|
128
|
-
await this.scheduler.terminate();
|
|
129
|
-
this.scheduler = undefined;
|
|
130
|
-
}
|
|
131
|
-
this.workers = [];
|
|
132
|
-
this.currentLanguage = undefined;
|
|
133
|
-
}
|
|
134
|
-
/**
|
|
135
|
-
* Normalize language codes to Tesseract format
|
|
136
|
-
* Common mappings: en->eng, fr->fra, de->deu, es->spa, zh->chi_sim, ja->jpn
|
|
137
|
-
*/
|
|
138
|
-
normalizeLanguage(lang) {
|
|
139
|
-
const languageMap = {
|
|
140
|
-
en: "eng",
|
|
141
|
-
fr: "fra",
|
|
142
|
-
de: "deu",
|
|
143
|
-
es: "spa",
|
|
144
|
-
it: "ita",
|
|
145
|
-
pt: "por",
|
|
146
|
-
ru: "rus",
|
|
147
|
-
zh: "chi_sim",
|
|
148
|
-
"zh-cn": "chi_sim",
|
|
149
|
-
"zh-tw": "chi_tra",
|
|
150
|
-
ja: "jpn",
|
|
151
|
-
ko: "kor",
|
|
152
|
-
ar: "ara",
|
|
153
|
-
hi: "hin",
|
|
154
|
-
th: "tha",
|
|
155
|
-
vi: "vie",
|
|
156
|
-
};
|
|
157
|
-
const normalized = lang.toLowerCase().trim();
|
|
158
|
-
return languageMap[normalized] || normalized;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
//# sourceMappingURL=tesseract.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"tesseract.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,eAAe,EAAqB,MAAM,cAAc,CAAC;AAGhF,MAAM,OAAO,eAAe;IAC1B,IAAI,GAAG,WAAW,CAAC;IACX,SAAS,CAAa;IACtB,OAAO,GAAa,EAAE,CAAC;IACvB,eAAe,CAAU;IACzB,WAAW,CAAS;IACpB,YAAY,CAAU;IAE9B,YAAY,cAAsB,CAAC,EAAE,YAAqB;QACxD,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,uFAAuF;QACvF,IAAI,CAAC,YAAY,GAAG,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,SAAS,CAAC;IAC/E,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,WAAmB,KAAK;QACvC,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,KAAK,QAAQ,EAAE,CAAC;YACxD,OAAO,CAAC,wCAAwC;QAClD,CAAC;QAED,8DAA8D;QAC9D,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QAEvB,mBAAmB;QACnB,IAAI,CAAC,SAAS,GAAG,eAAe,EAAE,CAAC;QAEnC,kDAAkD;QAClD,MAAM,aAAa,GAA4B,EAAE,CAAC;QAClD,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,aAAa,CAAC,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC;YAC3C,aAAa,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC;YAC5C,aAAa,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,mCAAmC;QACjE,CAAC;QACD,aAAa,CAAC,YAAY,GAAG,GAAG,EAAE;YAChC,oEAAoE;YACpE,sEAAsE;QACxE,CAAC,CAAC;QAEF,qBAAqB;QACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,IAAI,MAAc,CAAC;YACnB,IAAI,CAAC;gBACH,MAAM,GAAG,MAAM,YAAY,CACzB,QAAQ,EACR,CAAC,EACD,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAClE,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,uCAAuC;gBACvC,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;gBACvB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAEvE,kDAAkD;gBAClD,IACE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;oBACzB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC;oBAC3B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;oBAC7B,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EACnC,CAAC;oBACD,MAAM,IAAI,KAAK,CACb,mDAAmD,QAAQ,KAAK;wBAC9D,yDAAyD;wBACzD,wBAAwB;wBACxB,kEAAkE,QAAQ,gBAAgB;wBAC1F,mEAAmE;wBACnE,2CAA2C,EAC7C;wBACE,KAAK,EAAE,KAAK;qBACb,CACF,CAAC;gBACJ,CAAC;gBACD,IACE,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC;oBAC/B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;oBAC5B,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EACpC,CAAC;oBACD,MAAM,IAAI,KAAK,CACb,+CAA+C,QAAQ,MAAM,OAAO,IAAI;wBACtE,UAAU,QAAQ,yDAAyD;wBAC3E,0CAA0C,EAC5C;wBACE,KAAK,EAAE,KAAK;qBACb,CACF,CAAC;gBACJ,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YACvF,CAAC;YACD,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;YACtD,CAAC;YACD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,KAAsB,EAAE,OAAmB;QACzD,8EAA8E;QAC9E,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,CAAC;YACH,4CAA4C;YAC5C,8DAA8D;YAC9D,8EAA8E;YAC9E,MAAM,EACJ,IAAI,EAAE,EAAE,MAAM,EAAE,GACjB,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAC7B,WAAW,EACX,KAAK,EACL,OAAO,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EACnD,EAAE,MAAM,EAAE,IAAI,EAAE,CACjB,CAAC;YAEF,wFAAwF;YACxF,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,KAAK,MAAM,KAAK,IAAI,MAAM,IAAI,EAAE,EAAE,CAAC;gBACjC,KAAK,MAAM,SAAS,IAAI,KAAK,CAAC,UAAU,IAAI,EAAE,EAAE,CAAC;oBAC/C,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;wBACzC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;4BACpC,OAAO,CAAC,IAAI,CAAC;gCACX,IAAI,EAAE,IAAI,CAAC,IAAI;gCACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;gCACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;6BAC3E,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gDAAgD;YAChD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC;QACnD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,KAAK,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC;YAC7D,OAAO,CAAC,KAAK,CAAC,6BAA6B,KAAK,GAAG,EAAE,KAAK,CAAC,CAAC;YAC5D,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,MAA2B,EAAE,OAAmB;QACnE,kBAAkB;QAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,kEAAkE;QAClE,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC;QAEnE,OAAO,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,SAAS;QACb,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;YACjC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC7B,CAAC;QACD,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;QAClB,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;IACnC,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,IAAY;QACpC,MAAM,WAAW,GAA2B;YAC1C,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,SAAS;YACb,OAAO,EAAE,SAAS;YAClB,OAAO,EAAE,SAAS;YAClB,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;SACV,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAC7C,OAAO,WAAW,CAAC,UAAU,CAAC,IAAI,UAAU,CAAC;IAC/C,CAAC;CACF"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"tesseract.test.d.ts","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.test.ts"],"names":[],"mappings":""}
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
import { vi, describe, it, expect } from "vitest";
|
|
2
|
-
// In tesseract.js v6+, words are nested in blocks → paragraphs → lines → words
|
|
3
|
-
const mockWords = [
|
|
4
|
-
{
|
|
5
|
-
text: "Hello",
|
|
6
|
-
confidence: 95,
|
|
7
|
-
bbox: { x0: 0, y0: 0, x1: 50, y1: 20 },
|
|
8
|
-
},
|
|
9
|
-
{
|
|
10
|
-
text: "World",
|
|
11
|
-
confidence: 92,
|
|
12
|
-
bbox: { x0: 60, y0: 0, x1: 120, y1: 20 },
|
|
13
|
-
},
|
|
14
|
-
];
|
|
15
|
-
const mockTesseractResult = {
|
|
16
|
-
data: {
|
|
17
|
-
text: "Hello World",
|
|
18
|
-
blocks: [
|
|
19
|
-
{
|
|
20
|
-
paragraphs: [
|
|
21
|
-
{
|
|
22
|
-
lines: [
|
|
23
|
-
{
|
|
24
|
-
words: mockWords,
|
|
25
|
-
},
|
|
26
|
-
],
|
|
27
|
-
},
|
|
28
|
-
],
|
|
29
|
-
},
|
|
30
|
-
],
|
|
31
|
-
confidence: 93,
|
|
32
|
-
},
|
|
33
|
-
};
|
|
34
|
-
const mockResults = mockWords.map((word) => ({
|
|
35
|
-
text: word.text,
|
|
36
|
-
bbox: [word.bbox.x0, word.bbox.y0, word.bbox.x1, word.bbox.y1],
|
|
37
|
-
confidence: word.confidence / 100, // Tesseract returns 0-100, we want 0-1
|
|
38
|
-
}));
|
|
39
|
-
const mockTesseractWorker = {
|
|
40
|
-
terminate: vi.fn(async () => { }),
|
|
41
|
-
recognize: vi.fn(async () => {
|
|
42
|
-
return mockTesseractResult;
|
|
43
|
-
}),
|
|
44
|
-
};
|
|
45
|
-
vi.mock("tesseract.js", async () => {
|
|
46
|
-
const actual = await vi.importActual("tesseract.js");
|
|
47
|
-
return {
|
|
48
|
-
...actual,
|
|
49
|
-
createWorker: vi.fn(
|
|
50
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
51
|
-
async (language, _num, options) => {
|
|
52
|
-
if (language == "it" || language == "ita") {
|
|
53
|
-
return;
|
|
54
|
-
}
|
|
55
|
-
if (language == "offline" || language == "fetchfail") {
|
|
56
|
-
options?.errorHandler?.("TypeError: fetch failed");
|
|
57
|
-
throw new Error("TypeError: fetch failed");
|
|
58
|
-
}
|
|
59
|
-
return mockTesseractWorker;
|
|
60
|
-
}),
|
|
61
|
-
};
|
|
62
|
-
});
|
|
63
|
-
import { TesseractEngine } from "./tesseract";
|
|
64
|
-
describe("test Tesseract OCR (single image)", () => {
|
|
65
|
-
it("test engine success", async () => {
|
|
66
|
-
const engine = new TesseractEngine();
|
|
67
|
-
expect(engine.name).toBe("tesseract");
|
|
68
|
-
const result = await engine.recognize("cat.png", { language: "en" });
|
|
69
|
-
expect(result).toStrictEqual(mockResults);
|
|
70
|
-
});
|
|
71
|
-
it("test engine failure (failed to initialize)", async () => {
|
|
72
|
-
const engine = new TesseractEngine();
|
|
73
|
-
expect(engine.name).toBe("tesseract");
|
|
74
|
-
await expect(engine.recognize("cat.png", { language: "it" })).rejects.toThrow("Tesseract worker not initialized");
|
|
75
|
-
});
|
|
76
|
-
it("test engine failure (fetch failed) returns actionable guidance", async () => {
|
|
77
|
-
const engine = new TesseractEngine();
|
|
78
|
-
await expect(engine.recognize("cat.png", { language: "offline" })).rejects.toThrow('Tesseract failed to download language data for "offline"');
|
|
79
|
-
});
|
|
80
|
-
});
|
|
81
|
-
describe("test OCR simple HTTP server (batch)", () => {
|
|
82
|
-
it("test engine success", async () => {
|
|
83
|
-
const engine = new TesseractEngine();
|
|
84
|
-
expect(engine.name).toBe("tesseract");
|
|
85
|
-
const result = await engine.recognizeBatch(["cat.png", "dog.png"], { language: "en" });
|
|
86
|
-
expect(result).toStrictEqual([mockResults, mockResults]);
|
|
87
|
-
});
|
|
88
|
-
it("test engine failure (failed to initialize)", async () => {
|
|
89
|
-
const engine = new TesseractEngine();
|
|
90
|
-
expect(engine.name).toBe("tesseract");
|
|
91
|
-
await expect(engine.recognizeBatch(["cat.png", "dog.png"], { language: "it" })).rejects.toThrow("Tesseract worker not initialized");
|
|
92
|
-
});
|
|
93
|
-
});
|
|
94
|
-
//# sourceMappingURL=tesseract.test.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"tesseract.test.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAElD,+EAA+E;AAC/E,MAAM,SAAS,GAAG;IAChB;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE;KACvC;IACD;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,EAAE,EAAE;KACzC;CACF,CAAC;AAEF,MAAM,mBAAmB,GAAG;IAC1B,IAAI,EAAE;QACJ,IAAI,EAAE,aAAa;QACnB,MAAM,EAAE;YACN;gBACE,UAAU,EAAE;oBACV;wBACE,KAAK,EAAE;4BACL;gCACE,KAAK,EAAE,SAAS;6BACjB;yBACF;qBACF;iBACF;aACF;SACF;QACD,UAAU,EAAE,EAAE;KACf;CACF,CAAC;AAEF,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC3C,IAAI,EAAE,IAAI,CAAC,IAAI;IACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;IACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;CAC3E,CAAC,CAAC,CAAC;AAEJ,MAAM,mBAAmB,GAAG;IAC1B,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,GAAE,CAAC,CAAC;IAChC,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;QAC1B,OAAO,mBAAmB,CAAC;IAC7B,CAAC,CAAC;CACH,CAAC;AAEF,EAAE,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,IAAI,EAAE;IACjC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,YAAY,CAAgC,cAAc,CAAC,CAAC;IACpF,OAAO;QACL,GAAG,MAAM;QACT,YAAY,EAAE,EAAE,CAAC,EAAE;QACjB,8DAA8D;QAC9D,KAAK,EAAE,QAAgB,EAAE,IAAY,EAAE,OAA+C,EAAE,EAAE;YACxF,IAAI,QAAQ,IAAI,IAAI,IAAI,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC1C,OAAO;YACT,CAAC;YACD,IAAI,QAAQ,IAAI,SAAS,IAAI,QAAQ,IAAI,WAAW,EAAE,CAAC;gBACrD,OAAO,EAAE,YAAY,EAAE,CAAC,yBAAyB,CAAC,CAAC;gBACnD,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAC7C,CAAC;YACD,OAAO,mBAAmB,CAAC;QAC7B,CAAC,CACF;KACF,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,QAAQ,CAAC,mCAAmC,EAAE,GAAG,EAAE;IACjD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACrE,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC3E,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gEAAgE,EAAE,KAAK,IAAI,EAAE;QAC9E,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChF,0DAA0D,CAC3D,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,qCAAqC,EAAE,GAAG,EAAE;IACnD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACvF,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC7F,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
import { TextItem } from "../../core/types.js";
|
|
2
|
-
/** Options for page extraction */
|
|
3
|
-
export interface ExtractOptions {
|
|
4
|
-
/** Whether to extract embedded image bounds (needed for OCR). Default: true */
|
|
5
|
-
extractImages?: boolean;
|
|
6
|
-
}
|
|
7
|
-
export interface PdfEngine {
|
|
8
|
-
name: string;
|
|
9
|
-
loadDocument(input: string | Uint8Array, password?: string): Promise<PdfDocument>;
|
|
10
|
-
extractPage(doc: PdfDocument, pageNum: number, options?: ExtractOptions): Promise<PageData>;
|
|
11
|
-
extractAllPages(doc: PdfDocument, maxPages?: number, targetPages?: string, options?: ExtractOptions): Promise<PageData[]>;
|
|
12
|
-
renderPageImage(doc: PdfDocument, pageNum: number, dpi: number, password?: string): Promise<Buffer>;
|
|
13
|
-
close(doc: PdfDocument): Promise<void>;
|
|
14
|
-
}
|
|
15
|
-
export interface PdfDocument {
|
|
16
|
-
numPages: number;
|
|
17
|
-
data: Uint8Array;
|
|
18
|
-
metadata?: unknown;
|
|
19
|
-
}
|
|
20
|
-
/** Bounding box region */
|
|
21
|
-
export interface BoundingBox {
|
|
22
|
-
x: number;
|
|
23
|
-
y: number;
|
|
24
|
-
width: number;
|
|
25
|
-
height: number;
|
|
26
|
-
}
|
|
27
|
-
export interface PageData {
|
|
28
|
-
pageNum: number;
|
|
29
|
-
width: number;
|
|
30
|
-
height: number;
|
|
31
|
-
textItems: TextItem[];
|
|
32
|
-
images: Image[];
|
|
33
|
-
annotations?: Annotation[];
|
|
34
|
-
/** Bounding boxes of garbled text that was filtered out (for targeted OCR) */
|
|
35
|
-
garbledTextRegions?: BoundingBox[];
|
|
36
|
-
}
|
|
37
|
-
export interface Path {
|
|
38
|
-
type: "rectangle" | "line" | "curve";
|
|
39
|
-
points: number[][];
|
|
40
|
-
color?: string;
|
|
41
|
-
width?: number;
|
|
42
|
-
}
|
|
43
|
-
export interface Image {
|
|
44
|
-
x: number;
|
|
45
|
-
y: number;
|
|
46
|
-
width: number;
|
|
47
|
-
height: number;
|
|
48
|
-
data?: Buffer;
|
|
49
|
-
coords?: {
|
|
50
|
-
x: number;
|
|
51
|
-
y: number;
|
|
52
|
-
w: number;
|
|
53
|
-
h: number;
|
|
54
|
-
};
|
|
55
|
-
scaleFactor?: number;
|
|
56
|
-
originalOrientationAngle?: number;
|
|
57
|
-
type?: string;
|
|
58
|
-
ocrRaw?: EasyOcrResultLine[];
|
|
59
|
-
ocrParsed?: Array<{
|
|
60
|
-
x: number;
|
|
61
|
-
y: number;
|
|
62
|
-
w: number;
|
|
63
|
-
h: number;
|
|
64
|
-
confidence: number;
|
|
65
|
-
text: string;
|
|
66
|
-
}>;
|
|
67
|
-
}
|
|
68
|
-
export type EasyOcrResultLine = [
|
|
69
|
-
[
|
|
70
|
-
[number, number],
|
|
71
|
-
[number, number],
|
|
72
|
-
[number, number],
|
|
73
|
-
[number, number]
|
|
74
|
-
],
|
|
75
|
-
string,
|
|
76
|
-
string | number
|
|
77
|
-
];
|
|
78
|
-
export interface Annotation {
|
|
79
|
-
type: string;
|
|
80
|
-
subtype?: string;
|
|
81
|
-
url?: string;
|
|
82
|
-
rect: number[];
|
|
83
|
-
}
|
|
84
|
-
//# sourceMappingURL=interface.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"interface.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/interface.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAE/C,kCAAkC;AAClC,MAAM,WAAW,cAAc;IAC7B,+EAA+E;IAC/E,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAClF,WAAW,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5F,eAAe,CACb,GAAG,EAAE,WAAW,EAChB,QAAQ,CAAC,EAAE,MAAM,EACjB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;IACvB,eAAe,CACb,GAAG,EAAE,WAAW,EAChB,OAAO,EAAE,MAAM,EACf,GAAG,EAAE,MAAM,EACX,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,CAAC;IACnB,KAAK,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACxC;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,0BAA0B;AAC1B,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,WAAW,CAAC,EAAE,UAAU,EAAE,CAAC;IAC3B,8EAA8E;IAC9E,kBAAkB,CAAC,EAAE,WAAW,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,IAAI;IACnB,IAAI,EAAE,WAAW,GAAG,MAAM,GAAG,OAAO,CAAC;IACrC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,KAAK;IACpB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACxD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wBAAwB,CAAC,EAAE,MAAM,CAAC;IAClC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAC7B,SAAS,CAAC,EAAE,KAAK,CAAC;QAChB,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;CACJ;AAGD,MAAM,MAAM,iBAAiB,GAAG;IAC9B;QAAC,CAAC,MAAM,EAAE,MAAM,CAAC;QAAE,CAAC,MAAM,EAAE,MAAM,CAAC;QAAE,CAAC,MAAM,EAAE,MAAM,CAAC;QAAE,CAAC,MAAM,EAAE,MAAM,CAAC;KAAC;IACxE,MAAM;IACN,MAAM,GAAG,MAAM;CAChB,CAAC;AAEF,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,EAAE,CAAC;CAChB"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"interface.js","sourceRoot":"","sources":["../../../../src/engines/pdf/interface.ts"],"names":[],"mappings":""}
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* PDFium-based PDF screenshot renderer
|
|
3
|
-
* Uses native PDFium library for high-quality, fast screenshots
|
|
4
|
-
*/
|
|
5
|
-
export declare class PdfiumRenderer {
|
|
6
|
-
private pdfium;
|
|
7
|
-
private cachedDocument;
|
|
8
|
-
init(): Promise<void>;
|
|
9
|
-
/**
|
|
10
|
-
* Pre-load a PDF document so that subsequent per-page calls
|
|
11
|
-
* (`renderPageToBuffer`, `extractImageBounds`) reuse it instead
|
|
12
|
-
* of re-parsing the file on every invocation.
|
|
13
|
-
*/
|
|
14
|
-
loadDocument(pdfInput: string | Buffer | Uint8Array, password?: string): Promise<void>;
|
|
15
|
-
closeDocument(): void;
|
|
16
|
-
private getOrLoadDocument;
|
|
17
|
-
renderPageToBuffer(pdfInput: string | Buffer | Uint8Array, pageNumber: number, dpi?: number, password?: string): Promise<Buffer>;
|
|
18
|
-
/**
|
|
19
|
-
* Extract bounding boxes of all embedded images on a page.
|
|
20
|
-
* Uses PDFium's low-level WASM API to iterate page objects and read image bounds.
|
|
21
|
-
* Returns coordinates in viewport space (Y-down, origin top-left) in PDF points.
|
|
22
|
-
*/
|
|
23
|
-
extractImageBounds(pdfInput: string | Buffer | Uint8Array, pageNumber: number, password?: string): Promise<Array<{
|
|
24
|
-
x: number;
|
|
25
|
-
y: number;
|
|
26
|
-
width: number;
|
|
27
|
-
height: number;
|
|
28
|
-
}>>;
|
|
29
|
-
close(): Promise<void>;
|
|
30
|
-
}
|
|
31
|
-
//# sourceMappingURL=pdfium-renderer.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pdfium-renderer.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.ts"],"names":[],"mappings":"AAmCA;;;GAGG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,MAAM,CAA8B;IAC5C,OAAO,CAAC,cAAc,CAA+B;IAE/C,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAM3B;;;;OAIG;IACG,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQ5F,aAAa,IAAI,IAAI;YAOP,iBAAiB;IAczB,kBAAkB,CACtB,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,EACtC,UAAU,EAAE,MAAM,EAClB,GAAG,GAAE,MAAY,EACjB,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC;IAmClB;;;;OAIG;IACG,kBAAkB,CACtB,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,EACtC,UAAU,EAAE,MAAM,EAClB,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,KAAK,CAAC;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IA6DpE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAO7B"}
|
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
import { PDFiumLibrary } from "@hyzyla/pdfium";
|
|
2
|
-
import sharp from "sharp";
|
|
3
|
-
import { promises as fs } from "fs";
|
|
4
|
-
/** Minimum image dimension in PDF points to be considered for OCR */
|
|
5
|
-
const MIN_IMAGE_SIZE_PT = 25;
|
|
6
|
-
/** Images covering more than this fraction of the page are treated as backgrounds */
|
|
7
|
-
const MAX_IMAGE_PAGE_COVERAGE = 0.9;
|
|
8
|
-
/**
|
|
9
|
-
* PDFium-based PDF screenshot renderer
|
|
10
|
-
* Uses native PDFium library for high-quality, fast screenshots
|
|
11
|
-
*/
|
|
12
|
-
export class PdfiumRenderer {
|
|
13
|
-
pdfium = null;
|
|
14
|
-
cachedDocument = null;
|
|
15
|
-
async init() {
|
|
16
|
-
if (!this.pdfium) {
|
|
17
|
-
this.pdfium = await PDFiumLibrary.init();
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
/**
|
|
21
|
-
* Pre-load a PDF document so that subsequent per-page calls
|
|
22
|
-
* (`renderPageToBuffer`, `extractImageBounds`) reuse it instead
|
|
23
|
-
* of re-parsing the file on every invocation.
|
|
24
|
-
*/
|
|
25
|
-
async loadDocument(pdfInput, password) {
|
|
26
|
-
await this.init();
|
|
27
|
-
this.closeDocument();
|
|
28
|
-
const pdfBuffer = typeof pdfInput === "string" ? await fs.readFile(pdfInput) : Buffer.from(pdfInput);
|
|
29
|
-
this.cachedDocument = await this.pdfium.loadDocument(pdfBuffer, password);
|
|
30
|
-
}
|
|
31
|
-
closeDocument() {
|
|
32
|
-
if (this.cachedDocument) {
|
|
33
|
-
this.cachedDocument.destroy();
|
|
34
|
-
this.cachedDocument = null;
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
async getOrLoadDocument(pdfInput, password) {
|
|
38
|
-
if (this.cachedDocument) {
|
|
39
|
-
return { document: this.cachedDocument, isTemporary: false };
|
|
40
|
-
}
|
|
41
|
-
await this.init();
|
|
42
|
-
const pdfBuffer = typeof pdfInput === "string" ? await fs.readFile(pdfInput) : Buffer.from(pdfInput);
|
|
43
|
-
const document = await this.pdfium.loadDocument(pdfBuffer, password);
|
|
44
|
-
return { document, isTemporary: true };
|
|
45
|
-
}
|
|
46
|
-
async renderPageToBuffer(pdfInput, pageNumber, dpi = 150, password) {
|
|
47
|
-
const { document, isTemporary } = await this.getOrLoadDocument(pdfInput, password);
|
|
48
|
-
try {
|
|
49
|
-
const page = document.getPage(pageNumber - 1);
|
|
50
|
-
const scale = dpi / 72;
|
|
51
|
-
const image = await page.render({
|
|
52
|
-
scale,
|
|
53
|
-
render: async (options) => {
|
|
54
|
-
return await sharp(options.data, {
|
|
55
|
-
raw: {
|
|
56
|
-
width: options.width,
|
|
57
|
-
height: options.height,
|
|
58
|
-
channels: 4, // RGBA
|
|
59
|
-
},
|
|
60
|
-
})
|
|
61
|
-
.png({
|
|
62
|
-
compressionLevel: 6,
|
|
63
|
-
})
|
|
64
|
-
.withMetadata({
|
|
65
|
-
density: dpi,
|
|
66
|
-
})
|
|
67
|
-
.toBuffer();
|
|
68
|
-
},
|
|
69
|
-
});
|
|
70
|
-
return Buffer.from(image.data);
|
|
71
|
-
}
|
|
72
|
-
finally {
|
|
73
|
-
if (isTemporary) {
|
|
74
|
-
document.destroy();
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
/**
|
|
79
|
-
* Extract bounding boxes of all embedded images on a page.
|
|
80
|
-
* Uses PDFium's low-level WASM API to iterate page objects and read image bounds.
|
|
81
|
-
* Returns coordinates in viewport space (Y-down, origin top-left) in PDF points.
|
|
82
|
-
*/
|
|
83
|
-
async extractImageBounds(pdfInput, pageNumber, password) {
|
|
84
|
-
const { document, isTemporary } = await this.getOrLoadDocument(pdfInput, password);
|
|
85
|
-
try {
|
|
86
|
-
const page = document.getPage(pageNumber - 1);
|
|
87
|
-
const results = [];
|
|
88
|
-
const mod = page.module;
|
|
89
|
-
const pagePtr = page.pageIdx;
|
|
90
|
-
if (!mod || !mod._FPDFPageObj_GetBounds) {
|
|
91
|
-
return results;
|
|
92
|
-
}
|
|
93
|
-
const pageWidth = mod._FPDF_GetPageWidthF(pagePtr);
|
|
94
|
-
const pageHeight = mod._FPDF_GetPageHeightF(pagePtr);
|
|
95
|
-
for (const obj of page.objects()) {
|
|
96
|
-
if (obj.type !== "image")
|
|
97
|
-
continue;
|
|
98
|
-
const objHandle = obj.objectIdx;
|
|
99
|
-
if (!objHandle)
|
|
100
|
-
continue;
|
|
101
|
-
const ptr = mod._malloc(16);
|
|
102
|
-
try {
|
|
103
|
-
const ok = mod._FPDFPageObj_GetBounds(objHandle, ptr, ptr + 4, ptr + 8, ptr + 12);
|
|
104
|
-
if (!ok)
|
|
105
|
-
continue;
|
|
106
|
-
const buf = mod.HEAPU8.buffer;
|
|
107
|
-
const view = new DataView(buf);
|
|
108
|
-
const left = view.getFloat32(ptr, true);
|
|
109
|
-
const bottom = view.getFloat32(ptr + 4, true);
|
|
110
|
-
const right = view.getFloat32(ptr + 8, true);
|
|
111
|
-
const top = view.getFloat32(ptr + 12, true);
|
|
112
|
-
const w = right - left;
|
|
113
|
-
const h = top - bottom;
|
|
114
|
-
if (w < MIN_IMAGE_SIZE_PT || h < MIN_IMAGE_SIZE_PT)
|
|
115
|
-
continue;
|
|
116
|
-
if (w > pageWidth * MAX_IMAGE_PAGE_COVERAGE && h > pageHeight * MAX_IMAGE_PAGE_COVERAGE)
|
|
117
|
-
continue;
|
|
118
|
-
results.push({
|
|
119
|
-
x: left,
|
|
120
|
-
y: pageHeight - top,
|
|
121
|
-
width: w,
|
|
122
|
-
height: h,
|
|
123
|
-
});
|
|
124
|
-
}
|
|
125
|
-
finally {
|
|
126
|
-
mod._free(ptr);
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
return results;
|
|
130
|
-
}
|
|
131
|
-
finally {
|
|
132
|
-
if (isTemporary) {
|
|
133
|
-
document.destroy();
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
async close() {
|
|
138
|
-
this.closeDocument();
|
|
139
|
-
if (this.pdfium) {
|
|
140
|
-
this.pdfium.destroy();
|
|
141
|
-
this.pdfium = null;
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
//# sourceMappingURL=pdfium-renderer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pdfium-renderer.js","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAgD,MAAM,gBAAgB,CAAC;AAC7F,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,IAAI,CAAC;AAsBpC,qEAAqE;AACrE,MAAM,iBAAiB,GAAG,EAAE,CAAC;AAC7B,qFAAqF;AACrF,MAAM,uBAAuB,GAAG,GAAG,CAAC;AAQpC;;;GAGG;AACH,MAAM,OAAO,cAAc;IACjB,MAAM,GAAyB,IAAI,CAAC;IACpC,cAAc,GAA0B,IAAI,CAAC;IAErD,KAAK,CAAC,IAAI;QACR,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,IAAI,CAAC,MAAM,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,CAAC;QAC3C,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,YAAY,CAAC,QAAsC,EAAE,QAAiB;QAC1E,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAClB,IAAI,CAAC,aAAa,EAAE,CAAC;QACrB,MAAM,SAAS,GACb,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrF,IAAI,CAAC,cAAc,GAAG,MAAM,IAAI,CAAC,MAAO,CAAC,YAAY,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAC7E,CAAC;IAED,aAAa;QACX,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,CAAC;YAC9B,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC;QAC7B,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,iBAAiB,CAC7B,QAAsC,EACtC,QAAiB;QAEjB,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,OAAO,EAAE,QAAQ,EAAE,IAAI,CAAC,cAAc,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;QAC/D,CAAC;QACD,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAClB,MAAM,SAAS,GACb,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAO,CAAC,YAAY,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACtE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,kBAAkB,CACtB,QAAsC,EACtC,UAAkB,EAClB,MAAc,GAAG,EACjB,QAAiB;QAEjB,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAEnF,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;YAC9C,MAAM,KAAK,GAAG,GAAG,GAAG,EAAE,CAAC;YAEvB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC;gBAC9B,KAAK;gBACL,MAAM,EAAE,KAAK,EAAE,OAAgC,EAAE,EAAE;oBACjD,OAAO,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE;wBAC/B,GAAG,EAAE;4BACH,KAAK,EAAE,OAAO,CAAC,KAAK;4BACpB,MAAM,EAAE,OAAO,CAAC,MAAM;4BACtB,QAAQ,EAAE,CAAC,EAAE,OAAO;yBACrB;qBACF,CAAC;yBACC,GAAG,CAAC;wBACH,gBAAgB,EAAE,CAAC;qBACpB,CAAC;yBACD,YAAY,CAAC;wBACZ,OAAO,EAAE,GAAG;qBACb,CAAC;yBACD,QAAQ,EAAE,CAAC;gBAChB,CAAC;aACF,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC;gBAAS,CAAC;YACT,IAAI,WAAW,EAAE,CAAC;gBAChB,QAAQ,CAAC,OAAO,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,kBAAkB,CACtB,QAAsC,EACtC,UAAkB,EAClB,QAAiB;QAEjB,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAEnF,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAkC,CAAC;YAC/E,MAAM,OAAO,GAAmE,EAAE,CAAC;YAEnF,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;YACxB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAE7B,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,sBAAsB,EAAE,CAAC;gBACxC,OAAO,OAAO,CAAC;YACjB,CAAC;YAED,MAAM,SAAS,GAAG,GAAG,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;YACnD,MAAM,UAAU,GAAG,GAAG,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC;YAErD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE,CAAC;gBACjC,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO;oBAAE,SAAS;gBAEnC,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,CAAC;gBAChC,IAAI,CAAC,SAAS;oBAAE,SAAS;gBAEzB,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;gBAC5B,IAAI,CAAC;oBACH,MAAM,EAAE,GAAG,GAAG,CAAC,sBAAsB,CAAC,SAAS,EAAE,GAAG,EAAE,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,EAAE,CAAC,CAAC;oBAClF,IAAI,CAAC,EAAE;wBAAE,SAAS;oBAElB,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC;oBAC9B,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,GAAG,CAAC,CAAC;oBAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;oBACxC,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;oBAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;oBAC7C,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;oBAE5C,MAAM,CAAC,GAAG,KAAK,GAAG,IAAI,CAAC;oBACvB,MAAM,CAAC,GAAG,GAAG,GAAG,MAAM,CAAC;oBAEvB,IAAI,CAAC,GAAG,iBAAiB,IAAI,CAAC,GAAG,iBAAiB;wBAAE,SAAS;oBAC7D,IAAI,CAAC,GAAG,SAAS,GAAG,uBAAuB,IAAI,CAAC,GAAG,UAAU,GAAG,uBAAuB;wBACrF,SAAS;oBAEX,OAAO,CAAC,IAAI,CAAC;wBACX,CAAC,EAAE,IAAI;wBACP,CAAC,EAAE,UAAU,GAAG,GAAG;wBACnB,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,CAAC;qBACV,CAAC,CAAC;gBACL,CAAC;wBAAS,CAAC;oBACT,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBACjB,CAAC;YACH,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;gBAAS,CAAC;YACT,IAAI,WAAW,EAAE,CAAC;gBAChB,QAAQ,CAAC,OAAO,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,aAAa,EAAE,CAAC;QACrB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACtB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;CACF"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pdfium-renderer.test.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.test.ts"],"names":[],"mappings":""}
|