@llamaindex/liteparse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +339 -0
- package/dist/cli/parse.d.ts +4 -0
- package/dist/cli/parse.d.ts.map +1 -0
- package/dist/cli/parse.js +401 -0
- package/dist/cli/parse.js.map +1 -0
- package/dist/src/conversion/convertToPdf.d.ts +47 -0
- package/dist/src/conversion/convertToPdf.d.ts.map +1 -0
- package/dist/src/conversion/convertToPdf.js +337 -0
- package/dist/src/conversion/convertToPdf.js.map +1 -0
- package/dist/src/conversion/convertToPdf.test.d.ts +2 -0
- package/dist/src/conversion/convertToPdf.test.d.ts.map +1 -0
- package/dist/src/conversion/convertToPdf.test.js +208 -0
- package/dist/src/conversion/convertToPdf.test.js.map +1 -0
- package/dist/src/core/config.d.ts +4 -0
- package/dist/src/core/config.d.ts.map +1 -0
- package/dist/src/core/config.js +25 -0
- package/dist/src/core/config.js.map +1 -0
- package/dist/src/core/config.test.d.ts +2 -0
- package/dist/src/core/config.test.d.ts.map +1 -0
- package/dist/src/core/config.test.js +21 -0
- package/dist/src/core/config.test.js.map +1 -0
- package/dist/src/core/parser.d.ts +83 -0
- package/dist/src/core/parser.d.ts.map +1 -0
- package/dist/src/core/parser.js +333 -0
- package/dist/src/core/parser.js.map +1 -0
- package/dist/src/core/parser.test.d.ts +2 -0
- package/dist/src/core/parser.test.d.ts.map +1 -0
- package/dist/src/core/parser.test.js +537 -0
- package/dist/src/core/parser.test.js.map +1 -0
- package/dist/src/core/types.d.ts +287 -0
- package/dist/src/core/types.d.ts.map +1 -0
- package/dist/src/core/types.js +2 -0
- package/dist/src/core/types.js.map +1 -0
- package/dist/src/engines/ocr/http-simple.d.ts +19 -0
- package/dist/src/engines/ocr/http-simple.d.ts.map +1 -0
- package/dist/src/engines/ocr/http-simple.js +63 -0
- package/dist/src/engines/ocr/http-simple.js.map +1 -0
- package/dist/src/engines/ocr/http-simple.test.d.ts +2 -0
- package/dist/src/engines/ocr/http-simple.test.d.ts.map +1 -0
- package/dist/src/engines/ocr/http-simple.test.js +108 -0
- package/dist/src/engines/ocr/http-simple.test.js.map +1 -0
- package/dist/src/engines/ocr/interface.d.ts +15 -0
- package/dist/src/engines/ocr/interface.d.ts.map +1 -0
- package/dist/src/engines/ocr/interface.js +2 -0
- package/dist/src/engines/ocr/interface.js.map +1 -0
- package/dist/src/engines/ocr/tesseract.d.ts +19 -0
- package/dist/src/engines/ocr/tesseract.d.ts.map +1 -0
- package/dist/src/engines/ocr/tesseract.js +112 -0
- package/dist/src/engines/ocr/tesseract.js.map +1 -0
- package/dist/src/engines/ocr/tesseract.test.d.ts +2 -0
- package/dist/src/engines/ocr/tesseract.test.d.ts.map +1 -0
- package/dist/src/engines/ocr/tesseract.test.js +84 -0
- package/dist/src/engines/ocr/tesseract.test.js.map +1 -0
- package/dist/src/engines/pdf/interface.d.ts +79 -0
- package/dist/src/engines/pdf/interface.d.ts.map +1 -0
- package/dist/src/engines/pdf/interface.js +2 -0
- package/dist/src/engines/pdf/interface.js.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.d.ts +11 -0
- package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.js +64 -0
- package/dist/src/engines/pdf/pdfium-renderer.js.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +2 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.js +76 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.js.map +1 -0
- package/dist/src/engines/pdf/pdfjs.d.ts +13 -0
- package/dist/src/engines/pdf/pdfjs.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfjs.js +538 -0
- package/dist/src/engines/pdf/pdfjs.js.map +1 -0
- package/dist/src/engines/pdf/pdfjs.test.d.ts +2 -0
- package/dist/src/engines/pdf/pdfjs.test.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfjs.test.js +220 -0
- package/dist/src/engines/pdf/pdfjs.test.js.map +1 -0
- package/dist/src/engines/pdf/pdfjsImporter.d.ts +5 -0
- package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfjsImporter.js +9 -0
- package/dist/src/engines/pdf/pdfjsImporter.js.map +1 -0
- package/dist/src/index.d.ts +3 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +5 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/lib.d.ts +17 -0
- package/dist/src/lib.d.ts.map +1 -0
- package/dist/src/lib.js +16 -0
- package/dist/src/lib.js.map +1 -0
- package/dist/src/output/json.d.ts +10 -0
- package/dist/src/output/json.d.ts.map +1 -0
- package/dist/src/output/json.js +31 -0
- package/dist/src/output/json.js.map +1 -0
- package/dist/src/output/json.test.d.ts +2 -0
- package/dist/src/output/json.test.d.ts.map +1 -0
- package/dist/src/output/json.test.js +136 -0
- package/dist/src/output/json.test.js.map +1 -0
- package/dist/src/output/text.d.ts +10 -0
- package/dist/src/output/text.d.ts.map +1 -0
- package/dist/src/output/text.js +17 -0
- package/dist/src/output/text.js.map +1 -0
- package/dist/src/output/text.test.d.ts +2 -0
- package/dist/src/output/text.test.d.ts.map +1 -0
- package/dist/src/output/text.test.js +65 -0
- package/dist/src/output/text.test.js.map +1 -0
- package/dist/src/processing/bbox.d.ts +20 -0
- package/dist/src/processing/bbox.d.ts.map +1 -0
- package/dist/src/processing/bbox.js +258 -0
- package/dist/src/processing/bbox.js.map +1 -0
- package/dist/src/processing/bbox.test.d.ts +2 -0
- package/dist/src/processing/bbox.test.d.ts.map +1 -0
- package/dist/src/processing/bbox.test.js +334 -0
- package/dist/src/processing/bbox.test.js.map +1 -0
- package/dist/src/processing/cleanText.d.ts +6 -0
- package/dist/src/processing/cleanText.d.ts.map +1 -0
- package/dist/src/processing/cleanText.js +73 -0
- package/dist/src/processing/cleanText.js.map +1 -0
- package/dist/src/processing/cleanText.test.d.ts +2 -0
- package/dist/src/processing/cleanText.test.d.ts.map +1 -0
- package/dist/src/processing/cleanText.test.js +46 -0
- package/dist/src/processing/cleanText.test.js.map +1 -0
- package/dist/src/processing/grid.d.ts +7 -0
- package/dist/src/processing/grid.d.ts.map +1 -0
- package/dist/src/processing/grid.js +13 -0
- package/dist/src/processing/grid.js.map +1 -0
- package/dist/src/processing/gridProjection.d.ts +18 -0
- package/dist/src/processing/gridProjection.d.ts.map +1 -0
- package/dist/src/processing/gridProjection.js +1392 -0
- package/dist/src/processing/gridProjection.js.map +1 -0
- package/dist/src/processing/gridProjection.test.d.ts +2 -0
- package/dist/src/processing/gridProjection.test.d.ts.map +1 -0
- package/dist/src/processing/gridProjection.test.js +464 -0
- package/dist/src/processing/gridProjection.test.js.map +1 -0
- package/dist/src/processing/markupUtils.d.ts +7 -0
- package/dist/src/processing/markupUtils.d.ts.map +1 -0
- package/dist/src/processing/markupUtils.js +25 -0
- package/dist/src/processing/markupUtils.js.map +1 -0
- package/dist/src/processing/markupUtils.test.d.ts +2 -0
- package/dist/src/processing/markupUtils.test.d.ts.map +1 -0
- package/dist/src/processing/markupUtils.test.js +26 -0
- package/dist/src/processing/markupUtils.test.js.map +1 -0
- package/dist/src/processing/ocrUtils.d.ts +24 -0
- package/dist/src/processing/ocrUtils.d.ts.map +1 -0
- package/dist/src/processing/ocrUtils.js +79 -0
- package/dist/src/processing/ocrUtils.js.map +1 -0
- package/dist/src/processing/octUtils.test.d.ts +2 -0
- package/dist/src/processing/octUtils.test.d.ts.map +1 -0
- package/dist/src/processing/octUtils.test.js +72 -0
- package/dist/src/processing/octUtils.test.js.map +1 -0
- package/dist/src/processing/textUtils.d.ts +20 -0
- package/dist/src/processing/textUtils.d.ts.map +1 -0
- package/dist/src/processing/textUtils.js +142 -0
- package/dist/src/processing/textUtils.js.map +1 -0
- package/dist/src/processing/textUtils.test.d.ts +2 -0
- package/dist/src/processing/textUtils.test.d.ts.map +1 -0
- package/dist/src/processing/textUtils.test.js +45 -0
- package/dist/src/processing/textUtils.test.js.map +1 -0
- package/dist/src/vendor/pdfjs/LICENSE +177 -0
- package/dist/src/vendor/pdfjs/README.md +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/LICENSE +36 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/dist/src/vendor/pdfjs/pdf.mjs +19481 -0
- package/dist/src/vendor/pdfjs/pdf.mjs.map +1 -0
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
- package/dist/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
- package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
- package/package.json +89 -0
- package/src/vendor/pdfjs/LICENSE +177 -0
- package/src/vendor/pdfjs/README.md +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
- package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/LICENSE +36 -0
- package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/src/vendor/pdfjs/pdf.mjs +19481 -0
- package/src/vendor/pdfjs/pdf.mjs.map +1 -0
- package/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
- package/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
- package/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
- package/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { createWorker, createScheduler } from "tesseract.js";
|
|
2
|
+
/**
 * OCR engine backed by tesseract.js.
 *
 * A single scheduler distributes recognition jobs over a pool of
 * `concurrency` workers that are all loaded with the same language. The pool
 * is created lazily on first use and rebuilt whenever a different language is
 * requested.
 */
export class TesseractEngine {
    name = "tesseract";
    scheduler;
    workers = [];
    currentLanguage;
    concurrency;
    /**
     * @param concurrency Number of workers created by `initialize` (default 4).
     */
    constructor(concurrency = 4) {
        this.concurrency = concurrency;
    }
    /**
     * Builds the scheduler and worker pool for `language`.
     *
     * Does nothing when a pool for the same language already exists; otherwise
     * any existing pool is terminated first.
     *
     * @param language Tesseract language code (default "eng").
     * @throws Error "Tesseract worker not initialized" when `createWorker`
     *   resolves to a falsy value; errors thrown by `createWorker` propagate.
     */
    async initialize(language = "eng") {
        if (this.scheduler && this.currentLanguage === language) {
            return; // Already initialized for this language
        }
        // Clean up existing scheduler and workers if language changed
        await this.terminate();
        this.scheduler = createScheduler();
        try {
            for (let i = 0; i < this.concurrency; i++) {
                // Second argument is the OCR engine mode passed to tesseract.js.
                const worker = await createWorker(language, 1);
                if (!worker) {
                    throw new Error("Tesseract worker not initialized");
                }
                this.workers.push(worker);
                this.scheduler.addWorker(worker);
            }
        }
        catch (error) {
            // Do not leave a half-built pool behind: release whatever was created
            // so far, then surface the original failure.
            try {
                await this.terminate();
            }
            catch {
                // Ignore cleanup failures; the initialization error is more useful.
            }
            throw error;
        }
        this.currentLanguage = language;
    }
    /**
     * Runs OCR on one image and returns its words.
     *
     * @param imagePath Image handed to the scheduler's "recognize" job.
     * @param options Options object; only `language` (string or array, first
     *   entry used) is read. A missing language falls back to "eng".
     * @returns Array of `{ text, bbox: [x0, y0, x1, y1], confidence }` with
     *   confidence scaled to 0-1; words at or below 0.3 are dropped. If the
     *   recognition job fails, the error is logged and `[]` is returned.
     * @throws Errors raised while initializing the worker pool.
     */
    async recognize(imagePath, options) {
        // tesseract.js uses language codes like 'eng', 'fra', 'deu'
        const language = this.resolveLanguage(options);
        await this.initialize(language);
        if (!this.scheduler) {
            throw new Error("Tesseract scheduler not initialized");
        }
        try {
            // In tesseract.js v6+, we need to enable blocks output to get word-level data
            const { data: { blocks }, } = await this.scheduler.addJob("recognize", imagePath, {}, { blocks: true });
            // Extract words from hierarchical blocks structure: blocks → paragraphs → lines → words
            const results = [];
            for (const block of blocks || []) {
                for (const paragraph of block.paragraphs || []) {
                    for (const line of paragraph.lines || []) {
                        for (const word of line.words || []) {
                            results.push({
                                text: word.text,
                                bbox: [word.bbox.x0, word.bbox.y0, word.bbox.x1, word.bbox.y1],
                                confidence: word.confidence / 100, // Tesseract returns 0-100, we want 0-1
                            });
                        }
                    }
                }
            }
            // Filter out low confidence results (30% and below)
            return results.filter((r) => r.confidence > 0.3);
        }
        catch (error) {
            // Best effort: a failing image yields no words instead of aborting the caller.
            console.error(`\nTesseract OCR error for ${imagePath}:`, error);
            return [];
        }
    }
    /**
     * Runs OCR on several images concurrently.
     *
     * @param imagePaths Images to recognize.
     * @param options Same options object as `recognize`.
     * @returns One word array per input image, in input order.
     * @throws Errors raised while initializing the worker pool.
     */
    async recognizeBatch(imagePaths, options) {
        // Initialize once up front so the per-image calls below find a ready pool.
        await this.initialize(this.resolveLanguage(options));
        if (!this.scheduler) {
            throw new Error("Tesseract scheduler not initialized");
        }
        // Process all images in parallel - scheduler handles distribution
        const jobs = imagePaths.map((imagePath) => this.recognize(imagePath, options));
        return Promise.all(jobs);
    }
    /**
     * Terminates the scheduler (if any) and resets the engine to its
     * uninitialized state. Safe to call repeatedly.
     */
    async terminate() {
        if (this.scheduler) {
            await this.scheduler.terminate();
            this.scheduler = undefined;
        }
        this.workers = [];
        this.currentLanguage = undefined;
    }
    /**
     * Picks the language to use from an options object: the first entry when
     * `language` is an array, the value itself otherwise, normalized to a
     * Tesseract code. Tolerates a missing options object or language.
     */
    resolveLanguage(options) {
        const requested = options?.language;
        return this.normalizeLanguage(Array.isArray(requested) ? requested[0] : requested);
    }
    /**
     * Normalize language codes to Tesseract format
     * Common mappings: en->eng, fr->fra, de->deu, es->spa, zh->chi_sim, ja->jpn
     *
     * Input is lower-cased and trimmed. Unknown codes are returned as-is;
     * a missing, non-string or blank value falls back to "eng".
     */
    normalizeLanguage(lang) {
        const languageMap = {
            en: "eng",
            fr: "fra",
            de: "deu",
            es: "spa",
            it: "ita",
            pt: "por",
            ru: "rus",
            zh: "chi_sim",
            "zh-cn": "chi_sim",
            "zh-tw": "chi_tra",
            ja: "jpn",
            ko: "kor",
            ar: "ara",
            hi: "hin",
            th: "tha",
            vi: "vie",
        };
        const normalized = typeof lang === "string" ? lang.toLowerCase().trim() : "";
        if (!normalized) {
            return "eng";
        }
        // Own-property check so inputs such as "constructor" cannot resolve to
        // members inherited from Object.prototype.
        return Object.prototype.hasOwnProperty.call(languageMap, normalized)
            ? languageMap[normalized]
            : normalized;
    }
}
|
|
112
|
+
//# sourceMappingURL=tesseract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tesseract.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,eAAe,EAAqB,MAAM,cAAc,CAAC;AAGhF,MAAM,OAAO,eAAe;IAC1B,IAAI,GAAG,WAAW,CAAC;IACX,SAAS,CAAa;IACtB,OAAO,GAAa,EAAE,CAAC;IACvB,eAAe,CAAU;IACzB,WAAW,CAAS;IAE5B,YAAY,cAAsB,CAAC;QACjC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,WAAmB,KAAK;QACvC,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,KAAK,QAAQ,EAAE,CAAC;YACxD,OAAO,CAAC,wCAAwC;QAClD,CAAC;QAED,8DAA8D;QAC9D,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QAEvB,mBAAmB;QACnB,IAAI,CAAC,SAAS,GAAG,eAAe,EAAE,CAAC;QAEnC,qBAAqB;QACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;YAC/C,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;YACtD,CAAC;YACD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,SAAiB,EAAE,OAAmB;QACpD,8EAA8E;QAC9E,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,CAAC;YACH,4CAA4C;YAC5C,8EAA8E;YAC9E,MAAM,EACJ,IAAI,EAAE,EAAE,MAAM,EAAE,GACjB,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,SAAS,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YAE9E,wFAAwF;YACxF,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,KAAK,MAAM,KAAK,IAAI,MAAM,IAAI,EAAE,EAAE,CAAC;gBACjC,KAAK,MAAM,SAAS,IAAI,KAAK,CAAC,UAAU,IAAI,EAAE,EAAE,CAAC;oBAC/C,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;wBACzC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;4BACpC,OAAO,CAAC,IAAI,CAAC;gCACX,IAAI,EAAE,IAAI,CAAC,IAAI;gCACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE
,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;gCACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;6BAC3E,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gDAAgD;YAChD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC;QACnD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,6BAA6B,SAAS,GAAG,EAAE,KAAK,CAAC,CAAC;YAChE,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,UAAoB,EAAE,OAAmB;QAC5D,kBAAkB;QAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,kEAAkE;QAClE,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;QAE/E,OAAO,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,SAAS;QACb,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;YACjC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC7B,CAAC;QACD,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;QAClB,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;IACnC,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,IAAY;QACpC,MAAM,WAAW,GAA2B;YAC1C,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,SAAS;YACb,OAAO,EAAE,SAAS;YAClB,OAAO,EAAE,SAAS;YAClB,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;SACV,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAC7C,OAAO,WAAW,CAAC,UAAU,CAAC,IAAI,UAAU,CAAC;IAC/C,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tesseract.test.d.ts","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { vi, describe, it, expect } from "vitest";
|
|
2
|
+
// In tesseract.js v6+, words are nested in blocks → paragraphs → lines → words
|
|
3
|
+
// Word-level fixtures in the shape tesseract.js reports them (confidence 0-100).
const mockWords = [
    {
        text: "Hello",
        confidence: 95,
        bbox: { x0: 0, y0: 0, x1: 50, y1: 20 },
    },
    {
        text: "World",
        confidence: 92,
        bbox: { x0: 60, y0: 0, x1: 120, y1: 20 },
    },
];
// Recognition result wrapping the words in a single block/paragraph/line.
const mockTesseractResult = {
    data: {
        text: "Hello World",
        blocks: [
            {
                paragraphs: [
                    {
                        lines: [
                            {
                                words: mockWords,
                            },
                        ],
                    },
                ],
            },
        ],
        confidence: 93,
    },
};
// Expected engine output for the fixtures above: flat words with array bboxes.
const mockResults = mockWords.map((word) => ({
    text: word.text,
    bbox: [word.bbox.x0, word.bbox.y0, word.bbox.x1, word.bbox.y1],
    confidence: word.confidence / 100, // Tesseract returns 0-100, we want 0-1
}));
// Stand-in worker; every recognize call resolves to the canned result.
const mockTesseractWorker = {
    terminate: vi.fn(async () => { }),
    recognize: vi.fn(async () => {
        return mockTesseractResult;
    }),
};
// Replace only createWorker; everything else (including createScheduler)
// comes from the real tesseract.js module.
vi.mock("tesseract.js", async () => {
    const actual = await vi.importActual("tesseract.js");
    return {
        ...actual,
        createWorker: vi.fn(async (language, _num) => {
            // Italian simulates a worker that fails to come up: resolve to
            // undefined so the engine reports "Tesseract worker not initialized".
            if (language === "it" || language === "ita") {
                return undefined;
            }
            return mockTesseractWorker;
        }),
    };
});
|
|
57
|
+
import { TesseractEngine } from "./tesseract";
|
|
58
|
+
// Single-image path: TesseractEngine.recognize against the mocked worker.
describe("test Tesseract OCR (single image)", () => {
    it("test engine success", async () => {
        const ocr = new TesseractEngine();
        expect(ocr.name).toBe("tesseract");
        // "en" is normalized by the engine before the worker pool is created.
        const words = await ocr.recognize("cat.png", { language: "en" });
        expect(words).toStrictEqual(mockResults);
    });
    it("test engine failure (failed to initialize)", async () => {
        const ocr = new TesseractEngine();
        expect(ocr.name).toBe("tesseract");
        // The createWorker mock yields no worker for Italian, so setup must reject.
        const pending = ocr.recognize("cat.png", { language: "it" });
        await expect(pending).rejects.toThrow("Tesseract worker not initialized");
    });
});
|
|
71
|
+
// Batch path: TesseractEngine.recognizeBatch against the mocked worker.
describe("test Tesseract OCR (batch)", () => {
    it("test engine success", async () => {
        const engine = new TesseractEngine();
        expect(engine.name).toBe("tesseract");
        const result = await engine.recognizeBatch(["cat.png", "dog.png"], { language: "en" });
        // One result array per input image, in input order.
        expect(result).toStrictEqual([mockResults, mockResults]);
    });
    it("test engine failure (failed to initialize)", async () => {
        const engine = new TesseractEngine();
        expect(engine.name).toBe("tesseract");
        // The createWorker mock yields no worker for Italian, so setup must reject.
        await expect(engine.recognizeBatch(["cat.png", "dog.png"], { language: "it" })).rejects.toThrow("Tesseract worker not initialized");
    });
});
|
|
84
|
+
//# sourceMappingURL=tesseract.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tesseract.test.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAElD,+EAA+E;AAC/E,MAAM,SAAS,GAAG;IAChB;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE;KACvC;IACD;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,EAAE,EAAE;KACzC;CACF,CAAC;AAEF,MAAM,mBAAmB,GAAG;IAC1B,IAAI,EAAE;QACJ,IAAI,EAAE,aAAa;QACnB,MAAM,EAAE;YACN;gBACE,UAAU,EAAE;oBACV;wBACE,KAAK,EAAE;4BACL;gCACE,KAAK,EAAE,SAAS;6BACjB;yBACF;qBACF;iBACF;aACF;SACF;QACD,UAAU,EAAE,EAAE;KACf;CACF,CAAC;AAEF,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC3C,IAAI,EAAE,IAAI,CAAC,IAAI;IACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;IACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;CAC3E,CAAC,CAAC,CAAC;AAEJ,MAAM,mBAAmB,GAAG;IAC1B,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,GAAE,CAAC,CAAC;IAChC,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;QAC1B,OAAO,mBAAmB,CAAC;IAC7B,CAAC,CAAC;CACH,CAAC;AAEF,EAAE,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,IAAI,EAAE;IACjC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,YAAY,CAAgC,cAAc,CAAC,CAAC;IACpF,OAAO;QACL,GAAG,MAAM;QACT,YAAY,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAY,EAAE,EAAE;YAC3D,IAAI,QAAQ,IAAI,IAAI,IAAI,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC1C,OAAO;YACT,CAAC;YACD,OAAO,mBAAmB,CAAC;QAC7B,CAAC,CAAC;KACH,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,QAAQ,CAAC,mCAAmC,EAAE,GAAG,EAAE;IACjD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACrE,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,WAAW,CAAC,CA
AC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC3E,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,qCAAqC,EAAE,GAAG,EAAE;IACnD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACvF,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC7F,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { TextItem } from "../../core/types.js";
|
|
2
|
+
/**
 * Contract for a PDF backend: load a document, extract page data, render a
 * page to an image buffer, and close the document.
 */
export interface PdfEngine {
    /** Engine identifier. */
    name: string;
    /** Loads the PDF at `filePath` and resolves to a document handle. */
    loadDocument(filePath: string): Promise<PdfDocument>;
    /**
     * Extracts the data of one page.
     * NOTE(review): whether `pageNum` is 0- or 1-based is not visible here — confirm.
     */
    extractPage(doc: PdfDocument, pageNum: number): Promise<PageData>;
    /**
     * Extracts several pages, optionally limited by `maxPages` and/or a
     * `targetPages` selector string.
     * NOTE(review): the selector syntax is not visible here — confirm against the implementation.
     */
    extractAllPages(doc: PdfDocument, maxPages?: number, targetPages?: string): Promise<PageData[]>;
    /** Renders one page at the given DPI and resolves to the image bytes. */
    renderPageImage(doc: PdfDocument, pageNum: number, dpi: number): Promise<Buffer>;
    /** Releases the resources held for `doc`. */
    close(doc: PdfDocument): Promise<void>;
}
|
|
10
|
+
/** Handle for a loaded PDF, as passed between `PdfEngine` methods. */
export interface PdfDocument {
    /** Number of pages in the document. */
    numPages: number;
    /** Binary payload; presumably the raw PDF bytes — TODO confirm. */
    data: Uint8Array;
    /** Optional engine-specific metadata; its shape is not constrained here. */
    metadata?: unknown;
}
|
|
15
|
+
/**
 * Bounding box region, described by an origin point and a size.
 * NOTE(review): units and which corner `x`/`y` denote are not visible here — confirm.
 */
export interface BoundingBox {
    x: number;
    y: number;
    width: number;
    height: number;
}
|
|
22
|
+
/** Everything extracted from a single page. */
export interface PageData {
    /** Page number this data belongs to. */
    pageNum: number;
    /** Page width. */
    width: number;
    /** Page height. */
    height: number;
    /** Text items found on the page. */
    textItems: TextItem[];
    /** Images found on the page. */
    images: Image[];
    /** Annotations found on the page, when available. */
    annotations?: Annotation[];
    /** Bounding boxes of garbled text that was filtered out (for targeted OCR) */
    garbledTextRegions?: BoundingBox[];
}
|
|
32
|
+
/** A vector drawing primitive: a rectangle, line or curve. */
export interface Path {
    /** Kind of primitive. */
    type: "rectangle" | "line" | "curve";
    /** List of points, each a number array; presumably [x, y] pairs — TODO confirm. */
    points: number[][];
    /** Optional color string; format not specified here. */
    color?: string;
    /** Optional width; presumably the stroke width — TODO confirm. */
    width?: number;
}
|
|
38
|
+
/** An image region on a page, optionally carrying its pixels and OCR output. */
export interface Image {
    x: number;
    y: number;
    width: number;
    height: number;
    /** Image bytes, when they were captured. */
    data?: Buffer;
    /**
     * Alternative x/y/w/h rectangle.
     * NOTE(review): how this relates to the top-level x/y/width/height is not visible here — confirm.
     */
    coords?: {
        x: number;
        y: number;
        w: number;
        h: number;
    };
    /** Presumably the scale applied when the image was rendered — TODO confirm. */
    scaleFactor?: number;
    /** Presumably the rotation of the source image; unit not visible here — TODO confirm. */
    originalOrientationAngle?: number;
    /** Free-form type label. */
    type?: string;
    /** Raw OCR lines in EasyOCR tuple form. */
    ocrRaw?: EasyOcrResultLine[];
    /** OCR results as rectangles with text and a confidence value. */
    ocrParsed?: Array<{
        x: number;
        y: number;
        w: number;
        h: number;
        confidence: number;
        text: string;
    }>;
}
|
|
63
|
+
/**
 * One OCR result line as a 3-tuple: four [number, number] points, a string,
 * and a string-or-number value.
 * NOTE(review): presumably the box corner points, the recognized text and its
 * confidence, following EasyOCR's output format — confirm.
 */
export type EasyOcrResultLine = [
    [
        [number, number],
        [number, number],
        [number, number],
        [number, number]
    ],
    string,
    string | number
];
|
|
73
|
+
/** A page annotation. */
export interface Annotation {
    /** Annotation type label. */
    type: string;
    /** Optional subtype label. */
    subtype?: string;
    /** Target URL, when the annotation carries one. */
    url?: string;
    /** Annotation rectangle as a number array; presumably [x1, y1, x2, y2] — TODO confirm. */
    rect: number[];
}
|
|
79
|
+
//# sourceMappingURL=interface.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"interface.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/interface.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAE/C,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IACrD,WAAW,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAClE,eAAe,CAAC,GAAG,EAAE,WAAW,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;IAChG,eAAe,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACjF,KAAK,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACxC;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,0BAA0B;AAC1B,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,WAAW,CAAC,EAAE,UAAU,EAAE,CAAC;IAC3B,8EAA8E;IAC9E,kBAAkB,CAAC,EAAE,WAAW,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,IAAI;IACnB,IAAI,EAAE,WAAW,GAAG,MAAM,GAAG,OAAO,CAAC;IACrC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,KAAK;IACpB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACxD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wBAAwB,CAAC,EAAE,MAAM,CAAC;IAClC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAC7B,SAAS,CAAC,EAAE,KAAK,CAAC;QAChB,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;CACJ;AAGD,MAAM,MAAM,iBAAiB,GAAG;IAC9B;QAAC,CAAC,MAAM,EAAE,MAAM,CAAC;QAAE,CA
AC,MAAM,EAAE,MAAM,CAAC;QAAE,CAAC,MAAM,EAAE,MAAM,CAAC;QAAE,CAAC,MAAM,EAAE,MAAM,CAAC;KAAC;IACxE,MAAM;IACN,MAAM,GAAG,MAAM;CAChB,CAAC;AAEF,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,EAAE,CAAC;CAChB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"interface.js","sourceRoot":"","sources":["../../../../src/engines/pdf/interface.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * PDFium-based PDF screenshot renderer
 * Uses native PDFium library for high-quality, fast screenshots
 */
export declare class PdfiumRenderer {
    /** Loaded PDFium library instance, or null before `init` / after `close`. */
    private pdfium;
    /** Loads the PDFium library if it is not loaded yet. */
    init(): Promise<void>;
    /**
     * Renders one page of the PDF file at `pdfPath` to a PNG buffer.
     *
     * @param pdfPath Path of the PDF file to read.
     * @param pageNumber 1-based page number.
     * @param dpi Output resolution; defaults to 150.
     */
    renderPageToBuffer(pdfPath: string, pageNumber: number, dpi?: number): Promise<Buffer>;
    /** Destroys the loaded PDFium library instance, if any. */
    close(): Promise<void>;
}
|
|
11
|
+
//# sourceMappingURL=pdfium-renderer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfium-renderer.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.ts"],"names":[],"mappings":"AAIA;;;GAGG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,MAAM,CAA8B;IAEtC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAMrB,kBAAkB,CACtB,OAAO,EAAE,MAAM,EACf,UAAU,EAAE,MAAM,EAClB,GAAG,GAAE,MAAY,GAChB,OAAO,CAAC,MAAM,CAAC;IAgDZ,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAO7B"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { PDFiumLibrary } from "@hyzyla/pdfium";
|
|
2
|
+
import sharp from "sharp";
|
|
3
|
+
import { promises as fs } from "fs";
|
|
4
|
+
/**
 * PDFium-based PDF screenshot renderer
 * Uses native PDFium library for high-quality, fast screenshots
 */
export class PdfiumRenderer {
    // Loaded PDFium library instance; null until init() runs and after close().
    pdfium = null;
    /**
     * Loads the PDFium library on first use; later calls are no-ops.
     */
    async init() {
        if (!this.pdfium) {
            this.pdfium = await PDFiumLibrary.init();
        }
    }
    /**
     * Renders a single page of a PDF file to a PNG image.
     *
     * @param pdfPath Path of the PDF file to read.
     * @param pageNumber 1-based page number.
     * @param dpi Output resolution (default 150); also written to the PNG
     *   density metadata.
     * @returns Buffer holding the PNG-encoded page.
     * @throws RangeError when `pageNumber` is not a positive integer or `dpi`
     *   is not a positive finite number.
     * @throws Errors from reading the file, loading the document or rendering.
     */
    async renderPageToBuffer(pdfPath, pageNumber, dpi = 150) {
        // Reject bad arguments before touching the file system or PDFium;
        // page 0 would otherwise be forwarded to PDFium as index -1.
        if (!Number.isInteger(pageNumber) || pageNumber < 1) {
            throw new RangeError(`Invalid page number: ${pageNumber} (pages are 1-indexed)`);
        }
        if (!Number.isFinite(dpi) || dpi <= 0) {
            throw new RangeError(`Invalid DPI: ${dpi}`);
        }
        await this.init();
        if (!this.pdfium) {
            throw new Error("PDFium not initialized");
        }
        // Read PDF file
        const pdfBuffer = await fs.readFile(pdfPath);
        // Load document
        const document = await this.pdfium.loadDocument(pdfBuffer);
        try {
            // Get page (0-indexed in pdfium)
            const page = document.getPage(pageNumber - 1);
            // Calculate scale from DPI (72 DPI is the default)
            const scale = dpi / 72;
            // Render page using Sharp for image processing
            const image = await page.render({
                scale,
                // PDFium hands over raw RGBA pixels; encode them as PNG.
                render: (options) => sharp(options.data, {
                    raw: {
                        width: options.width,
                        height: options.height,
                        channels: 4, // RGBA
                    },
                })
                    .png({
                    compressionLevel: 6,
                })
                    .withMetadata({
                    density: dpi,
                })
                    .toBuffer(),
            });
            return Buffer.from(image.data);
        }
        finally {
            // Always release the document, even when rendering failed.
            document.destroy();
        }
    }
    /**
     * Destroys the loaded PDFium library instance, if any, so a later call to
     * `init` loads a fresh one.
     */
    async close() {
        if (this.pdfium) {
            this.pdfium.destroy();
            this.pdfium = null;
        }
    }
}
|
|
64
|
+
//# sourceMappingURL=pdfium-renderer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfium-renderer.js","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAgC,MAAM,gBAAgB,CAAC;AAC7E,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,IAAI,CAAC;AAEpC;;;GAGG;AACH,MAAM,OAAO,cAAc;IACjB,MAAM,GAAyB,IAAI,CAAC;IAE5C,KAAK,CAAC,IAAI;QACR,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,IAAI,CAAC,MAAM,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,KAAK,CAAC,kBAAkB,CACtB,OAAe,EACf,UAAkB,EAClB,MAAc,GAAG;QAEjB,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAElB,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAC5C,CAAC;QAED,gBAAgB;QAChB,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAE7C,gBAAgB;QAChB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QAE3D,IAAI,CAAC;YACH,iCAAiC;YACjC,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;YAE9C,mDAAmD;YACnD,MAAM,KAAK,GAAG,GAAG,GAAG,EAAE,CAAC;YAEvB,+CAA+C;YAC/C,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC;gBAC9B,KAAK;gBACL,MAAM,EAAE,KAAK,EAAE,OAAgC,EAAE,EAAE;oBACjD,OAAO,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE;wBAC/B,GAAG,EAAE;4BACH,KAAK,EAAE,OAAO,CAAC,KAAK;4BACpB,MAAM,EAAE,OAAO,CAAC,MAAM;4BACtB,QAAQ,EAAE,CAAC,EAAE,OAAO;yBACrB;qBACF,CAAC;yBACC,GAAG,CAAC;wBACH,gBAAgB,EAAE,CAAC;qBACpB,CAAC;yBACD,YAAY,CAAC;wBACZ,OAAO,EAAE,GAAG;qBACb,CAAC;yBACD,QAAQ,EAAE,CAAC;gBAChB,CAAC;aACF,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC;gBAAS,CAAC;YACT,oBAAoB;YACpB,QAAQ,CAAC,OAAO,EAAE,CAAC;QACrB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,4CAA4C;QAC5C,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACtB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfium-renderer.test.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { vi, describe, it, expect } from "vitest";
|
|
2
|
+
import { PdfiumRenderer } from "./pdfium-renderer";
|
|
3
|
+
// Canned render result: a blank US-Letter-sized RGBA bitmap (612 x 792 x 4 bytes).
const mockPDFiumPageRender = {
    width: 612,
    height: 792,
    originalWidth: 612,
    originalHeight: 792,
    data: new Uint8Array(612 * 792 * 4),
};
// Page stub whose render() resolves to the canned result.
const mockPdfiumPage = {
    render: vi.fn(async () => {
        return mockPDFiumPageRender;
    }),
};
// Document stub returning the page stub for any index.
const mockPdfiumDoc = {
    getPage: vi.fn(() => {
        return mockPdfiumPage;
    }),
    destroy: vi.fn(),
};
// Library stub. loadDocument succeeds on its first call and rejects on its
// second, so the tests using it depend on their execution order.
const mockPdfiumLibrary = {
    loadDocument: vi
        .fn()
        .mockImplementationOnce(async () => {
        return mockPdfiumDoc;
    })
        .mockImplementationOnce(async () => {
        throw new Error("loading error");
    }),
    // The renderer releases the library through destroy().
    destroy: vi.fn(),
};
// Keep the real fs module but make promises.readFile return fixed bytes.
vi.mock("fs", async () => {
    const actual = await vi.importActual("fs");
    return {
        ...actual,
        promises: {
            readFile: vi.fn(async () => {
                return Buffer.from("mock file content");
            }),
        },
    };
});
// Replace PDFiumLibrary so that its static init() hands out the library stub.
vi.mock("@hyzyla/pdfium", async () => {
    const actual = await vi.importActual("@hyzyla/pdfium");
    return {
        ...actual,
        PDFiumLibrary: vi.fn(class {
            constructor() { }
            static init() {
                return mockPdfiumLibrary;
            }
            // implement these just to be on the safe side
            loadDocument = vi
                .fn()
                .mockImplementationOnce(async () => {
                return mockPdfiumDoc;
            })
                .mockImplementationOnce(async () => {
                throw new Error("loading error");
            });
            destroy = vi.fn();
        }),
    };
});
|
|
65
|
+
// NOTE: these two cases rely on running in this order, because the shared
// loadDocument mock succeeds on its first call and rejects on its second.
describe("test renderPageToBuffer", () => {
    it("test success", async () => {
        const pdfRenderer = new PdfiumRenderer();
        const expected = Buffer.from(mockPDFiumPageRender.data);
        const rendered = await pdfRenderer.renderPageToBuffer("test.pdf", 1);
        expect(rendered).toStrictEqual(expected);
    });
    it("test error propagation", async () => {
        const pdfRenderer = new PdfiumRenderer();
        const pending = pdfRenderer.renderPageToBuffer("test.pdf", 1);
        await expect(pending).rejects.toThrow("loading error");
    });
});
|
|
76
|
+
//# sourceMappingURL=pdfium-renderer.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfium-renderer.test.js","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAEnD,MAAM,oBAAoB,GAAG;IAC3B,KAAK,EAAE,GAAG;IACV,MAAM,EAAE,GAAG;IACX,aAAa,EAAE,GAAG;IAClB,cAAc,EAAE,GAAG;IACnB,IAAI,EAAE,IAAI,UAAU,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC;CACpC,CAAC;AAEF,MAAM,cAAc,GAAG;IACrB,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;QACvB,OAAO,oBAAoB,CAAC;IAC9B,CAAC,CAAC;CACH,CAAC;AAEF,MAAM,aAAa,GAAG;IACpB,OAAO,EAAE,EAAE,CAAC,EAAE,CAAC,GAAG,EAAE;QAClB,OAAO,cAAc,CAAC;IACxB,CAAC,CAAC;IACF,OAAO,EAAE,EAAE,CAAC,EAAE,EAAE;CACjB,CAAC;AAEF,MAAM,iBAAiB,GAAG;IACxB,YAAY,EAAE,EAAE;SACb,EAAE,EAAE;SACJ,sBAAsB,CAAC,KAAK,IAAI,EAAE;QACjC,OAAO,aAAa,CAAC;IACvB,CAAC,CAAC;SACD,sBAAsB,CAAC,KAAK,IAAI,EAAE;QACjC,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;IACnC,CAAC,CAAC;IACJ,KAAK,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,GAAE,CAAC,CAAC;CAC7B,CAAC;AAEF,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,IAAI,EAAE;IACvB,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,YAAY,CAAsB,IAAI,CAAC,CAAC;IAChE,OAAO;QACL,GAAG,MAAM;QACT,QAAQ,EAAE;YACR,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;gBACzB,OAAO,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;YAC1C,CAAC,CAAC;SACH;KACF,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,EAAE,CAAC,IAAI,CAAC,gBAAgB,EAAE,KAAK,IAAI,EAAE;IACnC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,YAAY,CAAkC,gBAAgB,CAAC,CAAC;IACxF,OAAO;QACL,GAAG,MAAM;QACT,aAAa,EAAE,EAAE,CAAC,EAAE,CAClB;YACE,gBAAe,CAAC;YAEhB,MAAM,CAAC,IAAI;gBACT,OAAO,iBAAiB,CAAC;YAC3B,CAAC;YAED,8CAA8C;YAC9C,YAAY,GAAG,EAAE;iBACd,EAAE,EAAE;iBACJ,sBAAsB,CAAC,KAAK,IAAI,EAAE;gBACjC,OAAO,aAAa,CAAC;YACvB,CAAC,CAAC;iBACD,sBAAsB,CAAC,KAAK,IAAI,EAAE;gBACjC,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;YACnC,CAAC,CAAC,CAAC;YACL,KAAK,GAAG,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,GAAE,CAAC,CAAC,CAAC;SAC/B,CACF;KACF,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,cAAc,EAAE,KAAK,IAAI,EAAE;QAC5B,MAAM,QAAQ,GAAG,IAAI,cAAc,EAAE,CAAC;
QACtC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,kBAAkB,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QAChE,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC,CAAC;IACvE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wBAAwB,EAAE,KAAK,IAAI,EAAE;QACtC,MAAM,QAAQ,GAAG,IAAI,cAAc,EAAE,CAAC;QACtC,MAAM,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC;IAC5F,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { PdfEngine, PdfDocument, PageData } from "./interface.js";
|
|
2
|
+
/**
 * `PdfEngine` implementation.
 * NOTE(review): judging by the class name and the private `pdfiumRenderer`
 * member, this presumably parses with PDF.js and renders page images through
 * PDFium — confirm against the implementation.
 */
export declare class PdfJsEngine implements PdfEngine {
    /** Engine identifier. */
    name: string;
    private pdfiumRenderer;
    /** Presumably the path of the most recently loaded PDF — TODO confirm. */
    private currentPdfPath;
    /** Loads the PDF at `filePath` and resolves to a document handle. */
    loadDocument(filePath: string): Promise<PdfDocument>;
    /** Extracts the data of one page. */
    extractPage(doc: PdfDocument, pageNum: number): Promise<PageData>;
    /** Extracts several pages, optionally limited by `maxPages` and/or a `targetPages` selector string. */
    extractAllPages(doc: PdfDocument, maxPages?: number, targetPages?: string): Promise<PageData[]>;
    /**
     * Renders one page at the given DPI and resolves to the image bytes.
     * NOTE(review): the `_doc` name suggests the document argument is unused — confirm.
     */
    renderPageImage(_doc: PdfDocument, pageNum: number, dpi: number): Promise<Buffer>;
    /** Releases the resources held for `doc`. */
    close(doc: PdfDocument): Promise<void>;
    /** NOTE(review): presumably parses the `targetPages` selector string — confirm. */
    private parseTargetPages;
}
|
|
13
|
+
//# sourceMappingURL=pdfjs.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfjs.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfjs.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAkC,MAAM,gBAAgB,CAAC;AA4alG,qBAAa,WAAY,YAAW,SAAS;IAC3C,IAAI,SAAW;IACf,OAAO,CAAC,cAAc,CAA+B;IACrD,OAAO,CAAC,cAAc,CAAuB;IAEvC,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAwBpD,WAAW,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC;IAmIjE,eAAe,CACnB,GAAG,EAAE,WAAW,EAChB,QAAQ,CAAC,EAAE,MAAM,EACjB,WAAW,CAAC,EAAE,MAAM,GACnB,OAAO,CAAC,QAAQ,EAAE,CAAC;IAwBhB,eAAe,CAAC,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAajF,KAAK,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAc5C,OAAO,CAAC,gBAAgB;CAyBzB"}
|