@llamaindex/liteparse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +339 -0
- package/dist/cli/parse.d.ts +4 -0
- package/dist/cli/parse.d.ts.map +1 -0
- package/dist/cli/parse.js +401 -0
- package/dist/cli/parse.js.map +1 -0
- package/dist/src/conversion/convertToPdf.d.ts +47 -0
- package/dist/src/conversion/convertToPdf.d.ts.map +1 -0
- package/dist/src/conversion/convertToPdf.js +337 -0
- package/dist/src/conversion/convertToPdf.js.map +1 -0
- package/dist/src/conversion/convertToPdf.test.d.ts +2 -0
- package/dist/src/conversion/convertToPdf.test.d.ts.map +1 -0
- package/dist/src/conversion/convertToPdf.test.js +208 -0
- package/dist/src/conversion/convertToPdf.test.js.map +1 -0
- package/dist/src/core/config.d.ts +4 -0
- package/dist/src/core/config.d.ts.map +1 -0
- package/dist/src/core/config.js +25 -0
- package/dist/src/core/config.js.map +1 -0
- package/dist/src/core/config.test.d.ts +2 -0
- package/dist/src/core/config.test.d.ts.map +1 -0
- package/dist/src/core/config.test.js +21 -0
- package/dist/src/core/config.test.js.map +1 -0
- package/dist/src/core/parser.d.ts +83 -0
- package/dist/src/core/parser.d.ts.map +1 -0
- package/dist/src/core/parser.js +333 -0
- package/dist/src/core/parser.js.map +1 -0
- package/dist/src/core/parser.test.d.ts +2 -0
- package/dist/src/core/parser.test.d.ts.map +1 -0
- package/dist/src/core/parser.test.js +537 -0
- package/dist/src/core/parser.test.js.map +1 -0
- package/dist/src/core/types.d.ts +287 -0
- package/dist/src/core/types.d.ts.map +1 -0
- package/dist/src/core/types.js +2 -0
- package/dist/src/core/types.js.map +1 -0
- package/dist/src/engines/ocr/http-simple.d.ts +19 -0
- package/dist/src/engines/ocr/http-simple.d.ts.map +1 -0
- package/dist/src/engines/ocr/http-simple.js +63 -0
- package/dist/src/engines/ocr/http-simple.js.map +1 -0
- package/dist/src/engines/ocr/http-simple.test.d.ts +2 -0
- package/dist/src/engines/ocr/http-simple.test.d.ts.map +1 -0
- package/dist/src/engines/ocr/http-simple.test.js +108 -0
- package/dist/src/engines/ocr/http-simple.test.js.map +1 -0
- package/dist/src/engines/ocr/interface.d.ts +15 -0
- package/dist/src/engines/ocr/interface.d.ts.map +1 -0
- package/dist/src/engines/ocr/interface.js +2 -0
- package/dist/src/engines/ocr/interface.js.map +1 -0
- package/dist/src/engines/ocr/tesseract.d.ts +19 -0
- package/dist/src/engines/ocr/tesseract.d.ts.map +1 -0
- package/dist/src/engines/ocr/tesseract.js +112 -0
- package/dist/src/engines/ocr/tesseract.js.map +1 -0
- package/dist/src/engines/ocr/tesseract.test.d.ts +2 -0
- package/dist/src/engines/ocr/tesseract.test.d.ts.map +1 -0
- package/dist/src/engines/ocr/tesseract.test.js +84 -0
- package/dist/src/engines/ocr/tesseract.test.js.map +1 -0
- package/dist/src/engines/pdf/interface.d.ts +79 -0
- package/dist/src/engines/pdf/interface.d.ts.map +1 -0
- package/dist/src/engines/pdf/interface.js +2 -0
- package/dist/src/engines/pdf/interface.js.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.d.ts +11 -0
- package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.js +64 -0
- package/dist/src/engines/pdf/pdfium-renderer.js.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +2 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.js +76 -0
- package/dist/src/engines/pdf/pdfium-renderer.test.js.map +1 -0
- package/dist/src/engines/pdf/pdfjs.d.ts +13 -0
- package/dist/src/engines/pdf/pdfjs.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfjs.js +538 -0
- package/dist/src/engines/pdf/pdfjs.js.map +1 -0
- package/dist/src/engines/pdf/pdfjs.test.d.ts +2 -0
- package/dist/src/engines/pdf/pdfjs.test.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfjs.test.js +220 -0
- package/dist/src/engines/pdf/pdfjs.test.js.map +1 -0
- package/dist/src/engines/pdf/pdfjsImporter.d.ts +5 -0
- package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +1 -0
- package/dist/src/engines/pdf/pdfjsImporter.js +9 -0
- package/dist/src/engines/pdf/pdfjsImporter.js.map +1 -0
- package/dist/src/index.d.ts +3 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +5 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/lib.d.ts +17 -0
- package/dist/src/lib.d.ts.map +1 -0
- package/dist/src/lib.js +16 -0
- package/dist/src/lib.js.map +1 -0
- package/dist/src/output/json.d.ts +10 -0
- package/dist/src/output/json.d.ts.map +1 -0
- package/dist/src/output/json.js +31 -0
- package/dist/src/output/json.js.map +1 -0
- package/dist/src/output/json.test.d.ts +2 -0
- package/dist/src/output/json.test.d.ts.map +1 -0
- package/dist/src/output/json.test.js +136 -0
- package/dist/src/output/json.test.js.map +1 -0
- package/dist/src/output/text.d.ts +10 -0
- package/dist/src/output/text.d.ts.map +1 -0
- package/dist/src/output/text.js +17 -0
- package/dist/src/output/text.js.map +1 -0
- package/dist/src/output/text.test.d.ts +2 -0
- package/dist/src/output/text.test.d.ts.map +1 -0
- package/dist/src/output/text.test.js +65 -0
- package/dist/src/output/text.test.js.map +1 -0
- package/dist/src/processing/bbox.d.ts +20 -0
- package/dist/src/processing/bbox.d.ts.map +1 -0
- package/dist/src/processing/bbox.js +258 -0
- package/dist/src/processing/bbox.js.map +1 -0
- package/dist/src/processing/bbox.test.d.ts +2 -0
- package/dist/src/processing/bbox.test.d.ts.map +1 -0
- package/dist/src/processing/bbox.test.js +334 -0
- package/dist/src/processing/bbox.test.js.map +1 -0
- package/dist/src/processing/cleanText.d.ts +6 -0
- package/dist/src/processing/cleanText.d.ts.map +1 -0
- package/dist/src/processing/cleanText.js +73 -0
- package/dist/src/processing/cleanText.js.map +1 -0
- package/dist/src/processing/cleanText.test.d.ts +2 -0
- package/dist/src/processing/cleanText.test.d.ts.map +1 -0
- package/dist/src/processing/cleanText.test.js +46 -0
- package/dist/src/processing/cleanText.test.js.map +1 -0
- package/dist/src/processing/grid.d.ts +7 -0
- package/dist/src/processing/grid.d.ts.map +1 -0
- package/dist/src/processing/grid.js +13 -0
- package/dist/src/processing/grid.js.map +1 -0
- package/dist/src/processing/gridProjection.d.ts +18 -0
- package/dist/src/processing/gridProjection.d.ts.map +1 -0
- package/dist/src/processing/gridProjection.js +1392 -0
- package/dist/src/processing/gridProjection.js.map +1 -0
- package/dist/src/processing/gridProjection.test.d.ts +2 -0
- package/dist/src/processing/gridProjection.test.d.ts.map +1 -0
- package/dist/src/processing/gridProjection.test.js +464 -0
- package/dist/src/processing/gridProjection.test.js.map +1 -0
- package/dist/src/processing/markupUtils.d.ts +7 -0
- package/dist/src/processing/markupUtils.d.ts.map +1 -0
- package/dist/src/processing/markupUtils.js +25 -0
- package/dist/src/processing/markupUtils.js.map +1 -0
- package/dist/src/processing/markupUtils.test.d.ts +2 -0
- package/dist/src/processing/markupUtils.test.d.ts.map +1 -0
- package/dist/src/processing/markupUtils.test.js +26 -0
- package/dist/src/processing/markupUtils.test.js.map +1 -0
- package/dist/src/processing/ocrUtils.d.ts +24 -0
- package/dist/src/processing/ocrUtils.d.ts.map +1 -0
- package/dist/src/processing/ocrUtils.js +79 -0
- package/dist/src/processing/ocrUtils.js.map +1 -0
- package/dist/src/processing/octUtils.test.d.ts +2 -0
- package/dist/src/processing/octUtils.test.d.ts.map +1 -0
- package/dist/src/processing/octUtils.test.js +72 -0
- package/dist/src/processing/octUtils.test.js.map +1 -0
- package/dist/src/processing/textUtils.d.ts +20 -0
- package/dist/src/processing/textUtils.d.ts.map +1 -0
- package/dist/src/processing/textUtils.js +142 -0
- package/dist/src/processing/textUtils.js.map +1 -0
- package/dist/src/processing/textUtils.test.d.ts +2 -0
- package/dist/src/processing/textUtils.test.d.ts.map +1 -0
- package/dist/src/processing/textUtils.test.js +45 -0
- package/dist/src/processing/textUtils.test.js.map +1 -0
- package/dist/src/vendor/pdfjs/LICENSE +177 -0
- package/dist/src/vendor/pdfjs/README.md +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/LICENSE +36 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/dist/src/vendor/pdfjs/pdf.mjs +19481 -0
- package/dist/src/vendor/pdfjs/pdf.mjs.map +1 -0
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
- package/dist/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
- package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
- package/package.json +89 -0
- package/src/vendor/pdfjs/LICENSE +177 -0
- package/src/vendor/pdfjs/README.md +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
- package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/LICENSE +36 -0
- package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/src/vendor/pdfjs/pdf.mjs +19481 -0
- package/src/vendor/pdfjs/pdf.mjs.map +1 -0
- package/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
- package/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
- package/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
- package/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
|
@@ -0,0 +1,538 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import { PdfiumRenderer } from "./pdfium-renderer.js";
|
|
3
|
+
import { importPdfJs } from "./pdfjsImporter.js";
|
|
4
|
+
// Dynamic import of PDF.js. importPdfJs() resolves to an object whose `fn`
// is bound here as getDocument and whose `dir` is the base location used
// below to build the asset URLs.
const { fn: getDocument, dir: PDFJS_DIR } = await importPdfJs();
// Asset locations under PDFJS_DIR; both keep their trailing slash.
const CMAP_URL = `${PDFJS_DIR}/cmaps/`;
const STANDARD_FONT_DATA_URL = `${PDFJS_DIR}/standard_fonts/`;
// NOTE(review): presumably forwarded to PDF.js as its "cMapPacked" option
// (the vendored cmaps are .bcmap files) - confirm at the getDocument call site.
const CMAP_PACKED = true;
|
|
9
|
+
/**
 * Compute the rotation, in degrees, encoded by a PDF transformation matrix.
 *
 * The matrix is laid out as [a, b, c, d, e, f]; the angle is atan2(b, a).
 *
 * @param {number[]} transform - Transformation matrix; only entries 0 and 1 are read.
 * @returns {number} Rotation in degrees, exactly as Math.atan2 reports it (may be negative).
 */
function getRotation(transform) {
    const a = transform[0];
    const b = transform[1];
    const radians = Math.atan2(b, a);
    return radians * (180 / Math.PI);
}
|
|
16
|
+
/**
 * Compose two 2D affine matrices stored as [a, b, c, d, e, f].
 *
 * The result applies m2 first and m1 second.
 *
 * @param {number[]} m1 - Outer (left-hand) matrix.
 * @param {number[]} m2 - Inner (right-hand) matrix.
 * @returns {number[]} New six-element matrix; neither input is modified.
 */
function multiplyMatrices(m1, m2) {
    // Push a column (x, y) of m2 through the linear 2x2 part of m1.
    const throughLinearPart = (x, y) => [
        m1[0] * x + m1[2] * y,
        m1[1] * x + m1[3] * y,
    ];
    const [a, b] = throughLinearPart(m2[0], m2[1]);
    const [c, d] = throughLinearPart(m2[2], m2[3]);
    const [tx, ty] = throughLinearPart(m2[4], m2[5]);
    // Only the translation column picks up m1's own translation.
    return [a, b, c, d, tx + m1[4], ty + m1[5]];
}
|
|
29
|
+
/**
 * Map a point through a 2D affine matrix stored as [a, b, c, d, e, f].
 *
 * @param {{x: number, y: number}} point - Point to transform.
 * @param {number[]} transform - Six-element transformation matrix.
 * @returns {{x: number, y: number}} A new point; the input is left untouched.
 */
function applyTransformation(point, transform) {
    const { x: px, y: py } = point;
    const x = px * transform[0] + py * transform[2] + transform[4];
    const y = px * transform[1] + py * transform[3] + transform[5];
    return { x, y };
}
|
|
38
|
+
// Patterns used while decoding text item strings; compiled once at module load.
//
// Cheap substring probe: lets callers skip the regex when no marker is present.
const BUGGY_FONT_MARKER_CHECK = ":->|>";
// Full marker, shaped `:->|>_<number>_<number>_<|<-:`; group 1 captures the first number.
// Global flag: reset lastIndex before any stateful use.
const BUGGY_FONT_MARKER_REGEX = /:->\|>_(\d+)_\d+_<\|<-:/g;
// One character wrapped in pipes with optional surrounding whitespace, e.g. " |a| ".
// Group 1 captures the wrapped character. Global flag as above.
const PIPE_PATTERN_REGEX = /\s*\|([^|])\|\s*/g;
|
|
42
|
+
/**
 * Candidate glyph-ID -> character tables for "tabular figures" fonts.
 *
 * Fonts that carry a "Differences" array often follow similar conventions for
 * tabular digits, and these tables are derived from such conventions. A single
 * PDF can contain several fonts that assign DIFFERENT characters to the same
 * glyph IDs, so every table is tried and the best-scoring decoding is kept.
 *
 * Glyphs shared by all tables:
 *   - 42  -> '*' (asterisk, used for significance markers)
 *   - 150 -> '-' (minus sign / dash)
 */
const TABULAR_FIGURES_MAPPINGS = [
    // Table 1: bold/header style (e.g., census PDF header row).
    {
        17: "4", 18: "6", 19: "8", 20: "5", 21: "9",
        22: "7", 23: "1", 24: " ", 25: ",", 26: "+",
        27: "-", 28: "3", 29: "0", 30: "2", 31: ".",
        42: "*",
        150: "-",
    },
    // Table 2: book/body style (e.g., census PDF detail rows).
    // Same glyph IDs as table 1, but different character assignments.
    {
        17: "+", 18: "7", 19: "-", 20: "9", 21: "6",
        22: "3", 23: "1", 24: " ", 25: "8", 26: "5",
        27: "4", 28: "0", 29: "2", 30: ".", 31: ",",
        42: "*",
        150: "-",
    },
];
|
|
98
|
+
/**
 * Report whether every glyph ID is itself a plain-text character code.
 *
 * A glyph qualifies when it is printable ASCII (32 through 126) or one of
 * tab (9), line feed (10) or carriage return (13). When all glyphs qualify,
 * String.fromCharCode on them yields readable text.
 *
 * @param {Iterable<number>} glyphs - Glyph IDs to inspect.
 * @returns {boolean} true when all glyphs qualify (also for an empty input).
 */
function canDecodeAsAscii(glyphs) {
    const isPlainTextCode = (code) => {
        if (code === 9 || code === 10 || code === 13) {
            return true;
        }
        return code >= 32 && code <= 126;
    };
    for (const glyph of glyphs) {
        if (!isPlainTextCode(glyph)) {
            return false;
        }
    }
    return true;
}
|
|
112
|
+
/**
 * Score how "number-like" a decoded string looks. Higher is better.
 *
 * Rewards:
 *   +2 per ASCII digit
 *   +5 thousands-grouped integer, e.g. "248,800" (also matches "897")
 *   +5 plain decimal, e.g. "10.5"
 *   +2 starts with a digit, optionally preceded by "*" and/or "-",
 *      e.g. "12", "-1,132", "*-0.4"
 *   +3 digits only, e.g. "897"
 * Penalties:
 *   -10 two or more consecutive "." / "," characters
 *   -5  starts or ends with ".", "," or "+"
 *   -3  a comma not followed by exactly three digits and then ",", "." or end
 *   -3  a period not followed by a digit, unless the string ends with "."
 *
 * @param {string} decoded - Candidate decoding to evaluate.
 * @returns {number} Heuristic score; may be negative.
 */
function scoreNumberFormat(decoded) {
    // Digits are the primary signal that the string is a number.
    const digitCount = (decoded.match(/[0-9]/g) || []).length;
    let score = digitCount * 2;

    // Thousands-grouped integer.
    if (/^\d{1,3}(,\d{3})*$/.test(decoded)) {
        score += 5;
    }
    // Plain decimal.
    if (/^\d+\.\d+$/.test(decoded)) {
        score += 5;
    }
    // Leading digit with an optional significance asterisk and/or minus sign.
    // The asterisk and the minus are matched separately so that "*-0.4"
    // qualifies; a single [*-]? class would accept only one of the two.
    if (/^\*?-?\d/.test(decoded)) {
        score += 2;
    }
    // Digits only.
    if (/^\d+$/.test(decoded)) {
        score += 3;
    }

    // Consecutive punctuation never occurs inside a well-formed number.
    if (/[.,]{2,}/.test(decoded)) {
        score -= 10;
    }
    // Punctuation at either edge (a leading minus/asterisk is allowed).
    if (/^[.,+]|[.,+]$/.test(decoded)) {
        score -= 5;
    }
    // A comma must introduce a complete three-digit group.
    if (/,(?!\d{3}(?:[,.]|$))/.test(decoded)) {
        score -= 3;
    }
    // A period must be followed by a digit; a trailing period is excused here
    // because the edge-punctuation rule above has already penalized it.
    if (/\.(?![0-9])/.test(decoded) && !decoded.endsWith(".")) {
        score -= 3;
    }
    return score;
}
|
|
157
|
+
/**
 * Try to decode buggy-font markers with the known tabular-figures tables.
 *
 * Steps:
 *   1. Collect the glyph ID (first number) from every marker in the string.
 *   2. Give up if all glyphs are plain ASCII codes; the caller's char-code
 *      fallback handles that case.
 *   3. Give up unless every glyph is in the tabular set: 17-31, 42, 150,
 *      or 8/9/10.
 *   4. Decode with each table in TABULAR_FIGURES_MAPPINGS, skipping tables
 *      that lack any of the glyphs, and score the result with
 *      scoreNumberFormat. The first table reaching the highest score wins.
 *   5. Return the winner only if its score is positive.
 *
 * Only marker contents are decoded; text between markers is not carried over
 * into the result.
 *
 * @param {string} str - Raw text item string, possibly containing markers.
 * @returns {string|null} Decoded text, or null when the caller should fall
 *   back to char-code decoding.
 */
function tryDecodeTabularFigures(str) {
    if (!str.includes(BUGGY_FONT_MARKER_CHECK)) {
        return null;
    }

    // matchAll() copies lastIndex from the shared global regex, so start from 0.
    BUGGY_FONT_MARKER_REGEX.lastIndex = 0;
    const glyphs = Array.from(str.matchAll(BUGGY_FONT_MARKER_REGEX), (marker) => Number.parseInt(marker[1], 10));
    if (glyphs.length === 0) {
        return null;
    }

    // Glyphs that already are readable ASCII codes need no table lookup.
    if (canDecodeAsAscii(glyphs)) {
        return null;
    }

    const isTabularGlyph = (g) => (g >= 17 && g <= 31) || // core tabular figures
        g === 42 || // asterisk
        g === 150 || // minus
        g === 8 ||
        g === 9 ||
        g === 10; // some special chars
    if (!glyphs.every(isTabularGlyph)) {
        // Mixed content - not pure tabular figures.
        return null;
    }

    let bestResult = null;
    let bestScore = -Infinity;
    for (const mapping of TABULAR_FIGURES_MAPPINGS) {
        // A table that cannot translate every glyph is not a candidate.
        if (glyphs.some((g) => !mapping[g])) {
            continue;
        }
        const decoded = glyphs.map((g) => mapping[g]).join("");
        const score = scoreNumberFormat(decoded);
        if (score > bestScore) {
            bestScore = score;
            bestResult = decoded;
        }
    }

    // Require a positive score: at least some digits in a plausible layout.
    if (bestResult && bestScore > 0) {
        return bestResult;
    }
    return null;
}
|
|
219
|
+
/**
 * Remove C0 and C1 control characters from a string.
 *
 * Tab (0x09), line feed (0x0A) and carriage return (0x0D) are kept. Control
 * characters can leak into PDF text through font encoding problems even when
 * the text around them is fine, so they are dropped individually.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The text without C0 (0x00-0x1F) and C1 (0x80-0x9F) controls.
 */
function stripControlChars(str) {
    const isStrippable = (code) => {
        const isKeptWhitespace = code === 0x09 || code === 0x0a || code === 0x0d;
        const isC0 = code >= 0x00 && code <= 0x1f && !isKeptWhitespace;
        const isC1 = code >= 0x80 && code <= 0x9f;
        return isC0 || isC1;
    };
    // Spreading iterates by code point, so surrogate pairs stay together.
    return [...str].filter((char) => !isStrippable(char.charCodeAt(0))).join("");
}
|
|
237
|
+
/**
 * Assign a UTF-16 code unit to one of the buckets tallied by
 * isGarbledFontOutput. Checks run in a fixed order and the first match wins,
 * so overlapping ranges (e.g. 0x750-0x77F) land in the earlier bucket.
 *
 * @param {number} code - Code unit to classify.
 * @returns {string} One of "control", "privateUse", "arabic", "latinExtended",
 *   "basicLatinLetter", "suspicious", "normal" or "other".
 */
function classifyCodeUnit(code) {
    const within = (lo, hi) => code >= lo && code <= hi;
    const isCommonWhitespace = code === 0x09 || code === 0x0a || code === 0x0d;

    // C0 controls other than tab/LF/CR, and C1 controls.
    if ((within(0x00, 0x1f) && !isCommonWhitespace) || within(0x80, 0x9f)) {
        return "control";
    }
    // Private Use Area.
    if (within(0xe000, 0xf8ff)) {
        return "privateUse";
    }
    // Arabic block plus the two extended Arabic ranges.
    if (within(0x600, 0x6ff) || within(0x750, 0x77f) || within(0x8a0, 0x8ff)) {
        return "arabic";
    }
    // Latin Extended-A/-B and Latin Extended Additional.
    if (within(0x100, 0x24f) || within(0x1e00, 0x1eff)) {
        return "latinExtended";
    }
    // A-Z and a-z.
    if (within(0x41, 0x5a) || within(0x61, 0x7a)) {
        return "basicLatinLetter";
    }
    // Ranges that rarely show up in ordinary running text.
    if (within(0x700, 0x7ff) || // Syriac, Thaana, NKo
        within(0x800, 0x83f) || // Samaritan
        within(0xfff0, 0xffff) || // Specials
        within(0x2500, 0x25ff) || // Box drawing, geometric shapes
        within(0x0300, 0x036f) // Combining diacritical marks
    ) {
        return "suspicious";
    }
    // Remaining printable ASCII (digits, punctuation, space) and common whitespace.
    if (within(0x20, 0x7e) || isCommonWhitespace) {
        return "normal";
    }
    return "other";
}
/**
 * Heuristically detect text produced by fonts with corrupted ToUnicode maps.
 *
 * Without a usable ToUnicode map, PDF.js can emit characters at unexpected
 * code points. The string is flagged as garbled when any of these hold:
 *
 *   1. It has control characters and more of them than normal characters.
 *   2. It has two or more Private Use Area characters.
 *   3. It has at least two Arabic AND at least two Latin Extended characters.
 *   4. It has three or more suspicious characters, or they exceed 20% of it.
 *   5. Latin Extended exceeds 30% of it while basic Latin letters are below 20%.
 *   6. It has at least three Latin Extended characters together with at least
 *      one Arabic or suspicious character.
 *
 * Strings shorter than three code units are never flagged.
 *
 * @param {string} str - Decoded text item string.
 * @returns {boolean} true when the string looks like garbled font output.
 */
function isGarbledFontOutput(str) {
    if (str.length < 3) {
        return false;
    }

    const tally = {
        control: 0,
        privateUse: 0,
        arabic: 0,
        latinExtended: 0,
        basicLatinLetter: 0,
        suspicious: 0,
        normal: 0,
        other: 0,
    };
    for (const char of str) {
        tally[classifyCodeUnit(char.charCodeAt(0))] += 1;
    }

    // Basic Latin letters count as normal characters too.
    const normalCount = tally.normal + tally.basicLatinLetter;
    // Percentages are taken against the UTF-16 length of the string.
    const totalChars = str.length;

    const mostlyControl = tally.control > 0 && tally.control > normalCount;
    const hasPrivateUse = tally.privateUse >= 2;
    const arabicWithLatinExtended = tally.arabic >= 2 && tally.latinExtended >= 2;
    const manySuspicious = tally.suspicious >= 3 || tally.suspicious > totalChars * 0.2;
    const latinExtendedDominant = tally.latinExtended > totalChars * 0.3 &&
        tally.basicLatinLetter < totalChars * 0.2;
    const scriptMixing = (tally.arabic >= 1 || tally.suspicious >= 1) && tally.latinExtended >= 3;

    return (mostlyControl ||
        hasPrivateUse ||
        arabicWithLatinExtended ||
        manySuspicious ||
        latinExtendedDominant ||
        scriptMixing);
}
|
|
339
|
+
/**
 * PDF engine that extracts text with PDF.js and renders page images with PDFium.
 *
 * One instance tracks one document at a time: loadDocument() records the file
 * path that renderPageImage() later passes to the PDFium renderer, and close()
 * releases everything.
 */
export class PdfJsEngine {
    name = "pdfjs";
    // Created on first renderPageImage() call; released by close().
    pdfiumRenderer = null;
    // Path given to the last loadDocument() call; cleared by close().
    currentPdfPath = null;

    /**
     * Read a PDF from disk and open it with PDF.js.
     *
     * @param {string} filePath - Path of the PDF file.
     * @returns {Promise<{numPages: number, data: Uint8Array, metadata: object, _pdfDocument: object}>}
     *   Document handle accepted by the other methods of this class.
     */
    async loadDocument(filePath) {
        const data = new Uint8Array(await fs.readFile(filePath));
        // Store path for PDFium rendering.
        this.currentPdfPath = filePath;
        const loadingTask = getDocument({
            data,
            cMapUrl: CMAP_URL,
            cMapPacked: CMAP_PACKED,
            standardFontDataUrl: STANDARD_FONT_DATA_URL,
        });
        const pdfDocument = await loadingTask.promise;
        const metadata = await pdfDocument.getMetadata();
        return {
            numPages: pdfDocument.numPages,
            data,
            metadata,
            _pdfDocument: pdfDocument,
        };
    }

    /**
     * Extract the positioned text items of one page.
     *
     * Coordinates are in viewport space at scale 1.0 (top-left origin). Items
     * whose text looks garbled are left out of `textItems`; their bounding
     * boxes are reported in `garbledTextRegions` so OCR can fill them in.
     *
     * @param {object} doc - Handle returned by loadDocument().
     * @param {number} pageNum - 1-based page number.
     * @returns {Promise<object>} Page data: pageNum, width, height, textItems,
     *   images (always empty), annotations (always empty) and
     *   garbledTextRegions (undefined when there are none).
     */
    async extractPage(doc, pageNum) {
        const pdfDocument = doc._pdfDocument;
        const page = await pdfDocument.getPage(pageNum);
        try {
            const viewport = page.getViewport({ scale: 1.0 });
            const textContent = await page.getTextContent();
            const viewportWidth = viewport.width;
            const viewportHeight = viewport.height;
            const viewportTransform = viewport.transform;
            const textItems = [];
            const garbledTextRegions = [];
            for (const item of textContent.items) {
                // Skip items with zero dimensions.
                if (item.height === 0 || item.width === 0) {
                    continue;
                }
                // Combine with the viewport transform to go from PDF coordinates
                // (bottom-up) to screen coordinates (top-down).
                const cm = multiplyMatrices(viewportTransform, item.transform);
                // Lower-left corner (text space origin).
                const ll = applyTransformation({ x: 0, y: 0 }, cm);
                // Scale factors taken directly from the matrix [a, b, c, d, tx, ty]:
                // horizontal = sqrt(a^2 + b^2), vertical = sqrt(c^2 + d^2). Unlike SVD,
                // which sorts singular values by magnitude, this keeps each factor
                // tied to its own axis.
                const scaleX = Math.sqrt(item.transform[0] ** 2 + item.transform[1] ** 2);
                const scaleY = Math.sqrt(item.transform[2] ** 2 + item.transform[3] ** 2);
                // A degenerate matrix would turn the divisions below into NaN or
                // Infinity and yield a meaningless box that slips past the
                // off-page check, so such items are dropped.
                if (scaleX === 0 || scaleY === 0) {
                    continue;
                }
                // Upper-right corner: convert width/height to text space first
                // (divide by the scale factors), then map to viewport space.
                const ur = applyTransformation({ x: item.width / scaleX, y: item.height / scaleY }, cm);
                // Final bounding box in viewport space.
                const left = Math.min(ll.x, ur.x);
                const right = Math.max(ll.x, ur.x);
                const top = Math.min(ll.y, ur.y);
                const bottom = Math.max(ll.y, ur.y);
                // Skip items whose top-left corner lies outside the page.
                if (top < 0 || left < 0 || top > viewportHeight || left > viewportWidth) {
                    continue;
                }
                const width = right - left;
                const height = bottom - top;
                // Rotation from the combined matrix, normalized to the 0-360 range.
                let rotation = getRotation(cm);
                if (rotation < 0) {
                    rotation += 360;
                }
                let decodedStr = decodeTextItemString(item.str);
                // Garbled text is not emitted; remember where it was for OCR.
                if (isGarbledFontOutput(decodedStr)) {
                    garbledTextRegions.push({ x: left, y: top, width, height });
                    continue;
                }
                // Strip leftover control characters from otherwise valid text
                // (e.g., form feed chars that sneak into ligatures like "fi").
                decodedStr = stripControlChars(decodedStr);
                textItems.push({
                    str: decodedStr,
                    x: left,
                    y: top,
                    width,
                    height,
                    w: width,
                    h: height,
                    r: rotation,
                    fontName: item.fontName,
                    fontSize: Math.sqrt(item.transform[0] * item.transform[0] + item.transform[1] * item.transform[1]),
                });
            }
            const images = [];
            // Annotation extraction is skipped - not currently used in the
            // processing pipeline.
            const annotations = [];
            return {
                pageNum,
                width: viewport.width,
                height: viewport.height,
                textItems,
                images,
                annotations,
                garbledTextRegions: garbledTextRegions.length > 0 ? garbledTextRegions : undefined,
            };
        }
        finally {
            // Release page resources even when extraction fails part-way.
            await page.cleanup();
        }
    }

    /**
     * Extract several pages in ascending order.
     *
     * @param {object} doc - Handle returned by loadDocument().
     * @param {number} [maxPages] - Upper bound on the number of pages returned;
     *   a falsy value means no bound.
     * @param {string} [targetPages] - Page selection such as "1-5,10"; when
     *   omitted, pages 1..min(numPages, maxPages) are used.
     * @returns {Promise<object[]>} One extractPage() result per selected page.
     */
    async extractAllPages(doc, maxPages, targetPages) {
        const numPages = Math.min(doc.numPages, maxPages || doc.numPages);
        const pageNumbers = targetPages
            ? this.parseTargetPages(targetPages, doc.numPages)
            : Array.from({ length: numPages }, (_, i) => i + 1);
        const pages = [];
        for (const pageNum of pageNumbers) {
            if (maxPages && pages.length >= maxPages) {
                break;
            }
            // Pages are extracted one after another, not in parallel.
            pages.push(await this.extractPage(doc, pageNum));
        }
        return pages;
    }

    /**
     * Render one page to an image buffer using PDFium (more robust with inline
     * images). The renderer is created on first use.
     *
     * @param {object} _doc - Unused; rendering works from the stored file path.
     * @param {number} pageNum - Page number forwarded to the renderer.
     * @param {number} dpi - Resolution forwarded to the renderer.
     * @returns {Promise<*>} Whatever PdfiumRenderer.renderPageToBuffer resolves to.
     * @throws {Error} When no document path has been recorded by loadDocument().
     */
    async renderPageImage(_doc, pageNum, dpi) {
        if (!this.currentPdfPath) {
            throw new Error("PDF path not available for rendering");
        }
        if (!this.pdfiumRenderer) {
            this.pdfiumRenderer = new PdfiumRenderer();
        }
        return await this.pdfiumRenderer.renderPageToBuffer(this.currentPdfPath, pageNum, dpi);
    }

    /**
     * Release the PDF.js document and, if one was created, the PDFium renderer.
     *
     * The renderer and stored path are cleaned up even when destroying the
     * PDF.js document fails; the failure is still propagated to the caller.
     *
     * @param {object} doc - Handle returned by loadDocument().
     * @returns {Promise<void>}
     */
    async close(doc) {
        const pdfDocument = doc?._pdfDocument;
        try {
            if (typeof pdfDocument?.destroy === "function") {
                await pdfDocument.destroy();
            }
        }
        finally {
            // Detach the renderer before closing it so a failing close() cannot
            // leave a half-closed renderer attached to this engine.
            const renderer = this.pdfiumRenderer;
            this.pdfiumRenderer = null;
            this.currentPdfPath = null;
            if (renderer) {
                await renderer.close();
            }
        }
    }

    /**
     * Parse a page selection string into sorted, de-duplicated page numbers.
     *
     * Accepts comma-separated single pages ("10") and ranges ("1-5"). Numbers
     * are read as decimal. Pages below 1 or above `maxPages` are dropped, and
     * parts that are not numbers contribute nothing.
     *
     * @param {string} targetPages - Selection such as "1-5, 8, 10-12".
     * @param {number} maxPages - Highest valid page number.
     * @returns {number[]} Ascending list of unique page numbers.
     */
    parseTargetPages(targetPages, maxPages) {
        const pages = [];
        for (const part of targetPages.split(",")) {
            const trimmed = part.trim();
            if (trimmed.includes("-")) {
                // Range: "1-5"
                const [start, end] = trimmed.split("-").map((n) => Number.parseInt(n.trim(), 10));
                // A NaN bound makes the loop condition false, so malformed
                // ranges simply add nothing.
                for (let i = start; i <= Math.min(end, maxPages); i++) {
                    if (i >= 1) {
                        pages.push(i);
                    }
                }
            }
            else {
                // Single page: "10"
                const pageNum = Number.parseInt(trimmed, 10);
                if (pageNum >= 1 && pageNum <= maxPages) {
                    pages.push(pageNum);
                }
            }
        }
        return [...new Set(pages)].sort((a, b) => a - b);
    }
}
/**
 * Undo two encoding artifacts in a raw PDF.js text item string.
 *
 * 1. Buggy-font markers (`:->|>_<charCode>_<fontChar>_<|<-:`): decoded with
 *    the tabular-figures tables when possible, otherwise each marker is
 *    replaced by the character whose code is the marker's first number.
 * 2. Pipe-separated characters (" |a| |r| |X| " -> "arX"): when the pattern
 *    occurs, the string is replaced by the captured characters only.
 *
 * @param {string} rawStr - The `str` of a PDF.js text item.
 * @returns {string} The decoded string.
 */
function decodeTextItemString(rawStr) {
    let decoded = rawStr;
    if (decoded.includes(BUGGY_FONT_MARKER_CHECK)) {
        // Try tabular figures decoding first (common in government/census PDFs).
        const tabularDecoded = tryDecodeTabularFigures(decoded);
        if (tabularDecoded) {
            decoded = tabularDecoded;
        }
        else {
            // Fall back to using the glyph ID as the character code.
            BUGGY_FONT_MARKER_REGEX.lastIndex = 0; // Reset shared regex state
            decoded = decoded.replace(BUGGY_FONT_MARKER_REGEX, (_, charCode) => String.fromCharCode(Number.parseInt(charCode, 10)));
        }
    }
    if (decoded.includes("|")) {
        PIPE_PATTERN_REGEX.lastIndex = 0; // matchAll() starts from the regex's lastIndex
        const matches = [...decoded.matchAll(PIPE_PATTERN_REGEX)];
        if (matches.length > 0) {
            decoded = matches.map((m) => m[1]).join("");
        }
    }
    return decoded;
}
|
|
538
|
+
//# sourceMappingURL=pdfjs.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfjs.js","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfjs.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAGlC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AA2CjD,2BAA2B;AAC3B,MAAM,EAAE,EAAE,EAAE,WAAW,EAAE,GAAG,EAAE,SAAS,EAAE,GAAG,MAAM,WAAW,EAAE,CAAC;AAEhE,MAAM,QAAQ,GAAG,GAAG,SAAS,SAAS,CAAC;AACvC,MAAM,sBAAsB,GAAG,GAAG,SAAS,kBAAkB,CAAC;AAC9D,MAAM,WAAW,GAAG,IAAI,CAAC;AAEzB;;;GAGG;AACH,SAAS,WAAW,CAAC,SAAmB;IACtC,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC;AAClE,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,EAAY,EAAE,EAAY;IAClD,OAAO;QACL,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QAC7B,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QAC7B,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QAC7B,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QAC7B,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACrC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;KACtC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAC1B,KAA+B,EAC/B,SAAmB;IAEnB,OAAO;QACL,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC;QACjE,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC;KAClE,CAAC;AACJ,CAAC;AAED,kDAAkD;AAClD,MAAM,uBAAuB,GAAG,0BAA0B,CAAC;AAC3D,MAAM,uBAAuB,GAAG,OAAO,CAAC;AACxC,MAAM,kBAAkB,GAAG,mBAAmB,CAAC;AAE/C;;;;;;;;;;;GAWG;AACH,MAAM,wBAAwB,GAA6B;IACzD,6DAA6D;IAC7D,2BAA2B;IAC3B;QACE,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EA
AE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,GAAG,EAAE,GAAG;KACT;IACD,4DAA4D;IAC5D,4DAA4D;IAC5D;QACE,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,EAAE,EAAE,GAAG;QACP,GAAG,EAAE,GAAG;KACT;CACF,CAAC;AAEF;;;GAGG;AACH,SAAS,gBAAgB,CAAC,MAAgB;IACxC,+EAA+E;IAC/E,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,gEAAgE;QAChE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC;YAChE,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CAAC,OAAe;IACxC,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,+CAA+C;IAC/C,MAAM,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAC1D,KAAK,IAAI,UAAU,GAAG,CAAC,CAAC;IAExB,4CAA4C;IAC5C,qDAAqD;IACrD,IAAI,oBAAoB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACvC,KAAK,IAAI,CAAC,CAAC,CAAC,kBAAkB;IAChC,CAAC;IACD,0BAA0B;IAC1B,IAAI,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC/B,KAAK,IAAI,CAAC,CAAC,CAAC,eAAe;IAC7B,CAAC;IACD,2BAA2B;IAC3B,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC7B,KAAK,IAAI,CAAC,CAAC,CAAC,4BAA4B;IAC1C,CAAC;IACD,uCAAuC;IACvC,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1B,KAAK,IAAI,CAAC,CAAC,CAAC,cAAc;IAC5B,CAAC;IAED,wBAAwB;IACxB,uDAAuD;IACvD,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC7B,KAAK,IAAI,EAAE,CAAC;IACd,CAAC;IACD,sDAAsD;IACtD,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IACD,+DAA+D;IAC/D,IAAI,sBAAsB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IACD,gDAAgD;IAChD,IAAI,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAA
G,CAAC,EAAE,CAAC;QAC1D,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;;;;GAUG;AACH,SAAS,uBAAuB,CAAC,GAAW;IAC1C,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,uBAAuB,CAAC;QAAE,OAAO,IAAI,CAAC;IAExD,yCAAyC;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,KAAK,CAAC;IACV,MAAM,KAAK,GAAG,0BAA0B,CAAC;IACzC,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAErC,wEAAwE;IACxE,IAAI,gBAAgB,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,mDAAmD;IACnD,8EAA8E;IAC9E,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAC/B,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,IAAI,uBAAuB;QAC/C,CAAC,KAAK,EAAE,IAAI,WAAW;QACvB,CAAC,KAAK,GAAG,IAAI,QAAQ;QACrB,CAAC,KAAK,CAAC;QACP,CAAC,KAAK,CAAC;QACP,CAAC,KAAK,EAAE,CAAC,qBAAqB;KACjC,CAAC;IAEF,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,2CAA2C;QAC3C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,4CAA4C;IAC5C,IAAI,UAAU,GAAkB,IAAI,CAAC;IACrC,IAAI,SAAS,GAAG,CAAC,QAAQ,CAAC;IAE1B,KAAK,MAAM,OAAO,IAAI,wBAAwB,EAAE,CAAC;QAC/C,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAE7D,oCAAoC;QACpC,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAC1D,IAAI,QAAQ,GAAG,CAAC;YAAE,SAAS;QAE3B,oDAAoD;QACpD,MAAM,KAAK,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAEzC,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;YACtB,SAAS,GAAG,KAAK,CAAC;YAClB,UAAU,GAAG,OAAO,CAAC;QACvB,CAAC;IACH,CAAC;IAED,iFAAiF;IACjF,IAAI,UAAU,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,GAAW;IACpC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAChC,0EAA0E;QAC1E,IACE,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,CAAC;YACjF,CAAC,IAAI,IAAI,IAAI,IAAI,
IAAI,IAAI,IAAI,CAAC,EAC9B,CAAC;YACD,SAAS;QACX,CAAC;QACD,MAAM,IAAI,IAAI,CAAC;IACjB,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,SAAS,mBAAmB,CAAC,GAAW;IACtC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAEjC,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,kBAAkB,GAAG,CAAC,CAAC;IAC3B,IAAI,qBAAqB,GAAG,CAAC,CAAC;IAC9B,IAAI,eAAe,GAAG,CAAC,CAAC,CAAC,kCAAkC;IAC3D,IAAI,gBAAgB,GAAG,CAAC,CAAC,CAAC,2BAA2B;IACrD,IAAI,eAAe,GAAG,CAAC,CAAC,CAAC,8BAA8B;IAEvD,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAEhC,6FAA6F;QAC7F,oCAAoC;QACpC,IACE,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,CAAC;YACjF,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,EAC9B,CAAC;YACD,gBAAgB,EAAE,CAAC;QACrB,CAAC;QACD,2DAA2D;aACtD,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;YAC1C,eAAe,EAAE,CAAC;QACpB,CAAC;QACD,4EAA4E;aACvE,IACH,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC;YAChC,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC;YAChC,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC,EAChC,CAAC;YACD,WAAW,EAAE,CAAC;QAChB,CAAC;QACD,kEAAkE;QAClE,4CAA4C;aACvC,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,EAAE,CAAC;YAChF,kBAAkB,EAAE,CAAC;QACvB,CAAC;QACD,iCAAiC;aAC5B,IAAI,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,EAAE,CAAC;YAC1E,qBAAqB,EAAE,CAAC;YACxB,eAAe,EAAE,CAAC;QACpB,CAAC;QACD,uDAAuD;QACvD,yBAAyB;QACzB,yBAAyB;QACzB,sBAAsB;QACtB,4BAA4B;QAC5B,6BAA6B;QAC7B,qDAAqD;QACrD,gDAAgD;QAChD,sDAAsD;aACjD,IACH,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC,IAAI,sBAAsB;YAC1D,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC,IAAI,YAAY;YAChD,CAAC,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,IAAI,WAAW;YACjD,CAAC,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,IAAI,gCAAgC;YACtE,CAAC,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,CAAC,2CAA2C;UAC9E,CAAC;YACD,eAAe,EAAE,CAAC;QACpB,CAAC;QACD,2EAA2E;aACtE,IAAI,CAAC,IAAI,IAAI,IAA
I,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YAC3F,eAAe,EAAE,CAAC;QACpB,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,GAAG,CAAC,MAAM,CAAC;IAE9B,gEAAgE;IAChE,+EAA+E;IAC/E,IAAI,gBAAgB,GAAG,CAAC,IAAI,gBAAgB,GAAG,eAAe,EAAE,CAAC;QAC/D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,8DAA8D;IAC9D,IAAI,eAAe,IAAI,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,wEAAwE;IACxE,IAAI,WAAW,IAAI,CAAC,IAAI,kBAAkB,IAAI,CAAC,EAAE,CAAC;QAChD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,8CAA8C;IAC9C,IAAI,eAAe,IAAI,CAAC,IAAI,eAAe,GAAG,UAAU,GAAG,GAAG,EAAE,CAAC;QAC/D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,sEAAsE;IACtE,+DAA+D;IAC/D,IAAI,kBAAkB,GAAG,UAAU,GAAG,GAAG,IAAI,qBAAqB,GAAG,UAAU,GAAG,GAAG,EAAE,CAAC;QACtF,OAAO,IAAI,CAAC;IACd,CAAC;IAED,+DAA+D;IAC/D,IAAI,CAAC,WAAW,IAAI,CAAC,IAAI,eAAe,IAAI,CAAC,CAAC,IAAI,kBAAkB,IAAI,CAAC,EAAE,CAAC;QAC1E,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,OAAO,WAAW;IACtB,IAAI,GAAG,OAAO,CAAC;IACP,cAAc,GAA0B,IAAI,CAAC;IAC7C,cAAc,GAAkB,IAAI,CAAC;IAE7C,KAAK,CAAC,YAAY,CAAC,QAAgB;QACjC,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC;QAEzD,kCAAkC;QAClC,IAAI,CAAC,cAAc,GAAG,QAAQ,CAAC;QAE/B,MAAM,WAAW,GAAG,WAAW,CAAC;YAC9B,IAAI;YACJ,OAAO,EAAE,QAAQ;YACjB,UAAU,EAAE,WAAW;YACvB,mBAAmB,EAAE,sBAAsB;SAC5C,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;QAC9C,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,WAAW,EAAE,CAAC;QAEjD,OAAO;YACL,QAAQ,EAAE,WAAW,CAAC,QAAQ;YAC9B,IAAI;YACJ,QAAQ;YACR,YAAY,EAAE,WAAW;SACD,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,GAAgB,EAAE,OAAe;QACjD,MAAM,WAAW,GAAI,GAA6B,CAAC,YAAY,CAAC;QAChE,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEhD,eAAe;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QAElD,uBAAuB;QACvB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAChD,MAAM,aAAa,GAAG,QAAQ,CAAC,KAAK,CAAC;QACrC,MAAM,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC;QACvC,MAAM,iBAAiB,GAAG,QAAQ,CAAC,SAAS,CAAC;QAE7C,MAAM,SAAS,GAAe,EAAE,CAAC;QACjC,MAAM,kBAAkB,GAAkB,EAAE,CAAC;QAC7C,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC
;YACrC,kCAAkC;YAClC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,KAAK,CAAC;gBAAE,SAAS;YAEpD,iFAAiF;YACjF,2EAA2E;YAC3E,MAAM,EAAE,GAAG,gBAAgB,CAAC,iBAAiB,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;YAE/D,4CAA4C;YAC5C,MAAM,EAAE,GAAG,mBAAmB,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;YAEnD,mEAAmE;YACnE,mCAAmC;YACnC,qCAAqC;YACrC,mCAAmC;YACnC,qEAAqE;YACrE,yEAAyE;YACzE,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;YAC1E,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;YAE1E,wEAAwE;YACxE,uEAAuE;YACvE,MAAM,EAAE,GAAG,mBAAmB,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,KAAK,GAAG,MAAM,EAAE,CAAC,EAAE,IAAI,CAAC,MAAM,GAAG,MAAM,EAAE,EAAE,EAAE,CAAC,CAAC;YAExF,iDAAiD;YACjD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;YAClC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;YACjC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;YAEpC,4EAA4E;YAC5E,IAAI,GAAG,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,IAAI,GAAG,GAAG,cAAc,IAAI,IAAI,GAAG,aAAa;gBAAE,SAAS;YAElF,MAAM,KAAK,GAAG,KAAK,GAAG,IAAI,CAAC;YAC3B,MAAM,MAAM,GAAG,MAAM,GAAG,GAAG,CAAC;YAE5B,yDAAyD;YACzD,IAAI,QAAQ,GAAG,WAAW,CAAC,EAAE,CAAC,CAAC;YAC/B,2BAA2B;YAC3B,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;gBACjB,QAAQ,IAAI,GAAG,CAAC;YAClB,CAAC;YAED,oEAAoE;YACpE,4CAA4C;YAC5C,IAAI,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC;YAC1B,IAAI,UAAU,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC;gBACjD,wEAAwE;gBACxE,MAAM,cAAc,GAAG,uBAAuB,CAAC,UAAU,CAAC,CAAC;gBAC3D,IAAI,cAAc,EAAE,CAAC;oBACnB,UAAU,GAAG,cAAc,CAAC;gBAC9B,CAAC;qBAAM,CAAC;oBACN,iEAAiE;oBACjE,uBAAuB,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,oBAAoB;oBAC3D,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,uBAAuB,EAAE,CAAC,CAAS,EAAE,QAAgB,EAAE,EAAE,CACvF,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CACxC,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,+D
AA+D;YAC/D,sEAAsE;YACtE,IAAI,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC7B,kBAAkB,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,oBAAoB;gBACtD,MAAM,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAC,CAAC;gBAC7D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACvB,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACjD,CAAC;YACH,CAAC;YAED,iEAAiE;YACjE,kEAAkE;YAClE,IAAI,mBAAmB,CAAC,UAAU,CAAC,EAAE,CAAC;gBACpC,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;gBAC5D,SAAS;YACX,CAAC;YAED,yDAAyD;YACzD,8DAA8D;YAC9D,UAAU,GAAG,iBAAiB,CAAC,UAAU,CAAC,CAAC;YAE3C,SAAS,CAAC,IAAI,CAAC;gBACb,GAAG,EAAE,UAAU;gBACf,CAAC,EAAE,IAAI;gBACP,CAAC,EAAE,GAAG;gBACN,KAAK;gBACL,MAAM;gBACN,CAAC,EAAE,KAAK;gBACR,CAAC,EAAE,MAAM;gBACT,CAAC,EAAE,QAAQ;gBACX,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,QAAQ,EAAE,IAAI,CAAC,IAAI,CACjB,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAC9E;aACF,CAAC,CAAC;QACL,CAAC;QAED,MAAM,MAAM,GAAY,EAAE,CAAC;QAE3B,yEAAyE;QACzE,wDAAwD;QACxD,MAAM,WAAW,GAAiB,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QAErB,OAAO;YACL,OAAO;YACP,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,SAAS;YACT,MAAM;YACN,WAAW;YACX,kBAAkB,EAAE,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,SAAS;SACnF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,eAAe,CACnB,GAAgB,EAChB,QAAiB,EACjB,WAAoB;QAEpB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;QAElE,MAAM,KAAK,GAAe,EAAE,CAAC;QAE7B,kCAAkC;QAClC,IAAI,WAAqB,CAAC;QAC1B,IAAI,WAAW,EAAE,CAAC;YAChB,WAAW,GAAG,IAAI,CAAC,gBAAgB,CAAC,WAAW,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC;QACjE,CAAC;aAAM,CAAC;YACN,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;QAED,KAAK,MAAM,OAAO,IAAI,WAAW,EAAE,CAAC;YAClC,IAAI,QAAQ,IAAI,KAAK,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;gBACzC,MAAM;YACR,CAAC;YACD,MAAM,QAA
Q,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACtD,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACvB,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,IAAiB,EAAE,OAAe,EAAE,GAAW;QACnE,4DAA4D;QAC5D,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;YACzB,IAAI,CAAC,cAAc,GAAG,IAAI,cAAc,EAAE,CAAC;QAC7C,CAAC;QAED,OAAO,MAAM,IAAI,CAAC,cAAc,CAAC,kBAAkB,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;IACzF,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,GAAgB;QAC1B,MAAM,WAAW,GAAI,GAA6B,CAAC,YAAY,CAAC;QAChE,IAAI,WAAW,IAAI,WAAW,CAAC,OAAO,EAAE,CAAC;YACvC,MAAM,WAAW,CAAC,OAAO,EAAE,CAAC;QAC9B,CAAC;QAED,wDAAwD;QACxD,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,MAAM,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,CAAC;YAClC,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC;QAC7B,CAAC;QACD,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC;IAC7B,CAAC;IAEO,gBAAgB,CAAC,WAAmB,EAAE,QAAgB;QAC5D,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAErC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC1B,eAAe;gBACf,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;gBACvE,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,QAAQ,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBACtD,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;wBACX,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,oBAAoB;gBACpB,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;gBAClC,IAAI,OAAO,IAAI,CAAC,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;oBACxC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACnD,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfjs.test.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfjs.test.ts"],"names":[],"mappings":""}
|