@llamaindex/liteparse 1.5.2 → 2.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -373
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +87 -0
- package/dist/cli.js.map +1 -0
- package/dist/lib.d.ts +58 -0
- package/dist/lib.d.ts.map +1 -0
- package/dist/lib.js +88 -0
- package/dist/lib.js.map +1 -0
- package/dist/native.d.ts +54 -0
- package/dist/native.d.ts.map +1 -0
- package/dist/native.js +70 -0
- package/dist/native.js.map +1 -0
- package/libpdfium.so +0 -0
- package/liteparse.linux-x64-gnu.node +0 -0
- package/package.json +36 -50
- package/LICENSE +0 -201
- package/dist/cli/parse.d.ts +0 -4
- package/dist/cli/parse.d.ts.map +0 -1
- package/dist/cli/parse.js +0 -450
- package/dist/cli/parse.js.map +0 -1
- package/dist/package.json +0 -90
- package/dist/src/conversion/convertToPdf.d.ts +0 -65
- package/dist/src/conversion/convertToPdf.d.ts.map +0 -1
- package/dist/src/conversion/convertToPdf.js +0 -405
- package/dist/src/conversion/convertToPdf.js.map +0 -1
- package/dist/src/conversion/convertToPdf.test.d.ts +0 -2
- package/dist/src/conversion/convertToPdf.test.d.ts.map +0 -1
- package/dist/src/conversion/convertToPdf.test.js +0 -327
- package/dist/src/conversion/convertToPdf.test.js.map +0 -1
- package/dist/src/core/config.d.ts +0 -4
- package/dist/src/core/config.d.ts.map +0 -1
- package/dist/src/core/config.js +0 -26
- package/dist/src/core/config.js.map +0 -1
- package/dist/src/core/config.test.d.ts +0 -2
- package/dist/src/core/config.test.d.ts.map +0 -1
- package/dist/src/core/config.test.js +0 -21
- package/dist/src/core/config.test.js.map +0 -1
- package/dist/src/core/parser.d.ts +0 -92
- package/dist/src/core/parser.d.ts.map +0 -1
- package/dist/src/core/parser.js +0 -401
- package/dist/src/core/parser.js.map +0 -1
- package/dist/src/core/parser.test.d.ts +0 -2
- package/dist/src/core/parser.test.d.ts.map +0 -1
- package/dist/src/core/parser.test.js +0 -541
- package/dist/src/core/parser.test.js.map +0 -1
- package/dist/src/core/types.d.ts +0 -370
- package/dist/src/core/types.d.ts.map +0 -1
- package/dist/src/core/types.js +0 -2
- package/dist/src/core/types.js.map +0 -1
- package/dist/src/engines/ocr/http-simple.d.ts +0 -19
- package/dist/src/engines/ocr/http-simple.d.ts.map +0 -1
- package/dist/src/engines/ocr/http-simple.js +0 -69
- package/dist/src/engines/ocr/http-simple.js.map +0 -1
- package/dist/src/engines/ocr/http-simple.test.d.ts +0 -2
- package/dist/src/engines/ocr/http-simple.test.d.ts.map +0 -1
- package/dist/src/engines/ocr/http-simple.test.js +0 -108
- package/dist/src/engines/ocr/http-simple.test.js.map +0 -1
- package/dist/src/engines/ocr/interface.d.ts +0 -15
- package/dist/src/engines/ocr/interface.d.ts.map +0 -1
- package/dist/src/engines/ocr/interface.js +0 -2
- package/dist/src/engines/ocr/interface.js.map +0 -1
- package/dist/src/engines/ocr/tesseract.d.ts +0 -20
- package/dist/src/engines/ocr/tesseract.d.ts.map +0 -1
- package/dist/src/engines/ocr/tesseract.js +0 -161
- package/dist/src/engines/ocr/tesseract.js.map +0 -1
- package/dist/src/engines/ocr/tesseract.test.d.ts +0 -2
- package/dist/src/engines/ocr/tesseract.test.d.ts.map +0 -1
- package/dist/src/engines/ocr/tesseract.test.js +0 -94
- package/dist/src/engines/ocr/tesseract.test.js.map +0 -1
- package/dist/src/engines/pdf/interface.d.ts +0 -84
- package/dist/src/engines/pdf/interface.d.ts.map +0 -1
- package/dist/src/engines/pdf/interface.js +0 -2
- package/dist/src/engines/pdf/interface.js.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.d.ts +0 -31
- package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.js +0 -145
- package/dist/src/engines/pdf/pdfium-renderer.js.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +0 -2
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.test.js +0 -109
- package/dist/src/engines/pdf/pdfium-renderer.test.js.map +0 -1
- package/dist/src/engines/pdf/pdfjs.d.ts +0 -14
- package/dist/src/engines/pdf/pdfjs.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjs.js +0 -799
- package/dist/src/engines/pdf/pdfjs.js.map +0 -1
- package/dist/src/engines/pdf/pdfjs.test.d.ts +0 -2
- package/dist/src/engines/pdf/pdfjs.test.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjs.test.js +0 -225
- package/dist/src/engines/pdf/pdfjs.test.js.map +0 -1
- package/dist/src/engines/pdf/pdfjsImporter.d.ts +0 -5
- package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjsImporter.js +0 -45
- package/dist/src/engines/pdf/pdfjsImporter.js.map +0 -1
- package/dist/src/index.d.ts +0 -3
- package/dist/src/index.d.ts.map +0 -1
- package/dist/src/index.js +0 -5
- package/dist/src/index.js.map +0 -1
- package/dist/src/lib.d.ts +0 -19
- package/dist/src/lib.d.ts.map +0 -1
- package/dist/src/lib.js +0 -17
- package/dist/src/lib.js.map +0 -1
- package/dist/src/output/json.d.ts +0 -10
- package/dist/src/output/json.d.ts.map +0 -1
- package/dist/src/output/json.js +0 -32
- package/dist/src/output/json.js.map +0 -1
- package/dist/src/output/json.test.d.ts +0 -2
- package/dist/src/output/json.test.d.ts.map +0 -1
- package/dist/src/output/json.test.js +0 -199
- package/dist/src/output/json.test.js.map +0 -1
- package/dist/src/output/text.d.ts +0 -10
- package/dist/src/output/text.d.ts.map +0 -1
- package/dist/src/output/text.js +0 -17
- package/dist/src/output/text.js.map +0 -1
- package/dist/src/output/text.test.d.ts +0 -2
- package/dist/src/output/text.test.d.ts.map +0 -1
- package/dist/src/output/text.test.js +0 -65
- package/dist/src/output/text.test.js.map +0 -1
- package/dist/src/processing/bbox.d.ts +0 -20
- package/dist/src/processing/bbox.d.ts.map +0 -1
- package/dist/src/processing/bbox.js +0 -258
- package/dist/src/processing/bbox.js.map +0 -1
- package/dist/src/processing/bbox.test.d.ts +0 -2
- package/dist/src/processing/bbox.test.d.ts.map +0 -1
- package/dist/src/processing/bbox.test.js +0 -334
- package/dist/src/processing/bbox.test.js.map +0 -1
- package/dist/src/processing/cleanText.d.ts +0 -6
- package/dist/src/processing/cleanText.d.ts.map +0 -1
- package/dist/src/processing/cleanText.js +0 -73
- package/dist/src/processing/cleanText.js.map +0 -1
- package/dist/src/processing/cleanText.test.d.ts +0 -2
- package/dist/src/processing/cleanText.test.d.ts.map +0 -1
- package/dist/src/processing/cleanText.test.js +0 -46
- package/dist/src/processing/cleanText.test.js.map +0 -1
- package/dist/src/processing/grid.d.ts +0 -7
- package/dist/src/processing/grid.d.ts.map +0 -1
- package/dist/src/processing/grid.js +0 -13
- package/dist/src/processing/grid.js.map +0 -1
- package/dist/src/processing/gridDebugLogger.d.ts +0 -206
- package/dist/src/processing/gridDebugLogger.d.ts.map +0 -1
- package/dist/src/processing/gridDebugLogger.js +0 -446
- package/dist/src/processing/gridDebugLogger.js.map +0 -1
- package/dist/src/processing/gridProjection.d.ts +0 -19
- package/dist/src/processing/gridProjection.d.ts.map +0 -1
- package/dist/src/processing/gridProjection.js +0 -1813
- package/dist/src/processing/gridProjection.js.map +0 -1
- package/dist/src/processing/gridProjection.test.d.ts +0 -2
- package/dist/src/processing/gridProjection.test.d.ts.map +0 -1
- package/dist/src/processing/gridProjection.test.js +0 -495
- package/dist/src/processing/gridProjection.test.js.map +0 -1
- package/dist/src/processing/gridVisualizer.d.ts +0 -14
- package/dist/src/processing/gridVisualizer.d.ts.map +0 -1
- package/dist/src/processing/gridVisualizer.js +0 -166
- package/dist/src/processing/gridVisualizer.js.map +0 -1
- package/dist/src/processing/markupUtils.d.ts +0 -7
- package/dist/src/processing/markupUtils.d.ts.map +0 -1
- package/dist/src/processing/markupUtils.js +0 -25
- package/dist/src/processing/markupUtils.js.map +0 -1
- package/dist/src/processing/markupUtils.test.d.ts +0 -2
- package/dist/src/processing/markupUtils.test.d.ts.map +0 -1
- package/dist/src/processing/markupUtils.test.js +0 -26
- package/dist/src/processing/markupUtils.test.js.map +0 -1
- package/dist/src/processing/ocrUtils.d.ts +0 -24
- package/dist/src/processing/ocrUtils.d.ts.map +0 -1
- package/dist/src/processing/ocrUtils.js +0 -79
- package/dist/src/processing/ocrUtils.js.map +0 -1
- package/dist/src/processing/octUtils.test.d.ts +0 -2
- package/dist/src/processing/octUtils.test.d.ts.map +0 -1
- package/dist/src/processing/octUtils.test.js +0 -72
- package/dist/src/processing/octUtils.test.js.map +0 -1
- package/dist/src/processing/searchItems.d.ts +0 -26
- package/dist/src/processing/searchItems.d.ts.map +0 -1
- package/dist/src/processing/searchItems.js +0 -93
- package/dist/src/processing/searchItems.js.map +0 -1
- package/dist/src/processing/searchItems.test.d.ts +0 -2
- package/dist/src/processing/searchItems.test.d.ts.map +0 -1
- package/dist/src/processing/searchItems.test.js +0 -84
- package/dist/src/processing/searchItems.test.js.map +0 -1
- package/dist/src/processing/textUtils.d.ts +0 -20
- package/dist/src/processing/textUtils.d.ts.map +0 -1
- package/dist/src/processing/textUtils.js +0 -142
- package/dist/src/processing/textUtils.js.map +0 -1
- package/dist/src/processing/textUtils.test.d.ts +0 -2
- package/dist/src/processing/textUtils.test.d.ts.map +0 -1
- package/dist/src/processing/textUtils.test.js +0 -45
- package/dist/src/processing/textUtils.test.js.map +0 -1
- package/dist/src/vendor/pdfjs/LICENSE +0 -177
- package/dist/src/vendor/pdfjs/README.md +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
- package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/LICENSE +0 -36
- package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/dist/src/vendor/pdfjs/jbig2.wasm +0 -0
- package/dist/src/vendor/pdfjs/openjpeg.wasm +0 -0
- package/dist/src/vendor/pdfjs/pdf.mjs +0 -33603
- package/dist/src/vendor/pdfjs/pdf.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
- package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/qcms_bg.wasm +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
- package/src/vendor/pdfjs/LICENSE +0 -177
- package/src/vendor/pdfjs/README.md +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
- package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
- package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
- package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/LICENSE +0 -36
- package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/src/vendor/pdfjs/jbig2.wasm +0 -0
- package/src/vendor/pdfjs/openjpeg.wasm +0 -0
- package/src/vendor/pdfjs/pdf.mjs +0 -33603
- package/src/vendor/pdfjs/pdf.mjs.map +0 -1
- package/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
- package/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
- package/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
- package/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
- package/src/vendor/pdfjs/qcms_bg.wasm +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
- package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
|
@@ -1,799 +0,0 @@
|
|
|
1
|
-
import fs from "node:fs/promises";
|
|
2
|
-
import { PdfiumRenderer } from "./pdfium-renderer.js";
|
|
3
|
-
import { importPdfJs } from "./pdfjsImporter.js";
|
|
4
|
-
// Dynamic import of PDF.js
|
|
5
|
-
const { fn: getDocument, dir: PDFJS_DIR } = await importPdfJs();
|
|
6
|
-
const CMAP_URL = `${PDFJS_DIR}/cmaps/`;
|
|
7
|
-
const STANDARD_FONT_DATA_URL = `${PDFJS_DIR}/standard_fonts/`;
|
|
8
|
-
const WASM_URL = `${PDFJS_DIR}/`;
|
|
9
|
-
const CMAP_PACKED = true;
|
|
10
|
-
/**
 * Compute the rotation, in degrees, encoded in a PDF transformation matrix.
 *
 * Only the first two entries of the matrix [a, b, c, d, e, f] are read; the
 * angle is atan2(b, a) converted from radians. The result may be negative.
 *
 * @param {number[]} transform - Transformation matrix [a, b, c, d, e, f].
 * @returns {number} Rotation angle in degrees.
 */
function getRotation(transform) {
  const a = transform[0];
  const b = transform[1];
  const radians = Math.atan2(b, a);
  return radians * (180 / Math.PI);
}
|
|
17
|
-
/**
 * Compose two 2D affine transformation matrices given in PDF's six-element
 * form [a, b, c, d, e, f].
 *
 * The result applies m2 first and m1 second: the 2x2 linear part of m1 is
 * applied to each column of m2, and m1's translation is added to the
 * transformed translation of m2.
 *
 * @param {number[]} m1 - Outer (left-hand) matrix.
 * @param {number[]} m2 - Inner (right-hand) matrix.
 * @returns {number[]} The six-element product matrix.
 */
function multiplyMatrices(m1, m2) {
  // Apply the 2x2 linear part of m1 to the column vector (x, y).
  const applyLinear = (x, y) => [m1[0] * x + m1[2] * y, m1[1] * x + m1[3] * y];

  const [a, b] = applyLinear(m2[0], m2[1]);
  const [c, d] = applyLinear(m2[2], m2[3]);
  const [tx, ty] = applyLinear(m2[4], m2[5]);

  return [a, b, c, d, tx + m1[4], ty + m1[5]];
}
|
|
30
|
-
/**
 * Map a point through a 2D affine transformation matrix.
 *
 * @param {{x: number, y: number}} point - Point to transform.
 * @param {number[]} transform - Matrix in [a, b, c, d, e, f] form.
 * @returns {{x: number, y: number}} A new object holding the transformed point.
 */
function applyTransformation(point, transform) {
  const px = point.x;
  const py = point.y;
  const x = px * transform[0] + py * transform[2] + transform[4];
  const y = px * transform[1] + py * transform[3] + transform[5];
  return { x, y };
}
|
|
39
|
-
// Pre-compiled regex patterns for string decoding
|
|
40
|
-
const BUGGY_FONT_MARKER_CHECK = ":->|>";
|
|
41
|
-
const PIPE_PATTERN_REGEX = /\s*\|([^|])\|\s*/g;
|
|
42
|
-
/**
|
|
43
|
-
* Adobe Glyph List subset: maps standard PostScript glyph names to Unicode characters.
|
|
44
|
-
*
|
|
45
|
-
* When PDF.js detects a "buggy" font (one whose ToUnicode/encoding maps glyphs to
|
|
46
|
-
* control characters or PUA code points), it emits markers containing the glyph's
|
|
47
|
-
* original char code AND the glyph name from the font's /Differences or /Encoding
|
|
48
|
-
* dictionary. This map resolves those glyph names to correct Unicode characters.
|
|
49
|
-
*
|
|
50
|
-
* This is a ~200-entry subset of the full Adobe Glyph List (~4,300 entries).
|
|
51
|
-
* The full canonical source is: https://github.com/adobe-type-tools/agl-aglfn
|
|
52
|
-
* (see glyphlist.txt). Our subset covers basic Latin, digits, ligatures, punctuation,
|
|
53
|
-
* typographic characters, Greek, math symbols, and common accented Latin. Glyph names
|
|
54
|
-
* not in this subset fall through to the uniXXXX convention and ASCII-range fallbacks
|
|
55
|
-
* in resolveGlyphName(). Add entries here if a PDF's buggy font uses a standard glyph
|
|
56
|
-
* name that isn't covered and doesn't match those fallbacks.
|
|
57
|
-
*/
|
|
58
|
-
const ADOBE_GLYPH_MAP = {
|
|
59
|
-
// Basic Latin letters
|
|
60
|
-
A: "A",
|
|
61
|
-
B: "B",
|
|
62
|
-
C: "C",
|
|
63
|
-
D: "D",
|
|
64
|
-
E: "E",
|
|
65
|
-
F: "F",
|
|
66
|
-
G: "G",
|
|
67
|
-
H: "H",
|
|
68
|
-
I: "I",
|
|
69
|
-
J: "J",
|
|
70
|
-
K: "K",
|
|
71
|
-
L: "L",
|
|
72
|
-
M: "M",
|
|
73
|
-
N: "N",
|
|
74
|
-
O: "O",
|
|
75
|
-
P: "P",
|
|
76
|
-
Q: "Q",
|
|
77
|
-
R: "R",
|
|
78
|
-
S: "S",
|
|
79
|
-
T: "T",
|
|
80
|
-
U: "U",
|
|
81
|
-
V: "V",
|
|
82
|
-
W: "W",
|
|
83
|
-
X: "X",
|
|
84
|
-
Y: "Y",
|
|
85
|
-
Z: "Z",
|
|
86
|
-
a: "a",
|
|
87
|
-
b: "b",
|
|
88
|
-
c: "c",
|
|
89
|
-
d: "d",
|
|
90
|
-
e: "e",
|
|
91
|
-
f: "f",
|
|
92
|
-
g: "g",
|
|
93
|
-
h: "h",
|
|
94
|
-
i: "i",
|
|
95
|
-
j: "j",
|
|
96
|
-
k: "k",
|
|
97
|
-
l: "l",
|
|
98
|
-
m: "m",
|
|
99
|
-
n: "n",
|
|
100
|
-
o: "o",
|
|
101
|
-
p: "p",
|
|
102
|
-
q: "q",
|
|
103
|
-
r: "r",
|
|
104
|
-
s: "s",
|
|
105
|
-
t: "t",
|
|
106
|
-
u: "u",
|
|
107
|
-
v: "v",
|
|
108
|
-
w: "w",
|
|
109
|
-
x: "x",
|
|
110
|
-
y: "y",
|
|
111
|
-
z: "z",
|
|
112
|
-
// Digits
|
|
113
|
-
zero: "0",
|
|
114
|
-
one: "1",
|
|
115
|
-
two: "2",
|
|
116
|
-
three: "3",
|
|
117
|
-
four: "4",
|
|
118
|
-
five: "5",
|
|
119
|
-
six: "6",
|
|
120
|
-
seven: "7",
|
|
121
|
-
eight: "8",
|
|
122
|
-
nine: "9",
|
|
123
|
-
// Ligatures (Unicode presentation forms — decomposed later by stripControlChars)
|
|
124
|
-
fi: "\uFB01",
|
|
125
|
-
fl: "\uFB02",
|
|
126
|
-
ff: "\uFB00",
|
|
127
|
-
ffi: "\uFB03",
|
|
128
|
-
ffl: "\uFB04",
|
|
129
|
-
// Punctuation and symbols
|
|
130
|
-
space: " ",
|
|
131
|
-
period: ".",
|
|
132
|
-
comma: ",",
|
|
133
|
-
colon: ":",
|
|
134
|
-
semicolon: ";",
|
|
135
|
-
hyphen: "-",
|
|
136
|
-
minus: "\u2212",
|
|
137
|
-
slash: "/",
|
|
138
|
-
question: "?",
|
|
139
|
-
dollar: "$",
|
|
140
|
-
parenleft: "(",
|
|
141
|
-
parenright: ")",
|
|
142
|
-
asterisk: "*",
|
|
143
|
-
plus: "+",
|
|
144
|
-
equal: "=",
|
|
145
|
-
numbersign: "#",
|
|
146
|
-
percent: "%",
|
|
147
|
-
ampersand: "&",
|
|
148
|
-
at: "@",
|
|
149
|
-
exclam: "!",
|
|
150
|
-
bracketleft: "[",
|
|
151
|
-
bracketright: "]",
|
|
152
|
-
braceleft: "{",
|
|
153
|
-
braceright: "}",
|
|
154
|
-
underscore: "_",
|
|
155
|
-
quotedbl: '"',
|
|
156
|
-
quotesingle: "'",
|
|
157
|
-
backslash: "\\",
|
|
158
|
-
bar: "|",
|
|
159
|
-
asciitilde: "~",
|
|
160
|
-
asciicircum: "^",
|
|
161
|
-
grave: "`",
|
|
162
|
-
less: "<",
|
|
163
|
-
greater: ">",
|
|
164
|
-
// Typographic
|
|
165
|
-
quoteright: "\u2019",
|
|
166
|
-
quoteleft: "\u2018",
|
|
167
|
-
quotedblleft: "\u201C",
|
|
168
|
-
quotedblright: "\u201D",
|
|
169
|
-
quotesinglbase: "\u201A",
|
|
170
|
-
quotedblbase: "\u201E",
|
|
171
|
-
endash: "\u2013",
|
|
172
|
-
emdash: "\u2014",
|
|
173
|
-
bullet: "\u2022",
|
|
174
|
-
ellipsis: "\u2026",
|
|
175
|
-
dagger: "\u2020",
|
|
176
|
-
daggerdbl: "\u2021",
|
|
177
|
-
guilsinglleft: "\u2039",
|
|
178
|
-
guilsinglright: "\u203A",
|
|
179
|
-
guillemotleft: "\u00AB",
|
|
180
|
-
guillemotright: "\u00BB",
|
|
181
|
-
trademark: "\u2122",
|
|
182
|
-
registered: "\u00AE",
|
|
183
|
-
copyright: "\u00A9",
|
|
184
|
-
// Greek
|
|
185
|
-
Alpha: "\u0391",
|
|
186
|
-
Beta: "\u0392",
|
|
187
|
-
Gamma: "\u0393",
|
|
188
|
-
Delta: "\u2206",
|
|
189
|
-
Epsilon: "\u0395",
|
|
190
|
-
Zeta: "\u0396",
|
|
191
|
-
Eta: "\u0397",
|
|
192
|
-
Theta: "\u0398",
|
|
193
|
-
Iota: "\u0399",
|
|
194
|
-
Kappa: "\u039A",
|
|
195
|
-
Lambda: "\u039B",
|
|
196
|
-
Mu: "\u039C",
|
|
197
|
-
Nu: "\u039D",
|
|
198
|
-
Xi: "\u039E",
|
|
199
|
-
Omicron: "\u039F",
|
|
200
|
-
Pi: "\u03A0",
|
|
201
|
-
Rho: "\u03A1",
|
|
202
|
-
Sigma: "\u03A3",
|
|
203
|
-
Tau: "\u03A4",
|
|
204
|
-
Upsilon: "\u03A5",
|
|
205
|
-
Phi: "\u03A6",
|
|
206
|
-
Chi: "\u03A7",
|
|
207
|
-
Psi: "\u03A8",
|
|
208
|
-
Omega: "\u2126",
|
|
209
|
-
alpha: "\u03B1",
|
|
210
|
-
beta: "\u03B2",
|
|
211
|
-
gamma: "\u03B3",
|
|
212
|
-
delta: "\u03B4",
|
|
213
|
-
epsilon: "\u03B5",
|
|
214
|
-
zeta: "\u03B6",
|
|
215
|
-
eta: "\u03B7",
|
|
216
|
-
theta: "\u03B8",
|
|
217
|
-
iota: "\u03B9",
|
|
218
|
-
kappa: "\u03BA",
|
|
219
|
-
lambda: "\u03BB",
|
|
220
|
-
mu: "\u00B5",
|
|
221
|
-
nu: "\u03BD",
|
|
222
|
-
xi: "\u03BE",
|
|
223
|
-
omicron: "\u03BF",
|
|
224
|
-
pi: "\u03C0",
|
|
225
|
-
rho: "\u03C1",
|
|
226
|
-
sigma: "\u03C3",
|
|
227
|
-
tau: "\u03C4",
|
|
228
|
-
upsilon: "\u03C5",
|
|
229
|
-
phi: "\u03C6",
|
|
230
|
-
chi: "\u03C7",
|
|
231
|
-
psi: "\u03C8",
|
|
232
|
-
omega: "\u03C9",
|
|
233
|
-
// Math symbols
|
|
234
|
-
greaterequal: "\u2265",
|
|
235
|
-
lessequal: "\u2264",
|
|
236
|
-
notequal: "\u2260",
|
|
237
|
-
plusminus: "\u00B1",
|
|
238
|
-
multiply: "\u00D7",
|
|
239
|
-
divide: "\u00F7",
|
|
240
|
-
infinity: "\u221E",
|
|
241
|
-
summation: "\u2211",
|
|
242
|
-
integral: "\u222B",
|
|
243
|
-
partialdiff: "\u2202",
|
|
244
|
-
radical: "\u221A",
|
|
245
|
-
approxequal: "\u2248",
|
|
246
|
-
degree: "\u00B0",
|
|
247
|
-
// Accented Latin (common)
|
|
248
|
-
Aacute: "\u00C1",
|
|
249
|
-
Agrave: "\u00C0",
|
|
250
|
-
Acircumflex: "\u00C2",
|
|
251
|
-
Atilde: "\u00C3",
|
|
252
|
-
Adieresis: "\u00C4",
|
|
253
|
-
Aring: "\u00C5",
|
|
254
|
-
Eacute: "\u00C9",
|
|
255
|
-
Egrave: "\u00C8",
|
|
256
|
-
Ecircumflex: "\u00CA",
|
|
257
|
-
Edieresis: "\u00CB",
|
|
258
|
-
Iacute: "\u00CD",
|
|
259
|
-
Igrave: "\u00CC",
|
|
260
|
-
Icircumflex: "\u00CE",
|
|
261
|
-
Idieresis: "\u00CF",
|
|
262
|
-
Oacute: "\u00D3",
|
|
263
|
-
Ograve: "\u00D2",
|
|
264
|
-
Ocircumflex: "\u00D4",
|
|
265
|
-
Otilde: "\u00D5",
|
|
266
|
-
Odieresis: "\u00D6",
|
|
267
|
-
Uacute: "\u00DA",
|
|
268
|
-
Ugrave: "\u00D9",
|
|
269
|
-
Ucircumflex: "\u00DB",
|
|
270
|
-
Udieresis: "\u00DC",
|
|
271
|
-
Ntilde: "\u00D1",
|
|
272
|
-
Ccedilla: "\u00C7",
|
|
273
|
-
Scaron: "\u0160",
|
|
274
|
-
Zcaron: "\u017D",
|
|
275
|
-
aacute: "\u00E1",
|
|
276
|
-
agrave: "\u00E0",
|
|
277
|
-
acircumflex: "\u00E2",
|
|
278
|
-
atilde: "\u00E3",
|
|
279
|
-
adieresis: "\u00E4",
|
|
280
|
-
aring: "\u00E5",
|
|
281
|
-
eacute: "\u00E9",
|
|
282
|
-
egrave: "\u00E8",
|
|
283
|
-
ecircumflex: "\u00EA",
|
|
284
|
-
edieresis: "\u00EB",
|
|
285
|
-
iacute: "\u00ED",
|
|
286
|
-
igrave: "\u00EC",
|
|
287
|
-
icircumflex: "\u00EE",
|
|
288
|
-
idieresis: "\u00EF",
|
|
289
|
-
oacute: "\u00F3",
|
|
290
|
-
ograve: "\u00F2",
|
|
291
|
-
ocircumflex: "\u00F4",
|
|
292
|
-
otilde: "\u00F5",
|
|
293
|
-
odieresis: "\u00F6",
|
|
294
|
-
uacute: "\u00FA",
|
|
295
|
-
ugrave: "\u00F9",
|
|
296
|
-
ucircumflex: "\u00FB",
|
|
297
|
-
udieresis: "\u00FC",
|
|
298
|
-
ntilde: "\u00F1",
|
|
299
|
-
ccedilla: "\u00E7",
|
|
300
|
-
scaron: "\u0161",
|
|
301
|
-
zcaron: "\u017E",
|
|
302
|
-
ydieresis: "\u00FF",
|
|
303
|
-
// Miscellaneous
|
|
304
|
-
AE: "\u00C6",
|
|
305
|
-
ae: "\u00E6",
|
|
306
|
-
OE: "\u0152",
|
|
307
|
-
oe: "\u0153",
|
|
308
|
-
Eth: "\u00D0",
|
|
309
|
-
eth: "\u00F0",
|
|
310
|
-
Thorn: "\u00DE",
|
|
311
|
-
thorn: "\u00FE",
|
|
312
|
-
germandbls: "\u00DF",
|
|
313
|
-
dotlessi: "\u0131",
|
|
314
|
-
section: "\u00A7",
|
|
315
|
-
paragraph: "\u00B6",
|
|
316
|
-
currency: "\u00A4",
|
|
317
|
-
cent: "\u00A2",
|
|
318
|
-
sterling: "\u00A3",
|
|
319
|
-
yen: "\u00A5",
|
|
320
|
-
Euro: "\u20AC",
|
|
321
|
-
logicalnot: "\u00AC",
|
|
322
|
-
nbspace: "\u00A0",
|
|
323
|
-
};
|
|
324
|
-
/**
 * Resolve a PostScript glyph name to the text it represents.
 *
 * Resolution order:
 *  1. An own entry in ADOBE_GLYPH_MAP (inherited Object.prototype keys such as
 *     "constructor" or "toString" are NOT treated as glyph names).
 *  2. The "uniXXXX" convention: exactly four hexadecimal digits after "uni"
 *     (e.g., "uni00A0" → U+00A0). Code point 0 is rejected.
 *  3. Underscore-separated composite names (e.g., "f_i" → "fi"), where every
 *     component must itself resolve.
 *
 * @param {string} glyphName - Glyph name from the font's encoding.
 * @returns {string|null} The resolved text, or null when the name is unknown.
 */
function resolveGlyphName(glyphName) {
  // Own-property check: `in` would also match prototype members and hand a
  // function back to callers that expect a string.
  if (Object.hasOwn(ADOBE_GLYPH_MAP, glyphName)) {
    return ADOBE_GLYPH_MAP[glyphName];
  }

  // Require four real hex digits; parseInt alone would accept a partially
  // hexadecimal suffix such as "12G4" and silently yield a control character.
  const uniMatch = /^uni([0-9A-Fa-f]{4})$/.exec(glyphName);
  if (uniMatch) {
    const code = Number.parseInt(uniMatch[1], 16);
    if (code > 0) {
      return String.fromCharCode(code);
    }
  }

  // Some fonts spell ligatures as their components joined by underscores
  // ("f_f_i") instead of using the standard ligature names.
  if (glyphName.includes("_")) {
    const resolved = glyphName.split("_").map((part) => resolveGlyphName(part));
    if (resolved.every((text) => text !== null)) {
      return resolved.join("");
    }
  }

  return null;
}
|
|
349
|
-
/**
|
|
350
|
-
* Decode buggy font markers emitted by patched PDF.js.
|
|
351
|
-
*
|
|
352
|
-
* Marker format: :->|>_<glyphId>_<fontCharCode>@<glyphName>@<|<-:
|
|
353
|
-
* The glyph name is delimited by @ instead of _ because some fonts use
|
|
354
|
-
* non-standard glyph names containing underscores (e.g., "f_i" for "fi").
|
|
355
|
-
*
|
|
356
|
-
* Resolution strategy:
|
|
357
|
-
* 1. Use glyph name from font's /Differences or /Encoding dictionary
|
|
358
|
-
* 2. Fall back to glyphId if it's in printable ASCII range (32-126)
|
|
359
|
-
* 3. Drop the character if neither works (better than guessing)
|
|
360
|
-
*/
|
|
361
|
-
const BUGGY_FONT_MARKER_RE = /:->\|>_(\d+)_\d+@([^@]*)@<\|<-:/g;
|
|
362
|
-
/**
 * Replace every buggy-font marker matched by BUGGY_FONT_MARKER_RE in `str`.
 *
 * For each marker the replacement is, in priority order:
 *  1. the text resolveGlyphName() returns for the marker's glyph name;
 *  2. the character for the marker's glyph id when it lies in the printable
 *     ASCII range (32-126);
 *  3. the empty string, i.e. the marker is dropped.
 *
 * @param {string} str - Text that may contain markers.
 * @returns {string} The text with all markers substituted.
 */
function decodeBuggyFontMarkers(str) {
  const substituteMarker = (_match, glyphIdStr, glyphName) => {
    const fromName = glyphName ? resolveGlyphName(glyphName) : null;
    if (fromName) {
      return fromName;
    }
    // The capture group only ever contains decimal digits.
    const glyphId = Number.parseInt(glyphIdStr, 10);
    const isPrintableAscii = glyphId >= 32 && glyphId <= 126;
    return isPrintableAscii ? String.fromCharCode(glyphId) : "";
  };
  return str.replace(BUGGY_FONT_MARKER_RE, substituteMarker);
}
|
|
379
|
-
/**
|
|
380
|
-
* Windows-1252 to Unicode mapping for the C1 control range (0x80-0x9F).
|
|
381
|
-
*
|
|
382
|
-
* Many PDFs encode smart quotes, em-dashes, and other typographic characters
|
|
383
|
-
* using Windows-1252 byte values. When PDF.js decodes these without a proper
|
|
384
|
-
* ToUnicode map, the raw byte values end up in the 0x80-0x9F range — which is
|
|
385
|
-
* technically the C1 control character block in Unicode. Rather than stripping
|
|
386
|
-
* them (which loses apostrophes, quotes, dashes, etc.), we map them to their
|
|
387
|
-
* correct Unicode equivalents.
|
|
388
|
-
*/
|
|
389
|
-
const WINDOWS_1252_TO_UNICODE = {
|
|
390
|
-
0x80: "\u20AC", // €
|
|
391
|
-
0x82: "\u201A", // ‚
|
|
392
|
-
0x83: "\u0192", // ƒ
|
|
393
|
-
0x84: "\u201E", // „
|
|
394
|
-
0x85: "\u2026", // …
|
|
395
|
-
0x86: "\u2020", // †
|
|
396
|
-
0x87: "\u2021", // ‡
|
|
397
|
-
0x88: "\u02C6", // ˆ
|
|
398
|
-
0x89: "\u2030", // ‰
|
|
399
|
-
0x8a: "\u0160", // Š
|
|
400
|
-
0x8b: "\u2039", // ‹
|
|
401
|
-
0x8c: "\u0152", // Œ
|
|
402
|
-
0x8e: "\u017D", // Ž
|
|
403
|
-
0x91: "\u2018", // '
|
|
404
|
-
0x92: "\u2019", // ' (right single quote / apostrophe)
|
|
405
|
-
0x93: "\u201C", // "
|
|
406
|
-
0x94: "\u201D", // "
|
|
407
|
-
0x95: "\u2022", // •
|
|
408
|
-
0x96: "\u2013", // –
|
|
409
|
-
0x97: "\u2014", // —
|
|
410
|
-
0x98: "\u02DC", // ˜
|
|
411
|
-
0x99: "\u2122", // ™
|
|
412
|
-
0x9a: "\u0161", // š
|
|
413
|
-
0x9b: "\u203A", // ›
|
|
414
|
-
0x9c: "\u0153", // œ
|
|
415
|
-
0x9e: "\u017E", // ž
|
|
416
|
-
0x9f: "\u0178", // Ÿ
|
|
417
|
-
};
|
|
418
|
-
/**
|
|
419
|
-
* Unicode ligature decomposition map.
|
|
420
|
-
* PDF fonts often use ligature glyphs; decomposing them to plain ASCII
|
|
421
|
-
* ensures the text is searchable and NLP-friendly.
|
|
422
|
-
*/
|
|
423
|
-
const LIGATURE_MAP = {
|
|
424
|
-
"\uFB00": "ff",
|
|
425
|
-
"\uFB01": "fi",
|
|
426
|
-
"\uFB02": "fl",
|
|
427
|
-
"\uFB03": "ffi",
|
|
428
|
-
"\uFB04": "ffl",
|
|
429
|
-
"\uFB05": "st",
|
|
430
|
-
"\uFB06": "st",
|
|
431
|
-
};
|
|
432
|
-
/**
 * Clean a decoded text run, character by character:
 *  - characters with an entry in LIGATURE_MAP are replaced by that entry;
 *  - characters in the C1 range (0x80-0x9F) are replaced by their
 *    WINDOWS_1252_TO_UNICODE entry, or dropped when there is none;
 *  - C0 controls (0x00-0x1F) are dropped, except tab, newline and carriage
 *    return;
 *  - everything else is kept unchanged.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The cleaned text.
 */
function stripControlChars(str) {
  const pieces = [];
  for (const char of str) {
    const ligatureText = LIGATURE_MAP[char];
    if (ligatureText) {
      pieces.push(ligatureText);
      continue;
    }

    const code = char.charCodeAt(0);

    const isC1 = code >= 0x80 && code <= 0x9f;
    if (isC1) {
      // Positions without a Windows-1252 assignment produce no output.
      const mapped = WINDOWS_1252_TO_UNICODE[code];
      if (mapped) {
        pieces.push(mapped);
      }
      continue;
    }

    const isKeptWhitespace = code === 0x09 || code === 0x0a || code === 0x0d;
    const isC0 = code >= 0x00 && code <= 0x1f;
    if (isC0 && !isKeptWhitespace) {
      continue;
    }

    pieces.push(char);
  }
  return pieces.join("");
}
|
|
463
|
-
/**
 * Heuristically decide whether a text run is garbled output from a font with
 * a broken or missing ToUnicode mapping.
 *
 * Each character is placed into exactly one category by its first UTF-16 code
 * unit. Earlier categories win, so 0x750-0x77F counts as Arabic even though it
 * lies inside the broader 0x700-0x7FF "suspicious" range:
 *   control          C0 controls other than tab/newline/carriage return, and
 *                    C1 positions with no WINDOWS_1252_TO_UNICODE entry
 *   normal           C1 positions that do have a Windows-1252 mapping,
 *                    printable ASCII (0x20-0x7E), tab, newline, carriage return
 *   privateUse       U+E000-U+F8FF
 *   arabic           0x600-0x6FF, 0x750-0x77F, 0x8A0-0x8FF
 *   latinExtended    0x100-0x24F, 0x1E00-0x1EFF
 *   basicLatinLetter A-Z and a-z (these also count towards "normal")
 *   suspicious       0x700-0x7FF, 0x800-0x83F, 0xFFF0-0xFFFF, 0x2500-0x25FF,
 *                    0x0300-0x036F
 * Characters outside every range are not counted at all.
 *
 * Strings shorter than three UTF-16 code units are never reported as garbled.
 *
 * @param {string} str - Decoded text of a single text item.
 * @returns {boolean} True when the string looks like garbled font output.
 */
function isGarbledFontOutput(str) {
  if (str.length < 3) {
    return false;
  }

  const between = (code, low, high) => code >= low && code <= high;
  const isKeptWhitespace = (code) => code === 0x09 || code === 0x0a || code === 0x0d;

  // The order of the checks below is significant: the first match wins.
  const classify = (code) => {
    if (between(code, 0x00, 0x1f) && !isKeptWhitespace(code)) {
      return "control";
    }
    if (between(code, 0x80, 0x9f)) {
      return WINDOWS_1252_TO_UNICODE[code] ? "normal" : "control";
    }
    if (between(code, 0xe000, 0xf8ff)) {
      return "privateUse";
    }
    if (between(code, 0x600, 0x6ff) || between(code, 0x750, 0x77f) || between(code, 0x8a0, 0x8ff)) {
      return "arabic";
    }
    if (between(code, 0x100, 0x24f) || between(code, 0x1e00, 0x1eff)) {
      return "latinExtended";
    }
    if (between(code, 0x41, 0x5a) || between(code, 0x61, 0x7a)) {
      return "basicLatinLetter";
    }
    if (
      between(code, 0x700, 0x7ff) || // Syriac, Thaana, NKo
      between(code, 0x800, 0x83f) || // Samaritan
      between(code, 0xfff0, 0xffff) || // Specials
      between(code, 0x2500, 0x25ff) || // Box drawing, geometric shapes
      between(code, 0x0300, 0x036f) // Combining marks
    ) {
      return "suspicious";
    }
    if (between(code, 0x20, 0x7e) || isKeptWhitespace(code)) {
      return "normal";
    }
    return "uncounted";
  };

  const tally = {
    control: 0,
    normal: 0,
    privateUse: 0,
    arabic: 0,
    latinExtended: 0,
    basicLatinLetter: 0,
    suspicious: 0,
    uncounted: 0,
  };
  for (const char of str) {
    tally[classify(char.charCodeAt(0))] += 1;
  }

  // Basic Latin letters are ordinary printable characters too.
  const normalCount = tally.normal + tally.basicLatinLetter;
  const totalChars = str.length;

  // Control characters outnumber ordinary printable ones.
  const mostlyControl = tally.control > 0 && tally.control > normalCount;
  // Private Use Area characters are almost always garbled fonts.
  const hasPrivateUse = tally.privateUse >= 2;
  // Arabic mixed with Latin Extended is extremely rare in legitimate text.
  const arabicWithLatinExtended = tally.arabic >= 2 && tally.latinExtended >= 2;
  // High concentration of characters from rarely used blocks.
  const manySuspicious = tally.suspicious >= 3 || tally.suspicious > totalChars * 0.2;
  // Mostly Latin Extended with very few plain letters.
  const latinExtendedDominant =
    tally.latinExtended > totalChars * 0.3 && tally.basicLatinLetter < totalChars * 0.2;
  // Any Arabic/suspicious character alongside several Latin Extended ones.
  const scriptMixing = (tally.arabic >= 1 || tally.suspicious >= 1) && tally.latinExtended >= 3;

  return (
    mostlyControl ||
    hasPrivateUse ||
    arabicWithLatinExtended ||
    manySuspicious ||
    latinExtendedDominant ||
    scriptMixing
  );
}
|
|
573
|
-
/**
 * PDF engine that extracts text with PDF.js and delegates image-bounds
 * extraction and page rasterization to a lazily created PdfiumRenderer.
 *
 * One engine instance tracks a single loaded document at a time: the path or
 * bytes passed to loadDocument() are remembered so the PDFium renderer can be
 * opened on the same input later.
 */
export class PdfJsEngine {
  name = "pdfjs";
  pdfiumRenderer = null;
  currentPdfPath = null;
  currentPdfData = null;
  // Password supplied to loadDocument(), reused when the PDFium renderer has
  // to open the same (possibly encrypted) document.
  #password = null;

  /**
   * Open a PDF with PDF.js.
   *
   * @param {string|Uint8Array} input - File path, or the PDF bytes.
   * @param {string} [password] - Document password, if any.
   * @returns {Promise<object>} Document handle with `numPages`, `data`,
   *   `metadata` and the underlying PDF.js document in `_pdfDocument`.
   * @throws {Error} A descriptive error when the failure message mentions a
   *   password; any other loading error is rethrown unchanged.
   */
  async loadDocument(input, password) {
    let data;
    if (typeof input === "string") {
      data = new Uint8Array(await fs.readFile(input));
      this.currentPdfPath = input;
    } else {
      // pdf.js requires a plain Uint8Array, not a Buffer subclass
      data = new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
      this.currentPdfPath = null;
    }
    // Store data for buffer-based rendering
    this.currentPdfData = data;
    this.#password = password ?? null;

    const loadingTask = getDocument({
      data,
      password,
      cMapUrl: CMAP_URL,
      cMapPacked: CMAP_PACKED,
      standardFontDataUrl: STANDARD_FONT_DATA_URL,
      wasmUrl: WASM_URL,
      verbosity: 0, // VerbosityLevel.ERRORS — suppress Type3 font warnings
    });

    let pdfDocument;
    try {
      pdfDocument = await loadingTask.promise;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      if (message.includes("password") || message.includes("Password")) {
        if (password) {
          throw new Error("Incorrect password for this PDF. Please check the password and try again.", { cause: error });
        }
        throw new Error("This PDF is password-protected. Use --password <password> to provide the document password.", { cause: error });
      }
      throw error;
    }

    const metadata = await pdfDocument.getMetadata();
    return {
      numPages: pdfDocument.numPages,
      data,
      metadata,
      _pdfDocument: pdfDocument,
    };
  }

  /**
   * Extract text items and (optionally) image bounds from one page.
   *
   * @param {object} doc - Handle returned by loadDocument().
   * @param {number} pageNum - 1-based page number.
   * @param {{extractImages?: boolean}} [options] - Image extraction runs
   *   unless `extractImages` is exactly `false`.
   * @returns {Promise<object>} Page data: `pageNum`, `width`, `height`,
   *   `textItems`, `images`, `annotations` (always empty) and
   *   `garbledTextRegions` (undefined when none were found).
   */
  async extractPage(doc, pageNum, options) {
    const page = await doc._pdfDocument.getPage(pageNum);
    try {
      const viewport = page.getViewport({ scale: 1.0 });
      const textContent = await page.getTextContent();
      const { textItems, garbledTextRegions } = this.#collectTextItems(textContent.items, viewport);

      const images = options?.extractImages !== false
        ? await this.#extractImages(doc, pageNum)
        : [];

      // Skip annotation extraction - not currently used in processing pipeline
      // Can be re-enabled if needed for link extraction, etc.
      const annotations = [];

      return {
        pageNum,
        width: viewport.width,
        height: viewport.height,
        textItems,
        images,
        annotations,
        garbledTextRegions: garbledTextRegions.length > 0 ? garbledTextRegions : undefined,
      };
    } finally {
      // Release page resources even when extraction fails part-way.
      await page.cleanup();
    }
  }

  /**
   * Extract several pages sequentially.
   *
   * @param {object} doc - Handle returned by loadDocument().
   * @param {number} [maxPages] - Upper bound on the number of pages returned.
   * @param {string} [targetPages] - Page selection such as "1-3,7"; when
   *   omitted, pages 1..min(numPages, maxPages) are used.
   * @param {object} [options] - Forwarded to extractPage().
   * @returns {Promise<object[]>} Page data in ascending page order.
   */
  async extractAllPages(doc, maxPages, targetPages, options) {
    const numPages = Math.min(doc.numPages, maxPages || doc.numPages);
    const pageNumbers = targetPages
      ? this.parseTargetPages(targetPages, doc.numPages)
      : Array.from({ length: numPages }, (_, i) => i + 1);

    const pages = [];
    for (const pageNum of pageNumbers) {
      if (maxPages && pages.length >= maxPages) {
        break;
      }
      pages.push(await this.extractPage(doc, pageNum, options));
    }
    return pages;
  }

  /**
   * Rasterize a page through the PDFium renderer.
   *
   * @param {object} _doc - Unused; the input remembered by loadDocument() is rendered.
   * @param {number} pageNum - 1-based page number.
   * @param {number} dpi - Render resolution.
   * @param {string} [password] - Document password; defaults to the one given
   *   to loadDocument().
   * @returns {Promise<*>} Whatever PdfiumRenderer.renderPageToBuffer() resolves to.
   * @throws {Error} When no document path or data is available.
   */
  async renderPageImage(_doc, pageNum, dpi, password) {
    const pdfInput = this.currentPdfPath || this.currentPdfData;
    if (!pdfInput) {
      throw new Error("No PDF path or data available for rendering");
    }
    const effectivePassword = password ?? this.#password ?? undefined;
    const renderer = await this.#ensureRenderer(pdfInput, effectivePassword);
    return await renderer.renderPageToBuffer(pdfInput, pageNum, dpi, effectivePassword);
  }

  /**
   * Destroy the PDF.js document, close the PDFium renderer if one was
   * created, and forget the current input.
   *
   * @param {object} doc - Handle returned by loadDocument().
   */
  async close(doc) {
    try {
      const pdfDocument = doc?._pdfDocument;
      if (pdfDocument && pdfDocument.destroy) {
        await pdfDocument.destroy();
      }
    } finally {
      // Tear the renderer down and reset state even if destroy() rejected.
      const renderer = this.pdfiumRenderer;
      this.pdfiumRenderer = null;
      this.currentPdfPath = null;
      this.currentPdfData = null;
      this.#password = null;
      if (renderer) {
        await renderer.close();
      }
    }
  }

  /**
   * Parse a page selection such as "1-3, 7, 10-12".
   *
   * Ranges are clamped to [1, maxPages]; single pages outside that interval
   * and unparsable parts are ignored.
   *
   * @param {string} targetPages - Comma-separated pages and ranges.
   * @param {number} maxPages - Highest valid page number.
   * @returns {number[]} Unique page numbers in ascending order.
   */
  parseTargetPages(targetPages, maxPages) {
    const pages = new Set();
    for (const part of targetPages.split(",")) {
      const trimmed = part.trim();
      if (trimmed.includes("-")) {
        // Range: "1-5"
        const [start, end] = trimmed.split("-").map((n) => Number.parseInt(n.trim(), 10));
        const last = Math.min(end, maxPages);
        // A NaN bound makes the loop condition false, so malformed ranges add nothing.
        for (let i = Math.max(start, 1); i <= last; i++) {
          pages.add(i);
        }
      } else {
        // Single page: "10"
        const pageNum = Number.parseInt(trimmed, 10);
        if (pageNum >= 1 && pageNum <= maxPages) {
          pages.add(pageNum);
        }
      }
    }
    return [...pages].sort((a, b) => a - b);
  }

  /**
   * Return the loaded PDFium renderer, creating and loading it on first use.
   * The renderer is cached only after loadDocument() succeeded, so a failed
   * load is retried on the next call instead of leaving an unloaded renderer.
   */
  async #ensureRenderer(pdfInput, password) {
    if (!this.pdfiumRenderer) {
      const renderer = new PdfiumRenderer();
      await renderer.loadDocument(pdfInput, password);
      this.pdfiumRenderer = renderer;
    }
    return this.pdfiumRenderer;
  }

  /** Best-effort image bounds for a page; any failure yields an empty list. */
  async #extractImages(doc, pageNum) {
    try {
      const pdfInput = this.currentPdfPath || this.currentPdfData || doc.data;
      const renderer = await this.#ensureRenderer(pdfInput, this.#password ?? undefined);
      const imageBounds = await renderer.extractImageBounds(pdfInput, pageNum);
      return imageBounds.map(({ x, y, width, height }) => ({ x, y, width, height }));
    } catch {
      // Image extraction is best-effort
      return [];
    }
  }

  /**
   * Convert PDF.js text items into positioned text items in viewport
   * coordinates, separating out regions whose text looks garbled.
   */
  #collectTextItems(items, viewport) {
    const viewportWidth = viewport.width;
    const viewportHeight = viewport.height;
    const viewportTransform = viewport.transform;
    const textItems = [];
    const garbledTextRegions = [];

    for (const item of items) {
      // Skip items with zero dimensions
      if (item.height === 0 || item.width === 0) {
        continue;
      }

      const scaleX = Math.sqrt(item.transform[0] ** 2 + item.transform[1] ** 2);
      const scaleY = Math.sqrt(item.transform[2] ** 2 + item.transform[3] ** 2);
      // A zero (or NaN) scale would divide by zero below and yield NaN
      // coordinates, which the off-page check cannot reject.
      if (!(scaleX > 0 && scaleY > 0)) {
        continue;
      }

      // Apply viewport transformation to convert PDF coordinates to screen coordinates
      // This properly handles Y-axis flip (PDF is bottom-up, screen is top-down)
      const cm = multiplyMatrices(viewportTransform, item.transform);
      // Lower-left corner is the text-space origin; the opposite corner is
      // the item's extent expressed in text space.
      const ll = applyTransformation({ x: 0, y: 0 }, cm);
      const ur = applyTransformation({ x: item.width / scaleX, y: item.height / scaleY }, cm);
      const left = Math.min(ll.x, ur.x);
      const right = Math.max(ll.x, ur.x);
      const top = Math.min(ll.y, ur.y);
      const bottom = Math.max(ll.y, ur.y);

      // Skip items that are off-page (negative coordinates or beyond page bounds)
      if (top < 0 || left < 0 || top > viewportHeight || left > viewportWidth) {
        continue;
      }

      const width = right - left;
      const height = bottom - top;

      // Normalize the rotation angle to a non-negative value.
      let rotation = getRotation(cm);
      if (rotation < 0) {
        rotation += 360;
      }

      // Decode buggy font markers using glyph names from font metadata
      let decodedStr = item.str;
      if (decodedStr.includes(BUGGY_FONT_MARKER_CHECK)) {
        BUGGY_FONT_MARKER_RE.lastIndex = 0;
        decodedStr = decodeBuggyFontMarkers(decodedStr);
      }

      // Handle pipe-separated characters: " |a| |r| |X| " -> "arX"
      // NOTE(review): when a string mixes ordinary text with a "|x|" group,
      // everything outside the matched groups is discarded — confirm intended.
      if (decodedStr.includes("|")) {
        PIPE_PATTERN_REGEX.lastIndex = 0;
        const matches = [...decodedStr.matchAll(PIPE_PATTERN_REGEX)];
        if (matches.length > 0) {
          decodedStr = matches.map((m) => m[1]).join("");
        }
      }

      // Skip garbled text from fonts with corrupted ToUnicode mappings
      if (isGarbledFontOutput(decodedStr)) {
        garbledTextRegions.push({ x: left, y: top, width, height });
        continue;
      }

      // Strip remaining control characters, map Windows-1252, decompose ligatures
      decodedStr = stripControlChars(decodedStr);

      textItems.push({
        str: decodedStr,
        x: left,
        y: top,
        width,
        height,
        w: width,
        h: height,
        r: rotation,
        fontName: item.fontName,
        fontSize: Math.sqrt(item.transform[0] * item.transform[0] + item.transform[1] * item.transform[1]),
        confidence: 1.0,
      });
    }

    return { textItems, garbledTextRegions };
  }
}
|
|
799
|
-
//# sourceMappingURL=pdfjs.js.map
|