@llamaindex/liteparse 1.5.3 → 2.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -448
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +87 -0
- package/dist/cli.js.map +1 -0
- package/dist/lib.d.ts +58 -0
- package/dist/lib.d.ts.map +1 -0
- package/dist/lib.js +88 -0
- package/dist/lib.js.map +1 -0
- package/dist/native.d.ts +54 -0
- package/dist/native.d.ts.map +1 -0
- package/dist/native.js +70 -0
- package/dist/native.js.map +1 -0
- package/libpdfium.so +0 -0
- package/liteparse.linux-x64-gnu.node +0 -0
- package/package.json +36 -50
- package/LICENSE +0 -201
- package/dist/cli/parse.d.ts +0 -4
- package/dist/cli/parse.d.ts.map +0 -1
- package/dist/cli/parse.js +0 -450
- package/dist/cli/parse.js.map +0 -1
- package/dist/package.json +0 -90
- package/dist/src/conversion/convertToPdf.d.ts +0 -65
- package/dist/src/conversion/convertToPdf.d.ts.map +0 -1
- package/dist/src/conversion/convertToPdf.js +0 -405
- package/dist/src/conversion/convertToPdf.js.map +0 -1
- package/dist/src/conversion/convertToPdf.test.d.ts +0 -2
- package/dist/src/conversion/convertToPdf.test.d.ts.map +0 -1
- package/dist/src/conversion/convertToPdf.test.js +0 -327
- package/dist/src/conversion/convertToPdf.test.js.map +0 -1
- package/dist/src/core/config.d.ts +0 -4
- package/dist/src/core/config.d.ts.map +0 -1
- package/dist/src/core/config.js +0 -26
- package/dist/src/core/config.js.map +0 -1
- package/dist/src/core/config.test.d.ts +0 -2
- package/dist/src/core/config.test.d.ts.map +0 -1
- package/dist/src/core/config.test.js +0 -21
- package/dist/src/core/config.test.js.map +0 -1
- package/dist/src/core/parser.d.ts +0 -92
- package/dist/src/core/parser.d.ts.map +0 -1
- package/dist/src/core/parser.js +0 -401
- package/dist/src/core/parser.js.map +0 -1
- package/dist/src/core/parser.test.d.ts +0 -2
- package/dist/src/core/parser.test.d.ts.map +0 -1
- package/dist/src/core/parser.test.js +0 -541
- package/dist/src/core/parser.test.js.map +0 -1
- package/dist/src/core/types.d.ts +0 -370
- package/dist/src/core/types.d.ts.map +0 -1
- package/dist/src/core/types.js +0 -2
- package/dist/src/core/types.js.map +0 -1
- package/dist/src/engines/ocr/http-simple.d.ts +0 -19
- package/dist/src/engines/ocr/http-simple.d.ts.map +0 -1
- package/dist/src/engines/ocr/http-simple.js +0 -69
- package/dist/src/engines/ocr/http-simple.js.map +0 -1
- package/dist/src/engines/ocr/http-simple.test.d.ts +0 -2
- package/dist/src/engines/ocr/http-simple.test.d.ts.map +0 -1
- package/dist/src/engines/ocr/http-simple.test.js +0 -108
- package/dist/src/engines/ocr/http-simple.test.js.map +0 -1
- package/dist/src/engines/ocr/interface.d.ts +0 -15
- package/dist/src/engines/ocr/interface.d.ts.map +0 -1
- package/dist/src/engines/ocr/interface.js +0 -2
- package/dist/src/engines/ocr/interface.js.map +0 -1
- package/dist/src/engines/ocr/tesseract.d.ts +0 -20
- package/dist/src/engines/ocr/tesseract.d.ts.map +0 -1
- package/dist/src/engines/ocr/tesseract.js +0 -162
- package/dist/src/engines/ocr/tesseract.js.map +0 -1
- package/dist/src/engines/ocr/tesseract.test.d.ts +0 -2
- package/dist/src/engines/ocr/tesseract.test.d.ts.map +0 -1
- package/dist/src/engines/ocr/tesseract.test.js +0 -94
- package/dist/src/engines/ocr/tesseract.test.js.map +0 -1
- package/dist/src/engines/pdf/interface.d.ts +0 -84
- package/dist/src/engines/pdf/interface.d.ts.map +0 -1
- package/dist/src/engines/pdf/interface.js +0 -2
- package/dist/src/engines/pdf/interface.js.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.d.ts +0 -31
- package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.js +0 -145
- package/dist/src/engines/pdf/pdfium-renderer.js.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +0 -2
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.test.js +0 -109
- package/dist/src/engines/pdf/pdfium-renderer.test.js.map +0 -1
- package/dist/src/engines/pdf/pdfjs.d.ts +0 -14
- package/dist/src/engines/pdf/pdfjs.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjs.js +0 -804
- package/dist/src/engines/pdf/pdfjs.js.map +0 -1
- package/dist/src/engines/pdf/pdfjs.test.d.ts +0 -2
- package/dist/src/engines/pdf/pdfjs.test.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjs.test.js +0 -225
- package/dist/src/engines/pdf/pdfjs.test.js.map +0 -1
- package/dist/src/engines/pdf/pdfjsImporter.d.ts +0 -5
- package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjsImporter.js +0 -45
- package/dist/src/engines/pdf/pdfjsImporter.js.map +0 -1
- package/dist/src/index.d.ts +0 -3
- package/dist/src/index.d.ts.map +0 -1
- package/dist/src/index.js +0 -5
- package/dist/src/index.js.map +0 -1
- package/dist/src/lib.d.ts +0 -19
- package/dist/src/lib.d.ts.map +0 -1
- package/dist/src/lib.js +0 -17
- package/dist/src/lib.js.map +0 -1
- package/dist/src/output/json.d.ts +0 -10
- package/dist/src/output/json.d.ts.map +0 -1
- package/dist/src/output/json.js +0 -32
- package/dist/src/output/json.js.map +0 -1
- package/dist/src/output/json.test.d.ts +0 -2
- package/dist/src/output/json.test.d.ts.map +0 -1
- package/dist/src/output/json.test.js +0 -199
- package/dist/src/output/json.test.js.map +0 -1
- package/dist/src/output/text.d.ts +0 -10
- package/dist/src/output/text.d.ts.map +0 -1
- package/dist/src/output/text.js +0 -17
- package/dist/src/output/text.js.map +0 -1
- package/dist/src/output/text.test.d.ts +0 -2
- package/dist/src/output/text.test.d.ts.map +0 -1
- package/dist/src/output/text.test.js +0 -65
- package/dist/src/output/text.test.js.map +0 -1
- package/dist/src/processing/bbox.d.ts +0 -20
- package/dist/src/processing/bbox.d.ts.map +0 -1
- package/dist/src/processing/bbox.js +0 -258
- package/dist/src/processing/bbox.js.map +0 -1
- package/dist/src/processing/bbox.test.d.ts +0 -2
- package/dist/src/processing/bbox.test.d.ts.map +0 -1
- package/dist/src/processing/bbox.test.js +0 -334
- package/dist/src/processing/bbox.test.js.map +0 -1
- package/dist/src/processing/cleanText.d.ts +0 -6
- package/dist/src/processing/cleanText.d.ts.map +0 -1
- package/dist/src/processing/cleanText.js +0 -73
- package/dist/src/processing/cleanText.js.map +0 -1
- package/dist/src/processing/cleanText.test.d.ts +0 -2
- package/dist/src/processing/cleanText.test.d.ts.map +0 -1
- package/dist/src/processing/cleanText.test.js +0 -46
- package/dist/src/processing/cleanText.test.js.map +0 -1
- package/dist/src/processing/grid.d.ts +0 -7
- package/dist/src/processing/grid.d.ts.map +0 -1
- package/dist/src/processing/grid.js +0 -13
- package/dist/src/processing/grid.js.map +0 -1
- package/dist/src/processing/gridDebugLogger.d.ts +0 -206
- package/dist/src/processing/gridDebugLogger.d.ts.map +0 -1
- package/dist/src/processing/gridDebugLogger.js +0 -446
- package/dist/src/processing/gridDebugLogger.js.map +0 -1
- package/dist/src/processing/gridProjection.d.ts +0 -19
- package/dist/src/processing/gridProjection.d.ts.map +0 -1
- package/dist/src/processing/gridProjection.js +0 -1813
- package/dist/src/processing/gridProjection.js.map +0 -1
- package/dist/src/processing/gridProjection.test.d.ts +0 -2
- package/dist/src/processing/gridProjection.test.d.ts.map +0 -1
- package/dist/src/processing/gridProjection.test.js +0 -495
- package/dist/src/processing/gridProjection.test.js.map +0 -1
- package/dist/src/processing/gridVisualizer.d.ts +0 -14
- package/dist/src/processing/gridVisualizer.d.ts.map +0 -1
- package/dist/src/processing/gridVisualizer.js +0 -166
- package/dist/src/processing/gridVisualizer.js.map +0 -1
- package/dist/src/processing/markupUtils.d.ts +0 -7
- package/dist/src/processing/markupUtils.d.ts.map +0 -1
- package/dist/src/processing/markupUtils.js +0 -25
- package/dist/src/processing/markupUtils.js.map +0 -1
- package/dist/src/processing/markupUtils.test.d.ts +0 -2
- package/dist/src/processing/markupUtils.test.d.ts.map +0 -1
- package/dist/src/processing/markupUtils.test.js +0 -26
- package/dist/src/processing/markupUtils.test.js.map +0 -1
- package/dist/src/processing/ocrUtils.d.ts +0 -24
- package/dist/src/processing/ocrUtils.d.ts.map +0 -1
- package/dist/src/processing/ocrUtils.js +0 -79
- package/dist/src/processing/ocrUtils.js.map +0 -1
- package/dist/src/processing/octUtils.test.d.ts +0 -2
- package/dist/src/processing/octUtils.test.d.ts.map +0 -1
- package/dist/src/processing/octUtils.test.js +0 -72
- package/dist/src/processing/octUtils.test.js.map +0 -1
- package/dist/src/processing/searchItems.d.ts +0 -26
- package/dist/src/processing/searchItems.d.ts.map +0 -1
- package/dist/src/processing/searchItems.js +0 -93
- package/dist/src/processing/searchItems.js.map +0 -1
- package/dist/src/processing/searchItems.test.d.ts +0 -2
- package/dist/src/processing/searchItems.test.d.ts.map +0 -1
- package/dist/src/processing/searchItems.test.js +0 -84
- package/dist/src/processing/searchItems.test.js.map +0 -1
- package/dist/src/processing/textUtils.d.ts +0 -20
- package/dist/src/processing/textUtils.d.ts.map +0 -1
- package/dist/src/processing/textUtils.js +0 -142
- package/dist/src/processing/textUtils.js.map +0 -1
- package/dist/src/processing/textUtils.test.d.ts +0 -2
- package/dist/src/processing/textUtils.test.d.ts.map +0 -1
- package/dist/src/processing/textUtils.test.js +0 -45
- package/dist/src/processing/textUtils.test.js.map +0 -1
- package/dist/src/vendor/pdfjs/LICENSE +0 -177
- package/dist/src/vendor/pdfjs/README.md +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
- package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/LICENSE +0 -36
- package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/dist/src/vendor/pdfjs/jbig2.wasm +0 -0
- package/dist/src/vendor/pdfjs/openjpeg.wasm +0 -0
- package/dist/src/vendor/pdfjs/pdf.mjs +0 -33603
- package/dist/src/vendor/pdfjs/pdf.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
- package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/qcms_bg.wasm +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
- package/src/vendor/pdfjs/LICENSE +0 -177
- package/src/vendor/pdfjs/README.md +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
- package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
- package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
- package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/LICENSE +0 -36
- package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/src/vendor/pdfjs/jbig2.wasm +0 -0
- package/src/vendor/pdfjs/openjpeg.wasm +0 -0
- package/src/vendor/pdfjs/pdf.mjs +0 -33603
- package/src/vendor/pdfjs/pdf.mjs.map +0 -1
- package/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
- package/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
- package/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
- package/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
- package/src/vendor/pdfjs/qcms_bg.wasm +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
- package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
package/README.md
CHANGED
|
@@ -1,488 +1,89 @@
|
|
|
1
|
-
# LiteParse
|
|
1
|
+
# LiteParse Node.js
|
|
2
2
|
|
|
3
|
-
[
|
|
4
|
-
|
|
|
5
|
-
[](https://www.npmjs.com/package/@llamaindex/liteparse)
|
|
6
|
-
|
|
|
7
|
-
[](https://opensource.org/licenses/Apache-2.0)
|
|
8
|
-
|
|
|
9
|
-
[Docs](https://developers.llamaindex.ai/liteparse/)
|
|
10
|
-
|
|
11
|
-
<img src="https://github.com/user-attachments/assets/07ba6a82-6bb1-4dea-b0ef-cad7df7d1622" alt="out" width="600">
|
|
12
|
-
|
|
13
|
-
LiteParse is a standalone OSS PDF parsing tool focused exclusively on **fast and light** parsing. It provides high-quality spatial text parsing with bounding boxes, without proprietary LLM features or cloud dependencies. Everything runs locally on your machine.
|
|
14
|
-
|
|
15
|
-
**Hitting the limits of local parsing?**
|
|
16
|
-
For complex documents (dense tables, multi-column layouts, charts, handwritten text, or
|
|
17
|
-
scanned PDFs), you'll get significantly better results with [LlamaParse](https://developers.llamaindex.ai/python/cloud/llamaparse/?utm_source=github&utm_medium=liteparse),
|
|
18
|
-
our cloud-based document parser built for production document pipelines. LlamaParse handles the
|
|
19
|
-
hard stuff so your models see clean, structured data and markdown.
|
|
20
|
-
|
|
21
|
-
> 👉 [Sign up for LlamaParse free](https://cloud.llamaindex.ai?utm_source=github&utm_medium=liteparse)
|
|
22
|
-
|
|
23
|
-
## Overview
|
|
24
|
-
|
|
25
|
-
- **Fast Text Parsing**: Spatial text parsing using PDF.js
|
|
26
|
-
- **Flexible OCR System**:
|
|
27
|
-
- **Built-in**: Tesseract.js (zero setup, works out of the box!)
|
|
28
|
-
- **HTTP Servers**: Plug in any OCR server (EasyOCR, PaddleOCR, custom)
|
|
29
|
-
- **Standard API**: Simple, well-defined OCR API specification
|
|
30
|
-
- **Screenshot Generation**: Generate high-quality page screenshots for LLM agents
|
|
31
|
-
- **Multiple Output Formats**: JSON and Text
|
|
32
|
-
- **Bounding Boxes**: Precise text positioning information
|
|
33
|
-
- **Standalone Binary**: No cloud dependencies, runs entirely locally
|
|
34
|
-
- **Multi-platform**: Linux, macOS (Intel/ARM), Windows
|
|
3
|
+
Node.js/TypeScript bindings for [LiteParse](https://github.com/run-llama/liteparse) — fast, lightweight PDF and document parsing with spatial text extraction.
|
|
35
4
|
|
|
36
5
|
## Installation
|
|
37
6
|
|
|
38
|
-
### CLI Tool
|
|
39
|
-
|
|
40
|
-
#### Option 1: Global Install (Recommended)
|
|
41
|
-
|
|
42
|
-
Install globally via npm to use the `lit` command anywhere:
|
|
43
|
-
|
|
44
|
-
```bash
|
|
45
|
-
npm i -g @llamaindex/liteparse
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
Then use it:
|
|
49
|
-
|
|
50
|
-
```bash
|
|
51
|
-
lit parse document.pdf
|
|
52
|
-
lit screenshot document.pdf
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
For macOS and Linux users, `liteparse` can also be installed via `brew`:
|
|
56
|
-
|
|
57
|
-
```bash
|
|
58
|
-
brew tap run-llama/liteparse
|
|
59
|
-
brew install llamaindex-liteparse
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
#### Option 2: Install from Source
|
|
63
|
-
|
|
64
|
-
You can clone the repo and install the CLI globally from source:
|
|
65
|
-
|
|
66
|
-
```
|
|
67
|
-
git clone https://github.com/run-llama/liteparse.git
|
|
68
|
-
cd liteparse
|
|
69
|
-
npm run build
|
|
70
|
-
npm pack
|
|
71
|
-
npm install -g ./liteparse-*.tgz
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
### Agent Skill
|
|
75
|
-
|
|
76
|
-
You can use `liteparse` as an agent skill, downloading it with the `skills` CLI tool:
|
|
77
|
-
|
|
78
|
-
```bash
|
|
79
|
-
npx skills add run-llama/llamaparse-agent-skills --skill liteparse
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
Alternatively, copy the [`SKILL.md`](https://github.com/run-llama/llamaparse-agent-skills/blob/main/skills/liteparse/SKILL.md) file into your own skills setup.
|
|
83
|
-
|
|
84
|
-
## Usage
|
|
85
|
-
|
|
86
|
-
### Parse Files
|
|
87
|
-
|
|
88
|
-
```bash
|
|
89
|
-
# Basic parsing
|
|
90
|
-
lit parse document.pdf
|
|
91
|
-
|
|
92
|
-
# Parse with specific format
|
|
93
|
-
lit parse document.pdf --format json -o output.md
|
|
94
|
-
|
|
95
|
-
# Parse specific pages
|
|
96
|
-
lit parse document.pdf --target-pages "1-5,10,15-20"
|
|
97
|
-
|
|
98
|
-
# Parse without OCR
|
|
99
|
-
lit parse document.pdf --no-ocr
|
|
100
|
-
|
|
101
|
-
# Parse a remote PDF
|
|
102
|
-
curl -sL https://example.com/report.pdf | lit parse -
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
### Batch Parsing
|
|
106
|
-
|
|
107
|
-
You can also parse an entire directory of documents:
|
|
108
|
-
|
|
109
7
|
```bash
|
|
110
|
-
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
### Generate Screenshots
|
|
114
|
-
|
|
115
|
-
Screenshots are essential for LLM agents to extract visual information that text alone cannot capture.
|
|
116
|
-
|
|
117
|
-
```bash
|
|
118
|
-
# Screenshot all pages
|
|
119
|
-
lit screenshot document.pdf -o ./screenshots
|
|
120
|
-
|
|
121
|
-
# Screenshot specific pages
|
|
122
|
-
lit screenshot document.pdf --target-pages "1,3,5" -o ./screenshots
|
|
123
|
-
|
|
124
|
-
# Custom DPI
|
|
125
|
-
lit screenshot document.pdf --dpi 300 -o ./screenshots
|
|
126
|
-
|
|
127
|
-
# Screenshot page range
|
|
128
|
-
lit screenshot document.pdf --target-pages "1-10" -o ./screenshots
|
|
8
|
+
npm i @llamaindex/liteparse
|
|
129
9
|
```
|
|
130
10
|
|
|
131
|
-
|
|
11
|
+
This also installs the `lit` CLI command (use `npm i -g` for global access).
|
|
132
12
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
```bash
|
|
136
|
-
npm install @llamaindex/liteparse
|
|
137
|
-
# or
|
|
138
|
-
pnpm add @llamaindex/liteparse
|
|
139
|
-
```
|
|
13
|
+
## Quick Start
|
|
140
14
|
|
|
141
15
|
```typescript
|
|
142
16
|
import { LiteParse } from '@llamaindex/liteparse';
|
|
143
17
|
|
|
144
|
-
const parser = new LiteParse(
|
|
18
|
+
const parser = new LiteParse();
|
|
145
19
|
const result = await parser.parse('document.pdf');
|
|
146
20
|
console.log(result.text);
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
#### Buffer / Uint8Array Input
|
|
150
|
-
|
|
151
|
-
You can pass raw bytes directly instead of a file path, which is useful for remote files:
|
|
152
|
-
|
|
153
|
-
```typescript
|
|
154
|
-
import { LiteParse } from '@llamaindex/liteparse';
|
|
155
|
-
import { readFile } from 'fs/promises';
|
|
156
|
-
|
|
157
|
-
const parser = new LiteParse();
|
|
158
|
-
|
|
159
|
-
// From a file read
|
|
160
|
-
const pdfBytes = await readFile('document.pdf');
|
|
161
|
-
const result = await parser.parse(pdfBytes);
|
|
162
21
|
|
|
163
|
-
//
|
|
164
|
-
const
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
```
|
|
168
|
-
|
|
169
|
-
Non-PDF buffers (images, Office documents) are written to a temp directory for format conversion. Screenshots also work with buffer input:
|
|
170
|
-
|
|
171
|
-
```typescript
|
|
172
|
-
const screenshots = await parser.screenshot(pdfBytes, [1, 2, 3]);
|
|
22
|
+
// Access structured data
|
|
23
|
+
for (const page of result.pages) {
|
|
24
|
+
console.log(`Page ${page.pageNum}: ${page.textItems.length} text items`);
|
|
25
|
+
}
|
|
173
26
|
```
|
|
174
27
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
LiteParse's core parsing engine (PDF.js text extraction, grid projection, OCR via Tesseract.js) can run in the browser. Since the library has Node-only dependencies (sharp, fs, child_process), you'll need a bundler like Vite to swap those out with browser stubs.
|
|
178
|
-
|
|
179
|
-
#### Vite Configuration
|
|
28
|
+
## Configuration
|
|
180
29
|
|
|
181
|
-
|
|
30
|
+
All options are passed to the constructor:
|
|
182
31
|
|
|
183
32
|
```typescript
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
//
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
];
|
|
197
|
-
|
|
198
|
-
function liteparseNodeRedirects(): Plugin {
|
|
199
|
-
return {
|
|
200
|
-
name: "liteparse-node-redirects",
|
|
201
|
-
enforce: "pre",
|
|
202
|
-
async resolveId(source, importer) {
|
|
203
|
-
if (!importer) return null;
|
|
204
|
-
const abs = source.startsWith(".") ? resolve(dirname(importer), source) : source;
|
|
205
|
-
for (const { match, target } of FILE_REDIRECTS) {
|
|
206
|
-
if (match.test(abs) || match.test(source)) return resolve(target);
|
|
207
|
-
}
|
|
208
|
-
return null;
|
|
209
|
-
},
|
|
210
|
-
};
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
export default defineConfig({
|
|
214
|
-
plugins: [liteparseNodeRedirects()],
|
|
215
|
-
optimizeDeps: { include: ["tesseract.js"] },
|
|
216
|
-
resolve: {
|
|
217
|
-
alias: [
|
|
218
|
-
{ find: "node:fs/promises", replacement: "stubs/empty.ts" },
|
|
219
|
-
{ find: "node:fs", replacement: "stubs/empty.ts" },
|
|
220
|
-
{ find: "node:url", replacement: "stubs/empty.ts" },
|
|
221
|
-
{ find: "node:path", replacement: "stubs/empty.ts" },
|
|
222
|
-
{ find: "node:os", replacement: "stubs/empty.ts" },
|
|
223
|
-
{ find: "node:child_process", replacement: "stubs/empty.ts" },
|
|
224
|
-
{ find: /^fs$/, replacement: "stubs/empty.ts" },
|
|
225
|
-
{ find: /^path$/, replacement: "stubs/empty.ts" },
|
|
226
|
-
{ find: /^os$/, replacement: "stubs/empty.ts" },
|
|
227
|
-
{ find: /^child_process$/, replacement: "stubs/empty.ts" },
|
|
228
|
-
{ find: "form-data", replacement: "stubs/empty.ts" },
|
|
229
|
-
{ find: "axios", replacement: "stubs/empty.ts" },
|
|
230
|
-
{ find: "file-type", replacement: "stubs/file-type.ts" },
|
|
231
|
-
],
|
|
232
|
-
},
|
|
33
|
+
const parser = new LiteParse({
|
|
34
|
+
ocrEnabled: true, // Enable OCR (default: true)
|
|
35
|
+
ocrLanguage: 'eng', // Tesseract language code
|
|
36
|
+
ocrServerUrl: undefined, // HTTP OCR server URL (optional)
|
|
37
|
+
tessdataPath: undefined, // Path to tessdata directory (optional)
|
|
38
|
+
maxPages: 1000, // Max pages to parse
|
|
39
|
+
targetPages: '1-5,10', // Specific pages (optional)
|
|
40
|
+
dpi: 150, // Rendering DPI
|
|
41
|
+
preserveVerySmallText: false, // Keep tiny text
|
|
42
|
+
password: undefined, // Password for protected documents
|
|
43
|
+
quiet: false, // Suppress progress output
|
|
44
|
+
numWorkers: 4, // Concurrent OCR workers
|
|
233
45
|
});
|
|
234
46
|
```
|
|
235
47
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
#### What works in the browser
|
|
239
|
-
|
|
240
|
-
- PDF parsing from `Uint8Array` input (use `file.arrayBuffer()` to get bytes from a `<input type="file">`)
|
|
241
|
-
- OCR via Tesseract.js (runs in Web Workers, fetches language data from CDN on first use)
|
|
242
|
-
- Text and JSON output formats
|
|
243
|
-
|
|
244
|
-
#### What doesn't work
|
|
48
|
+
## Parsing from Bytes
|
|
245
49
|
|
|
246
|
-
|
|
247
|
-
- DOCX/XLSX/PPTX/image conversion (requires LibreOffice/ImageMagick)
|
|
248
|
-
- HTTP OCR server backend
|
|
249
|
-
- Screenshots (these use PDFium + sharp, which are native Node addons)
|
|
250
|
-
|
|
251
|
-
### CLI Options
|
|
252
|
-
|
|
253
|
-
#### Parse Command
|
|
254
|
-
|
|
255
|
-
```
|
|
256
|
-
$ lit parse --help
|
|
257
|
-
Usage: lit parse [options] <file>
|
|
258
|
-
|
|
259
|
-
Parse a document file (PDF, DOCX, XLSX, PPTX, images, etc.)
|
|
260
|
-
|
|
261
|
-
Options:
|
|
262
|
-
-o, --output <file> Output file path
|
|
263
|
-
--format <format> Output format: json|text (default: "text")
|
|
264
|
-
--ocr-server-url <url> HTTP OCR server URL (uses Tesseract if not provided)
|
|
265
|
-
--no-ocr Disable OCR
|
|
266
|
-
--ocr-language <lang> OCR language(s) (default: "en")
|
|
267
|
-
--num-workers <n> Number of pages to OCR in parallel (default: CPU cores - 1)
|
|
268
|
-
--max-pages <n> Max pages to parse (default: "10000")
|
|
269
|
-
--target-pages <pages> Target pages (e.g., "1-5,10,15-20")
|
|
270
|
-
--dpi <dpi> DPI for rendering (default: "150")
|
|
271
|
-
--no-precise-bbox Disable precise bounding boxes
|
|
272
|
-
--preserve-small-text Preserve very small text
|
|
273
|
-
--password <password> Password for encrypted/protected documents
|
|
274
|
-
--config <file> Config file (JSON)
|
|
275
|
-
-q, --quiet Suppress progress output
|
|
276
|
-
-h, --help display help for command
|
|
277
|
-
```
|
|
278
|
-
|
|
279
|
-
#### Batch Parse Command
|
|
280
|
-
|
|
281
|
-
```
|
|
282
|
-
$ lit batch-parse --help
|
|
283
|
-
Usage: lit batch-parse [options] <input-dir> <output-dir>
|
|
284
|
-
|
|
285
|
-
Parse multiple documents in batch mode (reuses PDF engine for efficiency)
|
|
286
|
-
|
|
287
|
-
Options:
|
|
288
|
-
--format <format> Output format: json|text (default: "text")
|
|
289
|
-
--ocr-server-url <url> HTTP OCR server URL (uses Tesseract if not provided)
|
|
290
|
-
--no-ocr Disable OCR
|
|
291
|
-
--ocr-language <lang> OCR language(s) (default: "en")
|
|
292
|
-
--num-workers <n> Number of pages to OCR in parallel (default: CPU cores - 1)
|
|
293
|
-
--max-pages <n> Max pages to parse per file (default: "10000")
|
|
294
|
-
--dpi <dpi> DPI for rendering (default: "150")
|
|
295
|
-
--no-precise-bbox Disable precise bounding boxes
|
|
296
|
-
--recursive Recursively search input directory
|
|
297
|
-
--extension <ext> Only process files with this extension (e.g., ".pdf")
|
|
298
|
-
--password <password> Password for encrypted/protected documents (applied to all files)
|
|
299
|
-
--config <file> Config file (JSON)
|
|
300
|
-
-q, --quiet Suppress progress output
|
|
301
|
-
-h, --help display help for command
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
#### Screenshot Command
|
|
305
|
-
|
|
306
|
-
```
|
|
307
|
-
$ lit screenshot --help
|
|
308
|
-
Usage: lit screenshot [options] <file>
|
|
309
|
-
|
|
310
|
-
Generate screenshots of PDF pages
|
|
311
|
-
|
|
312
|
-
Options:
|
|
313
|
-
-o, --output-dir <dir> Output directory for screenshots (default: "./screenshots")
|
|
314
|
-
--target-pages <pages> Page numbers to screenshot (e.g., "1,3,5" or "1-5")
|
|
315
|
-
--dpi <dpi> DPI for rendering (default: "150")
|
|
316
|
-
--format <format> Image format: png|jpg (default: "png")
|
|
317
|
-
--password <password> Password for encrypted/protected documents
|
|
318
|
-
--config <file> Config file (JSON)
|
|
319
|
-
-q, --quiet Suppress progress output
|
|
320
|
-
-h, --help display help for command
|
|
321
|
-
```
|
|
322
|
-
|
|
323
|
-
## OCR Setup
|
|
324
|
-
|
|
325
|
-
### Default: Tesseract.js
|
|
326
|
-
|
|
327
|
-
```bash
|
|
328
|
-
# Tesseract is enabled by default
|
|
329
|
-
lit parse document.pdf
|
|
330
|
-
|
|
331
|
-
# Specify language
|
|
332
|
-
lit parse document.pdf --ocr-language fra
|
|
333
|
-
|
|
334
|
-
# Disable OCR
|
|
335
|
-
lit parse document.pdf --no-ocr
|
|
336
|
-
```
|
|
337
|
-
|
|
338
|
-
By default, Tesseract.js downloads language data from the internet on first use. For offline or air-gapped environments, set the `TESSDATA_PREFIX` environment variable to a directory containing pre-downloaded `.traineddata` files:
|
|
339
|
-
|
|
340
|
-
```bash
|
|
341
|
-
export TESSDATA_PREFIX=/path/to/tessdata
|
|
342
|
-
lit parse document.pdf --ocr-language eng
|
|
343
|
-
```
|
|
344
|
-
|
|
345
|
-
You can also pass `tessdataPath` in the library config:
|
|
50
|
+
Pass a `Buffer` or `Uint8Array` directly — useful for HTTP responses or in-memory data:
|
|
346
51
|
|
|
347
52
|
```typescript
|
|
348
|
-
|
|
349
|
-
```
|
|
350
|
-
|
|
351
|
-
### Optional: HTTP OCR Servers
|
|
352
|
-
|
|
353
|
-
For higher accuracy or better performance, you can use an HTTP OCR server. We provide ready-to-use example wrappers for popular OCR engines:
|
|
354
|
-
|
|
355
|
-
- [EasyOCR](ocr/easyocr/README.md)
|
|
356
|
-
- [PaddleOCR](ocr/paddleocr/README.md)
|
|
357
|
-
|
|
358
|
-
You can integrate any OCR service by implementing the simple LiteParse OCR API specification (see [`OCR_API_SPEC.md`](OCR_API_SPEC.md)).
|
|
359
|
-
|
|
360
|
-
The API requires:
|
|
361
|
-
- POST `/ocr` endpoint
|
|
362
|
-
- Accepts `file` and `language` parameters
|
|
363
|
-
- Returns JSON: `{ results: [{ text, bbox: [x1,y1,x2,y2], confidence }] }`
|
|
364
|
-
|
|
365
|
-
See the example servers in `ocr/easyocr/` and `ocr/paddleocr/` as templates.
|
|
366
|
-
|
|
367
|
-
For the complete OCR API specification, see [`OCR_API_SPEC.md`](OCR_API_SPEC.md).
|
|
368
|
-
|
|
369
|
-
## Multi-Format Input Support
|
|
370
|
-
|
|
371
|
-
LiteParse supports **automatic conversion** of various document formats to PDF before parsing. This makes it unique compared to other PDF-only parsing tools!
|
|
372
|
-
|
|
373
|
-
### Supported Input Formats
|
|
374
|
-
|
|
375
|
-
#### Office Documents (via LibreOffice)
|
|
376
|
-
- **Word**: `.doc`, `.docx`, `.docm`, `.odt`, `.rtf`
|
|
377
|
-
- **PowerPoint**: `.ppt`, `.pptx`, `.pptm`, `.odp`
|
|
378
|
-
- **Spreadsheets**: `.xls`, `.xlsx`, `.xlsm`, `.ods`, `.csv`, `.tsv`
|
|
379
|
-
|
|
380
|
-
Just install the dependency and LiteParse will automatically convert these formats to PDF for parsing:
|
|
381
|
-
|
|
382
|
-
```bash
|
|
383
|
-
# macOS
|
|
384
|
-
brew install --cask libreoffice
|
|
385
|
-
|
|
386
|
-
# Ubuntu/Debian
|
|
387
|
-
apt-get install libreoffice
|
|
388
|
-
|
|
389
|
-
# Windows
|
|
390
|
-
choco install libreoffice-fresh # might require admin permissions
|
|
391
|
-
```
|
|
392
|
-
|
|
393
|
-
> _For Windows, you might need to add the path to the directory containing LibreOffice CLI executable (generally `C:\Program Files\LibreOffice\program`) to the environment variables and re-start the machine._
|
|
394
|
-
|
|
395
|
-
#### Images (via ImageMagick)
|
|
396
|
-
- **Formats**: `.jpg`, `.jpeg`, `.png`, `.gif`, `.bmp`, `.tiff`, `.webp`, `.svg`
|
|
397
|
-
|
|
398
|
-
Just install ImageMagick and LiteParse will convert images to PDF for parsing (with OCR):
|
|
399
|
-
|
|
400
|
-
```bash
|
|
401
|
-
# macOS
|
|
402
|
-
brew install imagemagick
|
|
403
|
-
|
|
404
|
-
# Ubuntu/Debian
|
|
405
|
-
apt-get install imagemagick
|
|
53
|
+
import { readFile } from 'fs/promises';
|
|
406
54
|
|
|
407
|
-
|
|
408
|
-
|
|
55
|
+
const pdfBytes = await readFile('document.pdf');
|
|
56
|
+
const result = await parser.parse(pdfBytes);
|
|
57
|
+
console.log(result.text);
|
|
409
58
|
```
|
|
410
59
|
|
|
411
|
-
##
|
|
412
|
-
|
|
413
|
-
| Variable | Description |
|
|
414
|
-
|----------|-------------|
|
|
415
|
-
| `TESSDATA_PREFIX` | Path to a directory containing Tesseract `.traineddata` files. Used for offline/air-gapped environments where Tesseract.js cannot download language data from the internet. |
|
|
416
|
-
| `LITEPARSE_TMPDIR` | Override the temp directory used for format conversion and intermediate files. Defaults to the OS temp directory (`os.tmpdir()`). Useful in containerized or read-only filesystem environments. |
|
|
417
|
-
|
|
418
|
-
## Configuration
|
|
60
|
+
## Screenshots
|
|
419
61
|
|
|
420
|
-
|
|
62
|
+
Generate PNG screenshots of document pages:
|
|
421
63
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
{
|
|
428
|
-
"ocrLanguage": "en",
|
|
429
|
-
"ocrEnabled": true,
|
|
430
|
-
"maxPages": 1000,
|
|
431
|
-
"dpi": 150,
|
|
432
|
-
"outputFormat": "json",
|
|
433
|
-
"preciseBoundingBox": true,
|
|
434
|
-
"preserveVerySmallText": false,
|
|
435
|
-
"password": "optional_password"
|
|
436
|
-
}
|
|
437
|
-
```
|
|
438
|
-
|
|
439
|
-
For HTTP OCR servers, just add `ocrServerUrl`:
|
|
440
|
-
|
|
441
|
-
```json
|
|
442
|
-
{
|
|
443
|
-
"ocrServerUrl": "http://localhost:8828/ocr",
|
|
444
|
-
"ocrLanguage": "en",
|
|
445
|
-
"outputFormat": "json"
|
|
64
|
+
```typescript
|
|
65
|
+
const screenshots = parser.screenshot('document.pdf', [1, 2, 3]);
|
|
66
|
+
for (const s of screenshots) {
|
|
67
|
+
console.log(`Page ${s.pageNum}: ${s.width}x${s.height}`);
|
|
68
|
+
// s.imageBuffer contains PNG bytes
|
|
446
69
|
}
|
|
447
70
|
```
|
|
448
71
|
|
|
449
|
-
|
|
72
|
+
## Supported Formats
|
|
450
73
|
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
74
|
+
- PDF (`.pdf`)
|
|
75
|
+
- Microsoft Office (`.docx`, `.xlsx`, `.pptx`, etc.) — requires LibreOffice
|
|
76
|
+
- OpenDocument (`.odt`, `.ods`, `.odp`) — requires LibreOffice
|
|
77
|
+
- Images (`.png`, `.jpg`, `.tiff`, etc.) — requires ImageMagick
|
|
78
|
+
- And more!
|
|
454
79
|
|
|
455
|
-
##
|
|
80
|
+
## CLI
|
|
456
81
|
|
|
457
|
-
|
|
82
|
+
The npm package includes the `lit` CLI:
|
|
458
83
|
|
|
459
84
|
```bash
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
npm run build
|
|
465
|
-
|
|
466
|
-
# Build Typescript (Windows)
|
|
467
|
-
npm run build:windows
|
|
468
|
-
|
|
469
|
-
# Watch mode
|
|
470
|
-
npm run dev
|
|
471
|
-
|
|
472
|
-
# Test parsing
|
|
473
|
-
npm test
|
|
85
|
+
lit parse document.pdf
|
|
86
|
+
lit parse document.pdf --format json -o output.json
|
|
87
|
+
lit screenshot document.pdf -o ./screenshots
|
|
88
|
+
lit batch-parse ./input ./output
|
|
474
89
|
```
|
|
475
|
-
|
|
476
|
-
## License
|
|
477
|
-
|
|
478
|
-
Apache 2.0
|
|
479
|
-
|
|
480
|
-
## Credits
|
|
481
|
-
|
|
482
|
-
Built on top of:
|
|
483
|
-
|
|
484
|
-
- [PDF.js](https://github.com/mozilla/pdf.js) - PDF parsing engine
|
|
485
|
-
- [Tesseract.js](https://github.com/naptha/tesseract.js) - In-process OCR engine
|
|
486
|
-
- [EasyOCR](https://github.com/JaidedAI/EasyOCR) - HTTP OCR server (optional)
|
|
487
|
-
- [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) - HTTP OCR server (optional)
|
|
488
|
-
- [Sharp](https://github.com/lovell/sharp) - Image processing
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { program } from "commander";
|
|
3
|
+
import { LiteParse } from "./lib.js";
|
|
4
|
+
import { readFileSync } from "node:fs";
|
|
5
|
+
import { writeFileSync } from "node:fs";
|
|
6
|
+
/**
 * Option processors for commander.
 *
 * Commander invokes a custom processor as `fn(value, previous)`. Handing it
 * the built-in `parseInt` directly would make the previous value act as the
 * radix when a flag is repeated (e.g. `--max-pages 5 --max-pages 10`), so each
 * numeric flag goes through a single-argument wrapper instead.
 */
const toInteger = (value) => Number.parseInt(value, 10);
const toFloat = (value) => Number.parseFloat(value);

/**
 * Reject a numeric flag whose value did not parse.
 *
 * @param {string} flag - Flag name as typed by the user, used in the message.
 * @param {number | undefined} value - Processed option value from commander.
 * @throws {Error} When the flag was supplied but parsed to NaN.
 */
function assertNumericOption(flag, value) {
    if (value !== undefined && Number.isNaN(value)) {
        throw new Error(`Invalid value for ${flag}: expected a number`);
    }
}

/**
 * Render an unknown thrown value as a one-line CLI error message.
 *
 * @param {unknown} err - Whatever was thrown.
 * @returns {string} Message prefixed with "Error: ".
 */
function formatError(err) {
    return `Error: ${err instanceof Error ? err.message : String(err)}`;
}

// NOTE(review): the package is published as 2.0.0-beta.0 while this string is
// "2.0.0" - confirm whether the CLI version is meant to track package.json.
program
    .name("liteparse")
    .description("Fast, lightweight PDF and document parsing")
    .version("2.0.0");

program
    .command("parse")
    .description("Parse a document and extract text")
    .argument("<file>", "Path to the document file")
    .option("-o, --output <file>", "Output file path")
    .option("--format <format>", 'Output format: json|text (default: "text")')
    .option("--ocr-server-url <url>", "HTTP OCR server URL")
    .option("--no-ocr", "Disable OCR")
    .option("--ocr-language <lang>", "OCR language (default: eng)")
    .option("--max-pages <n>", "Max pages to parse", toInteger)
    .option("--target-pages <pages>", 'Pages to parse (e.g., "1-5,10,15-20")')
    .option("--dpi <dpi>", "Rendering DPI", toFloat)
    .option("--preserve-small-text", "Keep very small text")
    .option("--password <password>", "Password for encrypted documents")
    .option("--config <file>", "JSON config file path")
    .option("-q, --quiet", "Suppress progress output")
    .option("--num-workers <n>", "Number of concurrent OCR workers", toInteger)
    .action(async (file, opts) => {
        try {
            // A malformed number used to be dropped silently by the
            // truthiness checks below; surface it to the user instead.
            assertNumericOption("--max-pages", opts.maxPages);
            assertNumericOption("--dpi", opts.dpi);
            assertNumericOption("--num-workers", opts.numWorkers);

            const config = {};

            // Load config file if provided
            if (opts.config) {
                const fileConfig = JSON.parse(readFileSync(opts.config, "utf-8"));
                if (fileConfig === null ||
                    typeof fileConfig !== "object" ||
                    Array.isArray(fileConfig)) {
                    throw new Error(`Config file ${opts.config} must contain a JSON object`);
                }
                Object.assign(config, fileConfig);
            }

            // CLI options override config file
            if (opts.format)
                config.outputFormat = opts.format;
            if (opts.ocrServerUrl)
                config.ocrServerUrl = opts.ocrServerUrl;
            // commander sets `ocr` to false only when --no-ocr is passed
            if (opts.ocr === false)
                config.ocrEnabled = false;
            if (opts.ocrLanguage)
                config.ocrLanguage = opts.ocrLanguage;
            if (opts.maxPages)
                config.maxPages = opts.maxPages;
            if (opts.targetPages)
                config.targetPages = opts.targetPages;
            if (opts.dpi)
                config.dpi = opts.dpi;
            if (opts.preserveSmallText)
                config.preserveVerySmallText = true;
            if (opts.password)
                config.password = opts.password;
            if (opts.quiet)
                config.quiet = true;
            if (opts.numWorkers)
                config.numWorkers = opts.numWorkers;

            // Default CLI output to text (library defaults to json)
            if (!config.outputFormat)
                config.outputFormat = "text";

            const parser = new LiteParse(config);
            const result = await parser.parse(file);

            const output = config.outputFormat === "json"
                ? JSON.stringify({
                    pages: result.pages.map((p) => ({
                        page: p.pageNum,
                        width: p.width,
                        height: p.height,
                        text: p.text,
                        textItems: p.textItems,
                    })),
                }, null, 2)
                : result.text;

            if (opts.output) {
                writeFileSync(opts.output, output, "utf-8");
            }
            else {
                process.stdout.write(output);
            }
        }
        catch (err) {
            console.error(formatError(err));
            process.exit(1);
        }
    });

// The action handler is async, so use parseAsync as commander recommends; the
// catch covers failures raised outside the handler's own try/catch.
program.parseAsync(process.argv).catch((err) => {
    console.error(formatError(err));
    process.exit(1);
});
|
|
87
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,SAAS,EAAwB,MAAM,UAAU,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAExC,OAAO;KACJ,IAAI,CAAC,WAAW,CAAC;KACjB,WAAW,CAAC,4CAA4C,CAAC;KACzD,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,mCAAmC,CAAC;KAChD,QAAQ,CAAC,QAAQ,EAAE,2BAA2B,CAAC;KAC/C,MAAM,CAAC,qBAAqB,EAAE,kBAAkB,CAAC;KACjD,MAAM,CAAC,mBAAmB,EAAE,4CAA4C,CAAC;KACzE,MAAM,CAAC,wBAAwB,EAAE,qBAAqB,CAAC;KACvD,MAAM,CAAC,UAAU,EAAE,aAAa,CAAC;KACjC,MAAM,CAAC,uBAAuB,EAAE,6BAA6B,CAAC;KAC9D,MAAM,CAAC,iBAAiB,EAAE,oBAAoB,EAAE,QAAQ,CAAC;KACzD,MAAM,CACL,wBAAwB,EACxB,uCAAuC,CACxC;KACA,MAAM,CAAC,aAAa,EAAE,eAAe,EAAE,UAAU,CAAC;KAClD,MAAM,CAAC,uBAAuB,EAAE,sBAAsB,CAAC;KACvD,MAAM,CAAC,uBAAuB,EAAE,kCAAkC,CAAC;KACnE,MAAM,CAAC,iBAAiB,EAAE,uBAAuB,CAAC;KAClD,MAAM,CAAC,aAAa,EAAE,0BAA0B,CAAC;KACjD,MAAM,CAAC,mBAAmB,EAAE,kCAAkC,EAAE,QAAQ,CAAC;KACzE,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,IAA6B,EAAE,EAAE;IAC5D,IAAI,CAAC;QACH,MAAM,MAAM,GAA6B,EAAE,CAAC;QAE5C,+BAA+B;QAC/B,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAC3B,YAAY,CAAC,IAAI,CAAC,MAAgB,EAAE,OAAO,CAAC,CAC7C,CAAC;YACF,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QACpC,CAAC;QAED,mCAAmC;QACnC,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,MAAyB,CAAC;QACtE,IAAI,IAAI,CAAC,YAAY;YACnB,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,YAAsB,CAAC;QACpD,IAAI,IAAI,CAAC,GAAG,KAAK,KAAK;YAAE,MAAM,CAAC,UAAU,GAAG,KAAK,CAAC;QAClD,IAAI,IAAI,CAAC,WAAW;YAAE,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAqB,CAAC;QACtE,IAAI,IAAI,CAAC,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAkB,CAAC;QAC7D,IAAI,IAAI,CAAC,WAAW;YAAE,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAqB,CAAC;QACtE,IAAI,IAAI,CAAC,GAAG;YAAE,MAAM,CAAC,GAAG,GAAG,IAAI,CAAC,GAAa,CAAC;QAC9C,IAAI,IAAI,CAAC,iBAAiB;YAAE,MAAM,CAAC,qBAAqB,GAAG,IAAI,CAAC;QAChE,IAAI,IAAI,CAAC,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAkB,CAAC;QAC7D,IAAI,IAAI,CAAC,KAAK;YAAE,MAAM,CAAC,KAAK,
GAAG,IAAI,CAAC;QACpC,IAAI,IAAI,CAAC,UAAU;YAAE,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,UAAoB,CAAC;QAEnE,wDAAwD;QACxD,IAAI,CAAC,MAAM,CAAC,YAAY;YAAE,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC;QAEvD,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAExC,MAAM,MAAM,GACV,MAAM,CAAC,YAAY,KAAK,MAAM;YAC5B,CAAC,CAAC,IAAI,CAAC,SAAS,CACZ;gBACE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC9B,IAAI,EAAE,CAAC,CAAC,OAAO;oBACf,KAAK,EAAE,CAAC,CAAC,KAAK;oBACd,MAAM,EAAE,CAAC,CAAC,MAAM;oBAChB,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,SAAS,EAAE,CAAC,CAAC,SAAS;iBACvB,CAAC,CAAC;aACJ,EACD,IAAI,EACJ,CAAC,CACF;YACH,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC;QAElB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,aAAa,CAAC,IAAI,CAAC,MAAgB,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QACxD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CACX,UAAU,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7D,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
|