@llamaindex/liteparse 1.5.2 → 2.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -373
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +87 -0
- package/dist/cli.js.map +1 -0
- package/dist/lib.d.ts +58 -0
- package/dist/lib.d.ts.map +1 -0
- package/dist/lib.js +88 -0
- package/dist/lib.js.map +1 -0
- package/dist/native.d.ts +54 -0
- package/dist/native.d.ts.map +1 -0
- package/dist/native.js +70 -0
- package/dist/native.js.map +1 -0
- package/libpdfium.so +0 -0
- package/liteparse.linux-x64-gnu.node +0 -0
- package/package.json +36 -50
- package/LICENSE +0 -201
- package/dist/cli/parse.d.ts +0 -4
- package/dist/cli/parse.d.ts.map +0 -1
- package/dist/cli/parse.js +0 -450
- package/dist/cli/parse.js.map +0 -1
- package/dist/package.json +0 -90
- package/dist/src/conversion/convertToPdf.d.ts +0 -65
- package/dist/src/conversion/convertToPdf.d.ts.map +0 -1
- package/dist/src/conversion/convertToPdf.js +0 -405
- package/dist/src/conversion/convertToPdf.js.map +0 -1
- package/dist/src/conversion/convertToPdf.test.d.ts +0 -2
- package/dist/src/conversion/convertToPdf.test.d.ts.map +0 -1
- package/dist/src/conversion/convertToPdf.test.js +0 -327
- package/dist/src/conversion/convertToPdf.test.js.map +0 -1
- package/dist/src/core/config.d.ts +0 -4
- package/dist/src/core/config.d.ts.map +0 -1
- package/dist/src/core/config.js +0 -26
- package/dist/src/core/config.js.map +0 -1
- package/dist/src/core/config.test.d.ts +0 -2
- package/dist/src/core/config.test.d.ts.map +0 -1
- package/dist/src/core/config.test.js +0 -21
- package/dist/src/core/config.test.js.map +0 -1
- package/dist/src/core/parser.d.ts +0 -92
- package/dist/src/core/parser.d.ts.map +0 -1
- package/dist/src/core/parser.js +0 -401
- package/dist/src/core/parser.js.map +0 -1
- package/dist/src/core/parser.test.d.ts +0 -2
- package/dist/src/core/parser.test.d.ts.map +0 -1
- package/dist/src/core/parser.test.js +0 -541
- package/dist/src/core/parser.test.js.map +0 -1
- package/dist/src/core/types.d.ts +0 -370
- package/dist/src/core/types.d.ts.map +0 -1
- package/dist/src/core/types.js +0 -2
- package/dist/src/core/types.js.map +0 -1
- package/dist/src/engines/ocr/http-simple.d.ts +0 -19
- package/dist/src/engines/ocr/http-simple.d.ts.map +0 -1
- package/dist/src/engines/ocr/http-simple.js +0 -69
- package/dist/src/engines/ocr/http-simple.js.map +0 -1
- package/dist/src/engines/ocr/http-simple.test.d.ts +0 -2
- package/dist/src/engines/ocr/http-simple.test.d.ts.map +0 -1
- package/dist/src/engines/ocr/http-simple.test.js +0 -108
- package/dist/src/engines/ocr/http-simple.test.js.map +0 -1
- package/dist/src/engines/ocr/interface.d.ts +0 -15
- package/dist/src/engines/ocr/interface.d.ts.map +0 -1
- package/dist/src/engines/ocr/interface.js +0 -2
- package/dist/src/engines/ocr/interface.js.map +0 -1
- package/dist/src/engines/ocr/tesseract.d.ts +0 -20
- package/dist/src/engines/ocr/tesseract.d.ts.map +0 -1
- package/dist/src/engines/ocr/tesseract.js +0 -161
- package/dist/src/engines/ocr/tesseract.js.map +0 -1
- package/dist/src/engines/ocr/tesseract.test.d.ts +0 -2
- package/dist/src/engines/ocr/tesseract.test.d.ts.map +0 -1
- package/dist/src/engines/ocr/tesseract.test.js +0 -94
- package/dist/src/engines/ocr/tesseract.test.js.map +0 -1
- package/dist/src/engines/pdf/interface.d.ts +0 -84
- package/dist/src/engines/pdf/interface.d.ts.map +0 -1
- package/dist/src/engines/pdf/interface.js +0 -2
- package/dist/src/engines/pdf/interface.js.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.d.ts +0 -31
- package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.js +0 -145
- package/dist/src/engines/pdf/pdfium-renderer.js.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +0 -2
- package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfium-renderer.test.js +0 -109
- package/dist/src/engines/pdf/pdfium-renderer.test.js.map +0 -1
- package/dist/src/engines/pdf/pdfjs.d.ts +0 -14
- package/dist/src/engines/pdf/pdfjs.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjs.js +0 -799
- package/dist/src/engines/pdf/pdfjs.js.map +0 -1
- package/dist/src/engines/pdf/pdfjs.test.d.ts +0 -2
- package/dist/src/engines/pdf/pdfjs.test.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjs.test.js +0 -225
- package/dist/src/engines/pdf/pdfjs.test.js.map +0 -1
- package/dist/src/engines/pdf/pdfjsImporter.d.ts +0 -5
- package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +0 -1
- package/dist/src/engines/pdf/pdfjsImporter.js +0 -45
- package/dist/src/engines/pdf/pdfjsImporter.js.map +0 -1
- package/dist/src/index.d.ts +0 -3
- package/dist/src/index.d.ts.map +0 -1
- package/dist/src/index.js +0 -5
- package/dist/src/index.js.map +0 -1
- package/dist/src/lib.d.ts +0 -19
- package/dist/src/lib.d.ts.map +0 -1
- package/dist/src/lib.js +0 -17
- package/dist/src/lib.js.map +0 -1
- package/dist/src/output/json.d.ts +0 -10
- package/dist/src/output/json.d.ts.map +0 -1
- package/dist/src/output/json.js +0 -32
- package/dist/src/output/json.js.map +0 -1
- package/dist/src/output/json.test.d.ts +0 -2
- package/dist/src/output/json.test.d.ts.map +0 -1
- package/dist/src/output/json.test.js +0 -199
- package/dist/src/output/json.test.js.map +0 -1
- package/dist/src/output/text.d.ts +0 -10
- package/dist/src/output/text.d.ts.map +0 -1
- package/dist/src/output/text.js +0 -17
- package/dist/src/output/text.js.map +0 -1
- package/dist/src/output/text.test.d.ts +0 -2
- package/dist/src/output/text.test.d.ts.map +0 -1
- package/dist/src/output/text.test.js +0 -65
- package/dist/src/output/text.test.js.map +0 -1
- package/dist/src/processing/bbox.d.ts +0 -20
- package/dist/src/processing/bbox.d.ts.map +0 -1
- package/dist/src/processing/bbox.js +0 -258
- package/dist/src/processing/bbox.js.map +0 -1
- package/dist/src/processing/bbox.test.d.ts +0 -2
- package/dist/src/processing/bbox.test.d.ts.map +0 -1
- package/dist/src/processing/bbox.test.js +0 -334
- package/dist/src/processing/bbox.test.js.map +0 -1
- package/dist/src/processing/cleanText.d.ts +0 -6
- package/dist/src/processing/cleanText.d.ts.map +0 -1
- package/dist/src/processing/cleanText.js +0 -73
- package/dist/src/processing/cleanText.js.map +0 -1
- package/dist/src/processing/cleanText.test.d.ts +0 -2
- package/dist/src/processing/cleanText.test.d.ts.map +0 -1
- package/dist/src/processing/cleanText.test.js +0 -46
- package/dist/src/processing/cleanText.test.js.map +0 -1
- package/dist/src/processing/grid.d.ts +0 -7
- package/dist/src/processing/grid.d.ts.map +0 -1
- package/dist/src/processing/grid.js +0 -13
- package/dist/src/processing/grid.js.map +0 -1
- package/dist/src/processing/gridDebugLogger.d.ts +0 -206
- package/dist/src/processing/gridDebugLogger.d.ts.map +0 -1
- package/dist/src/processing/gridDebugLogger.js +0 -446
- package/dist/src/processing/gridDebugLogger.js.map +0 -1
- package/dist/src/processing/gridProjection.d.ts +0 -19
- package/dist/src/processing/gridProjection.d.ts.map +0 -1
- package/dist/src/processing/gridProjection.js +0 -1813
- package/dist/src/processing/gridProjection.js.map +0 -1
- package/dist/src/processing/gridProjection.test.d.ts +0 -2
- package/dist/src/processing/gridProjection.test.d.ts.map +0 -1
- package/dist/src/processing/gridProjection.test.js +0 -495
- package/dist/src/processing/gridProjection.test.js.map +0 -1
- package/dist/src/processing/gridVisualizer.d.ts +0 -14
- package/dist/src/processing/gridVisualizer.d.ts.map +0 -1
- package/dist/src/processing/gridVisualizer.js +0 -166
- package/dist/src/processing/gridVisualizer.js.map +0 -1
- package/dist/src/processing/markupUtils.d.ts +0 -7
- package/dist/src/processing/markupUtils.d.ts.map +0 -1
- package/dist/src/processing/markupUtils.js +0 -25
- package/dist/src/processing/markupUtils.js.map +0 -1
- package/dist/src/processing/markupUtils.test.d.ts +0 -2
- package/dist/src/processing/markupUtils.test.d.ts.map +0 -1
- package/dist/src/processing/markupUtils.test.js +0 -26
- package/dist/src/processing/markupUtils.test.js.map +0 -1
- package/dist/src/processing/ocrUtils.d.ts +0 -24
- package/dist/src/processing/ocrUtils.d.ts.map +0 -1
- package/dist/src/processing/ocrUtils.js +0 -79
- package/dist/src/processing/ocrUtils.js.map +0 -1
- package/dist/src/processing/octUtils.test.d.ts +0 -2
- package/dist/src/processing/octUtils.test.d.ts.map +0 -1
- package/dist/src/processing/octUtils.test.js +0 -72
- package/dist/src/processing/octUtils.test.js.map +0 -1
- package/dist/src/processing/searchItems.d.ts +0 -26
- package/dist/src/processing/searchItems.d.ts.map +0 -1
- package/dist/src/processing/searchItems.js +0 -93
- package/dist/src/processing/searchItems.js.map +0 -1
- package/dist/src/processing/searchItems.test.d.ts +0 -2
- package/dist/src/processing/searchItems.test.d.ts.map +0 -1
- package/dist/src/processing/searchItems.test.js +0 -84
- package/dist/src/processing/searchItems.test.js.map +0 -1
- package/dist/src/processing/textUtils.d.ts +0 -20
- package/dist/src/processing/textUtils.d.ts.map +0 -1
- package/dist/src/processing/textUtils.js +0 -142
- package/dist/src/processing/textUtils.js.map +0 -1
- package/dist/src/processing/textUtils.test.d.ts +0 -2
- package/dist/src/processing/textUtils.test.d.ts.map +0 -1
- package/dist/src/processing/textUtils.test.js +0 -45
- package/dist/src/processing/textUtils.test.js.map +0 -1
- package/dist/src/vendor/pdfjs/LICENSE +0 -177
- package/dist/src/vendor/pdfjs/README.md +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
- package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
- package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/LICENSE +0 -36
- package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/dist/src/vendor/pdfjs/jbig2.wasm +0 -0
- package/dist/src/vendor/pdfjs/openjpeg.wasm +0 -0
- package/dist/src/vendor/pdfjs/pdf.mjs +0 -33603
- package/dist/src/vendor/pdfjs/pdf.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
- package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
- package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
- package/dist/src/vendor/pdfjs/qcms_bg.wasm +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
- package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
- package/src/vendor/pdfjs/LICENSE +0 -177
- package/src/vendor/pdfjs/README.md +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
- package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
- package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
- package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/LICENSE +0 -36
- package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
- package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
- package/src/vendor/pdfjs/jbig2.wasm +0 -0
- package/src/vendor/pdfjs/openjpeg.wasm +0 -0
- package/src/vendor/pdfjs/pdf.mjs +0 -33603
- package/src/vendor/pdfjs/pdf.mjs.map +0 -1
- package/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
- package/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
- package/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
- package/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
- package/src/vendor/pdfjs/qcms_bg.wasm +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
- package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
- package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
- package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
package/README.md
CHANGED
|
@@ -1,412 +1,89 @@
|
|
|
1
|
-
# LiteParse
|
|
1
|
+
# LiteParse Node.js
|
|
2
2
|
|
|
3
|
-
[
|
|
4
|
-
|
|
|
5
|
-
[](https://www.npmjs.com/package/@llamaindex/liteparse)
|
|
6
|
-
|
|
|
7
|
-
[](https://opensource.org/licenses/Apache-2.0)
|
|
8
|
-
|
|
|
9
|
-
[Docs](https://developers.llamaindex.ai/liteparse/)
|
|
10
|
-
|
|
11
|
-
<img src="https://github.com/user-attachments/assets/07ba6a82-6bb1-4dea-b0ef-cad7df7d1622" alt="out" width="600">
|
|
12
|
-
|
|
13
|
-
LiteParse is a standalone OSS PDF parsing tool focused exclusively on **fast and light** parsing. It provides high-quality spatial text parsing with bounding boxes, without proprietary LLM features or cloud dependencies. Everything runs locally on your machine.
|
|
14
|
-
|
|
15
|
-
**Hitting the limits of local parsing?**
|
|
16
|
-
For complex documents (dense tables, multi-column layouts, charts, handwritten text, or
|
|
17
|
-
scanned PDFs), you'll get significantly better results with [LlamaParse](https://developers.llamaindex.ai/python/cloud/llamaparse/?utm_source=github&utm_medium=liteparse),
|
|
18
|
-
our cloud-based document parser built for production document pipelines. LlamaParse handles the
|
|
19
|
-
hard stuff so your models see clean, structured data and markdown.
|
|
20
|
-
|
|
21
|
-
> 👉 [Sign up for LlamaParse free](https://cloud.llamaindex.ai?utm_source=github&utm_medium=liteparse)
|
|
22
|
-
|
|
23
|
-
## Overview
|
|
24
|
-
|
|
25
|
-
- **Fast Text Parsing**: Spatial text parsing using PDF.js
|
|
26
|
-
- **Flexible OCR System**:
|
|
27
|
-
- **Built-in**: Tesseract.js (zero setup, works out of the box!)
|
|
28
|
-
- **HTTP Servers**: Plug in any OCR server (EasyOCR, PaddleOCR, custom)
|
|
29
|
-
- **Standard API**: Simple, well-defined OCR API specification
|
|
30
|
-
- **Screenshot Generation**: Generate high-quality page screenshots for LLM agents
|
|
31
|
-
- **Multiple Output Formats**: JSON and Text
|
|
32
|
-
- **Bounding Boxes**: Precise text positioning information
|
|
33
|
-
- **Standalone Binary**: No cloud dependencies, runs entirely locally
|
|
34
|
-
- **Multi-platform**: Linux, macOS (Intel/ARM), Windows
|
|
3
|
+
Node.js/TypeScript bindings for [LiteParse](https://github.com/run-llama/liteparse) — fast, lightweight PDF and document parsing with spatial text extraction.
|
|
35
4
|
|
|
36
5
|
## Installation
|
|
37
6
|
|
|
38
|
-
### CLI Tool
|
|
39
|
-
|
|
40
|
-
#### Option 1: Global Install (Recommended)
|
|
41
|
-
|
|
42
|
-
Install globally via npm to use the `lit` command anywhere:
|
|
43
|
-
|
|
44
|
-
```bash
|
|
45
|
-
npm i -g @llamaindex/liteparse
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
Then use it:
|
|
49
|
-
|
|
50
|
-
```bash
|
|
51
|
-
lit parse document.pdf
|
|
52
|
-
lit screenshot document.pdf
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
For macOS and Linux users, `liteparse` can also be installed via `brew`:
|
|
56
|
-
|
|
57
|
-
```bash
|
|
58
|
-
brew tap run-llama/liteparse
|
|
59
|
-
brew install llamaindex-liteparse
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
#### Option 2: Install from Source
|
|
63
|
-
|
|
64
|
-
You can clone the repo and install the CLI globally from source:
|
|
65
|
-
|
|
66
|
-
```
|
|
67
|
-
git clone https://github.com/run-llama/liteparse.git
|
|
68
|
-
cd liteparse
|
|
69
|
-
npm run build
|
|
70
|
-
npm pack
|
|
71
|
-
npm install -g ./liteparse-*.tgz
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
### Agent Skill
|
|
75
|
-
|
|
76
|
-
You can use `liteparse` as an agent skill, downloading it with the `skills` CLI tool:
|
|
77
|
-
|
|
78
|
-
```bash
|
|
79
|
-
npx skills add run-llama/llamaparse-agent-skills --skill liteparse
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
Alternatively, copy the [`SKILL.md`](https://github.com/run-llama/llamaparse-agent-skills/blob/main/skills/liteparse/SKILL.md) file into your own skills setup.
|
|
83
|
-
|
|
84
|
-
## Usage
|
|
85
|
-
|
|
86
|
-
### Parse Files
|
|
87
|
-
|
|
88
|
-
```bash
|
|
89
|
-
# Basic parsing
|
|
90
|
-
lit parse document.pdf
|
|
91
|
-
|
|
92
|
-
# Parse with specific format
|
|
93
|
-
lit parse document.pdf --format json -o output.md
|
|
94
|
-
|
|
95
|
-
# Parse specific pages
|
|
96
|
-
lit parse document.pdf --target-pages "1-5,10,15-20"
|
|
97
|
-
|
|
98
|
-
# Parse without OCR
|
|
99
|
-
lit parse document.pdf --no-ocr
|
|
100
|
-
|
|
101
|
-
# Parse a remote PDF
|
|
102
|
-
curl -sL https://example.com/report.pdf | lit parse -
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
### Batch Parsing
|
|
106
|
-
|
|
107
|
-
You can also parse an entire directory of documents:
|
|
108
|
-
|
|
109
7
|
```bash
|
|
110
|
-
|
|
8
|
+
npm i @llamaindex/liteparse
|
|
111
9
|
```
|
|
112
10
|
|
|
113
|
-
|
|
11
|
+
This also installs the `lit` CLI command (use `npm i -g` for global access).
|
|
114
12
|
|
|
115
|
-
|
|
13
|
+
## Quick Start
|
|
116
14
|
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
lit screenshot document.pdf -o ./screenshots
|
|
120
|
-
|
|
121
|
-
# Screenshot specific pages
|
|
122
|
-
lit screenshot document.pdf --target-pages "1,3,5" -o ./screenshots
|
|
15
|
+
```typescript
|
|
16
|
+
import { LiteParse } from '@llamaindex/liteparse';
|
|
123
17
|
|
|
124
|
-
|
|
125
|
-
|
|
18
|
+
const parser = new LiteParse();
|
|
19
|
+
const result = await parser.parse('document.pdf');
|
|
20
|
+
console.log(result.text);
|
|
126
21
|
|
|
127
|
-
|
|
128
|
-
|
|
22
|
+
// Access structured data
|
|
23
|
+
for (const page of result.pages) {
|
|
24
|
+
console.log(`Page ${page.pageNum}: ${page.textItems.length} text items`);
|
|
25
|
+
}
|
|
129
26
|
```
|
|
130
27
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
Install as a dependency in your project:
|
|
28
|
+
## Configuration
|
|
134
29
|
|
|
135
|
-
|
|
136
|
-
npm install @llamaindex/liteparse
|
|
137
|
-
# or
|
|
138
|
-
pnpm add @llamaindex/liteparse
|
|
139
|
-
```
|
|
30
|
+
All options are passed to the constructor:
|
|
140
31
|
|
|
141
32
|
```typescript
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
33
|
+
const parser = new LiteParse({
|
|
34
|
+
ocrEnabled: true, // Enable OCR (default: true)
|
|
35
|
+
ocrLanguage: 'eng', // Tesseract language code
|
|
36
|
+
ocrServerUrl: undefined, // HTTP OCR server URL (optional)
|
|
37
|
+
tessdataPath: undefined, // Path to tessdata directory (optional)
|
|
38
|
+
maxPages: 1000, // Max pages to parse
|
|
39
|
+
targetPages: '1-5,10', // Specific pages (optional)
|
|
40
|
+
dpi: 150, // Rendering DPI
|
|
41
|
+
preserveVerySmallText: false, // Keep tiny text
|
|
42
|
+
password: undefined, // Password for protected documents
|
|
43
|
+
quiet: false, // Suppress progress output
|
|
44
|
+
numWorkers: 4, // Concurrent OCR workers
|
|
45
|
+
});
|
|
147
46
|
```
|
|
148
47
|
|
|
149
|
-
|
|
48
|
+
## Parsing from Bytes
|
|
150
49
|
|
|
151
|
-
|
|
50
|
+
Pass a `Buffer` or `Uint8Array` directly — useful for HTTP responses or in-memory data:
|
|
152
51
|
|
|
153
52
|
```typescript
|
|
154
|
-
import { LiteParse } from '@llamaindex/liteparse';
|
|
155
53
|
import { readFile } from 'fs/promises';
|
|
156
54
|
|
|
157
|
-
const parser = new LiteParse();
|
|
158
|
-
|
|
159
|
-
// From a file read
|
|
160
55
|
const pdfBytes = await readFile('document.pdf');
|
|
161
56
|
const result = await parser.parse(pdfBytes);
|
|
162
|
-
|
|
163
|
-
// From an HTTP response
|
|
164
|
-
const response = await fetch('https://example.com/document.pdf');
|
|
165
|
-
const buffer = Buffer.from(await response.arrayBuffer());
|
|
166
|
-
const result2 = await parser.parse(buffer);
|
|
167
|
-
```
|
|
168
|
-
|
|
169
|
-
Non-PDF buffers (images, Office documents) are written to a temp directory for format conversion. Screenshots also work with buffer input:
|
|
170
|
-
|
|
171
|
-
```typescript
|
|
172
|
-
const screenshots = await parser.screenshot(pdfBytes, [1, 2, 3]);
|
|
173
|
-
```
|
|
174
|
-
|
|
175
|
-
### CLI Options
|
|
176
|
-
|
|
177
|
-
#### Parse Command
|
|
178
|
-
|
|
179
|
-
```
|
|
180
|
-
$ lit parse --help
|
|
181
|
-
Usage: lit parse [options] <file>
|
|
182
|
-
|
|
183
|
-
Parse a document file (PDF, DOCX, XLSX, PPTX, images, etc.)
|
|
184
|
-
|
|
185
|
-
Options:
|
|
186
|
-
-o, --output <file> Output file path
|
|
187
|
-
--format <format> Output format: json|text (default: "text")
|
|
188
|
-
--ocr-server-url <url> HTTP OCR server URL (uses Tesseract if not provided)
|
|
189
|
-
--no-ocr Disable OCR
|
|
190
|
-
--ocr-language <lang> OCR language(s) (default: "en")
|
|
191
|
-
--num-workers <n> Number of pages to OCR in parallel (default: CPU cores - 1)
|
|
192
|
-
--max-pages <n> Max pages to parse (default: "10000")
|
|
193
|
-
--target-pages <pages> Target pages (e.g., "1-5,10,15-20")
|
|
194
|
-
--dpi <dpi> DPI for rendering (default: "150")
|
|
195
|
-
--no-precise-bbox Disable precise bounding boxes
|
|
196
|
-
--preserve-small-text Preserve very small text
|
|
197
|
-
--password <password> Password for encrypted/protected documents
|
|
198
|
-
--config <file> Config file (JSON)
|
|
199
|
-
-q, --quiet Suppress progress output
|
|
200
|
-
-h, --help display help for command
|
|
201
|
-
```
|
|
202
|
-
|
|
203
|
-
#### Batch Parse Command
|
|
204
|
-
|
|
205
|
-
```
|
|
206
|
-
$ lit batch-parse --help
|
|
207
|
-
Usage: lit batch-parse [options] <input-dir> <output-dir>
|
|
208
|
-
|
|
209
|
-
Parse multiple documents in batch mode (reuses PDF engine for efficiency)
|
|
210
|
-
|
|
211
|
-
Options:
|
|
212
|
-
--format <format> Output format: json|text (default: "text")
|
|
213
|
-
--ocr-server-url <url> HTTP OCR server URL (uses Tesseract if not provided)
|
|
214
|
-
--no-ocr Disable OCR
|
|
215
|
-
--ocr-language <lang> OCR language(s) (default: "en")
|
|
216
|
-
--num-workers <n> Number of pages to OCR in parallel (default: CPU cores - 1)
|
|
217
|
-
--max-pages <n> Max pages to parse per file (default: "10000")
|
|
218
|
-
--dpi <dpi> DPI for rendering (default: "150")
|
|
219
|
-
--no-precise-bbox Disable precise bounding boxes
|
|
220
|
-
--recursive Recursively search input directory
|
|
221
|
-
--extension <ext> Only process files with this extension (e.g., ".pdf")
|
|
222
|
-
--password <password> Password for encrypted/protected documents (applied to all files)
|
|
223
|
-
--config <file> Config file (JSON)
|
|
224
|
-
-q, --quiet Suppress progress output
|
|
225
|
-
-h, --help display help for command
|
|
226
|
-
```
|
|
227
|
-
|
|
228
|
-
#### Screenshot Command
|
|
229
|
-
|
|
230
|
-
```
|
|
231
|
-
$ lit screenshot --help
|
|
232
|
-
Usage: lit screenshot [options] <file>
|
|
233
|
-
|
|
234
|
-
Generate screenshots of PDF pages
|
|
235
|
-
|
|
236
|
-
Options:
|
|
237
|
-
-o, --output-dir <dir> Output directory for screenshots (default: "./screenshots")
|
|
238
|
-
--target-pages <pages> Page numbers to screenshot (e.g., "1,3,5" or "1-5")
|
|
239
|
-
--dpi <dpi> DPI for rendering (default: "150")
|
|
240
|
-
--format <format> Image format: png|jpg (default: "png")
|
|
241
|
-
--password <password> Password for encrypted/protected documents
|
|
242
|
-
--config <file> Config file (JSON)
|
|
243
|
-
-q, --quiet Suppress progress output
|
|
244
|
-
-h, --help display help for command
|
|
245
|
-
```
|
|
246
|
-
|
|
247
|
-
## OCR Setup
|
|
248
|
-
|
|
249
|
-
### Default: Tesseract.js
|
|
250
|
-
|
|
251
|
-
```bash
|
|
252
|
-
# Tesseract is enabled by default
|
|
253
|
-
lit parse document.pdf
|
|
254
|
-
|
|
255
|
-
# Specify language
|
|
256
|
-
lit parse document.pdf --ocr-language fra
|
|
257
|
-
|
|
258
|
-
# Disable OCR
|
|
259
|
-
lit parse document.pdf --no-ocr
|
|
57
|
+
console.log(result.text);
|
|
260
58
|
```
|
|
261
59
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
```bash
|
|
265
|
-
export TESSDATA_PREFIX=/path/to/tessdata
|
|
266
|
-
lit parse document.pdf --ocr-language eng
|
|
267
|
-
```
|
|
60
|
+
## Screenshots
|
|
268
61
|
|
|
269
|
-
|
|
62
|
+
Generate PNG screenshots of document pages:
|
|
270
63
|
|
|
271
64
|
```typescript
|
|
272
|
-
const
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
For higher accuracy or better performance, you can use an HTTP OCR server. We provide ready-to-use example wrappers for popular OCR engines:
|
|
278
|
-
|
|
279
|
-
- [EasyOCR](ocr/easyocr/README.md)
|
|
280
|
-
- [PaddleOCR](ocr/paddleocr/README.md)
|
|
281
|
-
|
|
282
|
-
You can integrate any OCR service by implementing the simple LiteParse OCR API specification (see [`OCR_API_SPEC.md`](OCR_API_SPEC.md)).
|
|
283
|
-
|
|
284
|
-
The API requires:
|
|
285
|
-
- POST `/ocr` endpoint
|
|
286
|
-
- Accepts `file` and `language` parameters
|
|
287
|
-
- Returns JSON: `{ results: [{ text, bbox: [x1,y1,x2,y2], confidence }] }`
|
|
288
|
-
|
|
289
|
-
See the example servers in `ocr/easyocr/` and `ocr/paddleocr/` as templates.
|
|
290
|
-
|
|
291
|
-
For the complete OCR API specification, see [`OCR_API_SPEC.md`](OCR_API_SPEC.md).
|
|
292
|
-
|
|
293
|
-
## Multi-Format Input Support
|
|
294
|
-
|
|
295
|
-
LiteParse supports **automatic conversion** of various document formats to PDF before parsing. This makes it unique compared to other PDF-only parsing tools!
|
|
296
|
-
|
|
297
|
-
### Supported Input Formats
|
|
298
|
-
|
|
299
|
-
#### Office Documents (via LibreOffice)
|
|
300
|
-
- **Word**: `.doc`, `.docx`, `.docm`, `.odt`, `.rtf`
|
|
301
|
-
- **PowerPoint**: `.ppt`, `.pptx`, `.pptm`, `.odp`
|
|
302
|
-
- **Spreadsheets**: `.xls`, `.xlsx`, `.xlsm`, `.ods`, `.csv`, `.tsv`
|
|
303
|
-
|
|
304
|
-
Just install the dependency and LiteParse will automatically convert these formats to PDF for parsing:
|
|
305
|
-
|
|
306
|
-
```bash
|
|
307
|
-
# macOS
|
|
308
|
-
brew install --cask libreoffice
|
|
309
|
-
|
|
310
|
-
# Ubuntu/Debian
|
|
311
|
-
apt-get install libreoffice
|
|
312
|
-
|
|
313
|
-
# Windows
|
|
314
|
-
choco install libreoffice-fresh # might require admin permissions
|
|
315
|
-
```
|
|
316
|
-
|
|
317
|
-
> _For Windows, you might need to add the path to the directory containing LibreOffice CLI executable (generally `C:\Program Files\LibreOffice\program`) to the environment variables and re-start the machine._
|
|
318
|
-
|
|
319
|
-
#### Images (via ImageMagick)
|
|
320
|
-
- **Formats**: `.jpg`, `.jpeg`, `.png`, `.gif`, `.bmp`, `.tiff`, `.webp`, `.svg`
|
|
321
|
-
|
|
322
|
-
Just install ImageMagick and LiteParse will convert images to PDF for parsing (with OCR):
|
|
323
|
-
|
|
324
|
-
```bash
|
|
325
|
-
# macOS
|
|
326
|
-
brew install imagemagick
|
|
327
|
-
|
|
328
|
-
# Ubuntu/Debian
|
|
329
|
-
apt-get install imagemagick
|
|
330
|
-
|
|
331
|
-
# Windows
|
|
332
|
-
choco install imagemagick.app # might require admin permissions
|
|
333
|
-
```
|
|
334
|
-
|
|
335
|
-
## Environment Variables
|
|
336
|
-
|
|
337
|
-
| Variable | Description |
|
|
338
|
-
|----------|-------------|
|
|
339
|
-
| `TESSDATA_PREFIX` | Path to a directory containing Tesseract `.traineddata` files. Used for offline/air-gapped environments where Tesseract.js cannot download language data from the internet. |
|
|
340
|
-
| `LITEPARSE_TMPDIR` | Override the temp directory used for format conversion and intermediate files. Defaults to the OS temp directory (`os.tmpdir()`). Useful in containerized or read-only filesystem environments. |
|
|
341
|
-
|
|
342
|
-
## Configuration
|
|
343
|
-
|
|
344
|
-
You can configure parsing options via CLI flags or a JSON config file. The config file allows you to set sensible defaults and override as needed.
|
|
345
|
-
|
|
346
|
-
### Config File Example
|
|
347
|
-
|
|
348
|
-
Create a `liteparse.config.json` file:
|
|
349
|
-
|
|
350
|
-
```json
|
|
351
|
-
{
|
|
352
|
-
"ocrLanguage": "en",
|
|
353
|
-
"ocrEnabled": true,
|
|
354
|
-
"maxPages": 1000,
|
|
355
|
-
"dpi": 150,
|
|
356
|
-
"outputFormat": "json",
|
|
357
|
-
"preciseBoundingBox": true,
|
|
358
|
-
"preserveVerySmallText": false,
|
|
359
|
-
"password": "optional_password"
|
|
65
|
+
const screenshots = parser.screenshot('document.pdf', [1, 2, 3]);
|
|
66
|
+
for (const s of screenshots) {
|
|
67
|
+
console.log(`Page ${s.pageNum}: ${s.width}x${s.height}`);
|
|
68
|
+
// s.imageBuffer contains PNG bytes
|
|
360
69
|
}
|
|
361
70
|
```
|
|
362
71
|
|
|
363
|
-
|
|
72
|
+
## Supported Formats
|
|
364
73
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
}
|
|
371
|
-
```
|
|
74
|
+
- PDF (`.pdf`)
|
|
75
|
+
- Microsoft Office (`.docx`, `.xlsx`, `.pptx`, etc.) — requires LibreOffice
|
|
76
|
+
- OpenDocument (`.odt`, `.ods`, `.odp`) — requires LibreOffice
|
|
77
|
+
- Images (`.png`, `.jpg`, `.tiff`, etc.) — requires ImageMagick
|
|
78
|
+
- And more!
|
|
372
79
|
|
|
373
|
-
|
|
80
|
+
## CLI
|
|
374
81
|
|
|
375
|
-
|
|
376
|
-
lit parse document.pdf --config liteparse.config.json
|
|
377
|
-
```
|
|
378
|
-
|
|
379
|
-
## Development
|
|
380
|
-
|
|
381
|
-
We provide a fairly rich `AGENTS.md`/`CLAUDE.md` that we recommend using to help with development + coding agents.
|
|
82
|
+
The npm package includes the `lit` CLI:
|
|
382
83
|
|
|
383
84
|
```bash
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
npm run build
|
|
389
|
-
|
|
390
|
-
# Build Typescript (Windows)
|
|
391
|
-
npm run build:windows
|
|
392
|
-
|
|
393
|
-
# Watch mode
|
|
394
|
-
npm run dev
|
|
395
|
-
|
|
396
|
-
# Test parsing
|
|
397
|
-
npm test
|
|
85
|
+
lit parse document.pdf
|
|
86
|
+
lit parse document.pdf --format json -o output.json
|
|
87
|
+
lit screenshot document.pdf -o ./screenshots
|
|
88
|
+
lit batch-parse ./input ./output
|
|
398
89
|
```
|
|
399
|
-
|
|
400
|
-
## License
|
|
401
|
-
|
|
402
|
-
Apache 2.0
|
|
403
|
-
|
|
404
|
-
## Credits
|
|
405
|
-
|
|
406
|
-
Built on top of:
|
|
407
|
-
|
|
408
|
-
- [PDF.js](https://github.com/mozilla/pdf.js) - PDF parsing engine
|
|
409
|
-
- [Tesseract.js](https://github.com/naptha/tesseract.js) - In-process OCR engine
|
|
410
|
-
- [EasyOCR](https://github.com/JaidedAI/EasyOCR) - HTTP OCR server (optional)
|
|
411
|
-
- [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) - HTTP OCR server (optional)
|
|
412
|
-
- [Sharp](https://github.com/lovell/sharp) - Image processing
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { program } from "commander";
|
|
3
|
+
import { LiteParse } from "./lib.js";
|
|
4
|
+
import { readFileSync } from "node:fs";
|
|
5
|
+
import { writeFileSync } from "node:fs";
|
|
6
|
+
// Top-level CLI metadata: program name, description and the version string
// reported by `--version`.
// NOTE(review): the version is hard-coded as "2.0.0" while this diff is for
// package version 2.0.0-beta.0 — confirm whether the two should match.
// NOTE(review): the README refers to the command as `lit`, but the program
// name here is "liteparse" — confirm which name help output should show.
program
|
|
7
|
+
.name("liteparse")
|
|
8
|
+
.description("Fast, lightweight PDF and document parsing")
|
|
9
|
+
.version("2.0.0");
|
|
10
|
+
/**
 * Commander parser for integer-valued options (`--max-pages`, `--num-workers`).
 *
 * Commander calls option parsers as `fn(value, previous)`. Passing the bare
 * `parseInt` would forward `previous` as the radix, so the radix is pinned to
 * 10 here and the second argument is ignored.
 *
 * @param {string} value Raw option text from the command line.
 * @returns {number} Parsed integer, or `NaN` when `value` is not numeric.
 */
function parseIntegerOption(value) {
    return Number.parseInt(value, 10);
}

/**
 * Commander parser for `--dpi`; ignores Commander's `previous` argument.
 *
 * @param {string} value Raw option text from the command line.
 * @returns {number} Parsed number, or `NaN` when `value` is not numeric.
 */
function parseDecimalOption(value) {
    return Number.parseFloat(value);
}

/**
 * Fail loudly when a numeric option was supplied but did not parse.
 *
 * Without this check a `NaN` value is falsy and would be dropped silently by
 * the truthiness tests below, hiding a typo on the command line.
 *
 * @param {string} flag Option name used in the error message.
 * @param {number | undefined} value Parsed option value, if the flag was given.
 * @throws {Error} When `value` is `NaN`.
 */
function assertNumericOption(flag, value) {
    if (value !== undefined && Number.isNaN(value)) {
        throw new Error(`Invalid value for ${flag}: expected a number`);
    }
}

// `parse` command: builds a config from an optional JSON config file plus CLI
// flags (flags win), runs the parser and writes text or JSON to a file or stdout.
program
    .command("parse")
    .description("Parse a document and extract text")
    .argument("<file>", "Path to the document file")
    .option("-o, --output <file>", "Output file path")
    .option("--format <format>", 'Output format: json|text (default: "text")')
    .option("--ocr-server-url <url>", "HTTP OCR server URL")
    .option("--no-ocr", "Disable OCR")
    .option("--ocr-language <lang>", "OCR language (default: eng)")
    .option("--max-pages <n>", "Max pages to parse", parseIntegerOption)
    .option("--target-pages <pages>", 'Pages to parse (e.g., "1-5,10,15-20")')
    .option("--dpi <dpi>", "Rendering DPI", parseDecimalOption)
    .option("--preserve-small-text", "Keep very small text")
    .option("--password <password>", "Password for encrypted documents")
    .option("--config <file>", "JSON config file path")
    .option("-q, --quiet", "Suppress progress output")
    .option("--num-workers <n>", "Number of concurrent OCR workers", parseIntegerOption)
    .action(async (file, opts) => {
        try {
            // Reject unparsable numbers up front instead of ignoring them.
            assertNumericOption("--max-pages", opts.maxPages);
            assertNumericOption("--dpi", opts.dpi);
            assertNumericOption("--num-workers", opts.numWorkers);

            const config = {};
            // Load config file if provided
            if (opts.config) {
                const fileConfig = JSON.parse(readFileSync(opts.config, "utf-8"));
                // Only a plain JSON object is a meaningful config; merging a
                // string or array would copy index keys into `config`.
                if (fileConfig === null ||
                    typeof fileConfig !== "object" ||
                    Array.isArray(fileConfig)) {
                    throw new Error(`Config file ${opts.config} must contain a JSON object`);
                }
                Object.assign(config, fileConfig);
            }
            // CLI options override config file
            if (opts.format)
                config.outputFormat = opts.format;
            if (opts.ocrServerUrl)
                config.ocrServerUrl = opts.ocrServerUrl;
            // `--no-ocr` makes Commander set `ocr` to false; otherwise leave the config value alone.
            if (opts.ocr === false)
                config.ocrEnabled = false;
            if (opts.ocrLanguage)
                config.ocrLanguage = opts.ocrLanguage;
            if (opts.maxPages)
                config.maxPages = opts.maxPages;
            if (opts.targetPages)
                config.targetPages = opts.targetPages;
            if (opts.dpi)
                config.dpi = opts.dpi;
            if (opts.preserveSmallText)
                config.preserveVerySmallText = true;
            if (opts.password)
                config.password = opts.password;
            if (opts.quiet)
                config.quiet = true;
            if (opts.numWorkers)
                config.numWorkers = opts.numWorkers;
            // Default CLI output to text (library defaults to json)
            if (!config.outputFormat)
                config.outputFormat = "text";
            const parser = new LiteParse(config);
            const result = await parser.parse(file);
            // JSON output exposes the page number under the key `page`.
            const output = config.outputFormat === "json"
                ? JSON.stringify({
                    pages: result.pages.map((p) => ({
                        page: p.pageNum,
                        width: p.width,
                        height: p.height,
                        text: p.text,
                        textItems: p.textItems,
                    })),
                }, null, 2)
                : result.text;
            if (opts.output) {
                writeFileSync(opts.output, output, "utf-8");
            }
            else {
                process.stdout.write(output);
            }
        }
        catch (err) {
            // Any failure is reported on stderr and exits with status 1.
            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
            process.exit(1);
        }
    });
|
|
86
|
+
// Hand the process arguments to Commander, which dispatches to the command registered above.
program.parse(process.argv);
|
|
87
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,SAAS,EAAwB,MAAM,UAAU,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAExC,OAAO;KACJ,IAAI,CAAC,WAAW,CAAC;KACjB,WAAW,CAAC,4CAA4C,CAAC;KACzD,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,mCAAmC,CAAC;KAChD,QAAQ,CAAC,QAAQ,EAAE,2BAA2B,CAAC;KAC/C,MAAM,CAAC,qBAAqB,EAAE,kBAAkB,CAAC;KACjD,MAAM,CAAC,mBAAmB,EAAE,4CAA4C,CAAC;KACzE,MAAM,CAAC,wBAAwB,EAAE,qBAAqB,CAAC;KACvD,MAAM,CAAC,UAAU,EAAE,aAAa,CAAC;KACjC,MAAM,CAAC,uBAAuB,EAAE,6BAA6B,CAAC;KAC9D,MAAM,CAAC,iBAAiB,EAAE,oBAAoB,EAAE,QAAQ,CAAC;KACzD,MAAM,CACL,wBAAwB,EACxB,uCAAuC,CACxC;KACA,MAAM,CAAC,aAAa,EAAE,eAAe,EAAE,UAAU,CAAC;KAClD,MAAM,CAAC,uBAAuB,EAAE,sBAAsB,CAAC;KACvD,MAAM,CAAC,uBAAuB,EAAE,kCAAkC,CAAC;KACnE,MAAM,CAAC,iBAAiB,EAAE,uBAAuB,CAAC;KAClD,MAAM,CAAC,aAAa,EAAE,0BAA0B,CAAC;KACjD,MAAM,CAAC,mBAAmB,EAAE,kCAAkC,EAAE,QAAQ,CAAC;KACzE,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,IAA6B,EAAE,EAAE;IAC5D,IAAI,CAAC;QACH,MAAM,MAAM,GAA6B,EAAE,CAAC;QAE5C,+BAA+B;QAC/B,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAC3B,YAAY,CAAC,IAAI,CAAC,MAAgB,EAAE,OAAO,CAAC,CAC7C,CAAC;YACF,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QACpC,CAAC;QAED,mCAAmC;QACnC,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,MAAyB,CAAC;QACtE,IAAI,IAAI,CAAC,YAAY;YACnB,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,YAAsB,CAAC;QACpD,IAAI,IAAI,CAAC,GAAG,KAAK,KAAK;YAAE,MAAM,CAAC,UAAU,GAAG,KAAK,CAAC;QAClD,IAAI,IAAI,CAAC,WAAW;YAAE,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAqB,CAAC;QACtE,IAAI,IAAI,CAAC,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAkB,CAAC;QAC7D,IAAI,IAAI,CAAC,WAAW;YAAE,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAqB,CAAC;QACtE,IAAI,IAAI,CAAC,GAAG;YAAE,MAAM,CAAC,GAAG,GAAG,IAAI,CAAC,GAAa,CAAC;QAC9C,IAAI,IAAI,CAAC,iBAAiB;YAAE,MAAM,CAAC,qBAAqB,GAAG,IAAI,CAAC;QAChE,IAAI,IAAI,CAAC,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAkB,CAAC;QAC7D,IAAI,IAAI,CAAC,KAAK;YAAE,MAAM,CAAC,KAAK,
GAAG,IAAI,CAAC;QACpC,IAAI,IAAI,CAAC,UAAU;YAAE,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,UAAoB,CAAC;QAEnE,wDAAwD;QACxD,IAAI,CAAC,MAAM,CAAC,YAAY;YAAE,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC;QAEvD,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAExC,MAAM,MAAM,GACV,MAAM,CAAC,YAAY,KAAK,MAAM;YAC5B,CAAC,CAAC,IAAI,CAAC,SAAS,CACZ;gBACE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC9B,IAAI,EAAE,CAAC,CAAC,OAAO;oBACf,KAAK,EAAE,CAAC,CAAC,KAAK;oBACd,MAAM,EAAE,CAAC,CAAC,MAAM;oBAChB,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,SAAS,EAAE,CAAC,CAAC,SAAS;iBACvB,CAAC,CAAC;aACJ,EACD,IAAI,EACJ,CAAC,CACF;YACH,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC;QAElB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,aAAa,CAAC,IAAI,CAAC,MAAgB,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QACxD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CACX,UAAU,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7D,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
package/dist/lib.d.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
 * Input accepted by `LiteParse.parse`: a string (the README passes a file
 * path such as 'document.pdf') or raw bytes as a `Buffer` / `Uint8Array`.
 */
export type LiteParseInput = string | Buffer | Uint8Array;
|
|
2
|
+
/** Output formats selectable through `LiteParseConfig.outputFormat` and the CLI `--format` flag. */
export type OutputFormat = "json" | "text";
|
|
3
|
+
/**
 * Parser configuration. The `LiteParse` constructor takes a
 * `Partial<LiteParseConfig>`, so callers may supply any subset.
 *
 * - `ocrEnabled` / `ocrLanguage`: switch OCR on or off and pick the Tesseract language code.
 * - `ocrServerUrl`: optional HTTP OCR server URL.
 * - `tessdataPath`: optional path to a tessdata directory.
 * - `maxPages` / `targetPages`: page limit and page selection string such as "1-5,10".
 * - `dpi`: rendering DPI.
 * - `outputFormat`: "json" or "text".
 * - `preserveVerySmallText`: keep very small text.
 * - `password`: password for protected documents.
 * - `quiet`: suppress progress output.
 * - `numWorkers`: number of concurrent OCR workers.
 */
export interface LiteParseConfig {
|
|
4
|
+
ocrLanguage: string;
|
|
5
|
+
ocrEnabled: boolean;
|
|
6
|
+
ocrServerUrl?: string;
|
|
7
|
+
tessdataPath?: string;
|
|
8
|
+
maxPages: number;
|
|
9
|
+
targetPages?: string;
|
|
10
|
+
dpi: number;
|
|
11
|
+
outputFormat: OutputFormat;
|
|
12
|
+
preserveVerySmallText: boolean;
|
|
13
|
+
password?: string;
|
|
14
|
+
quiet: boolean;
|
|
15
|
+
numWorkers: number;
|
|
16
|
+
}
|
|
17
|
+
/**
 * One piece of extracted text together with its box (`x`, `y`, `width`,
 * `height`) and optional font details.
 *
 * NOTE(review): the coordinate units/origin and the meaning of `confidence`
 * (presumably an OCR score) are not stated in this declaration — confirm
 * against the implementation.
 */
export interface TextItem {
|
|
18
|
+
text: string;
|
|
19
|
+
x: number;
|
|
20
|
+
y: number;
|
|
21
|
+
width: number;
|
|
22
|
+
height: number;
|
|
23
|
+
fontName?: string;
|
|
24
|
+
fontSize?: number;
|
|
25
|
+
confidence?: number;
|
|
26
|
+
}
|
|
27
|
+
/** A single parsed page: its page number, dimensions, page text and the text items found on it. */
export interface ParsedPage {
|
|
28
|
+
pageNum: number;
|
|
29
|
+
width: number;
|
|
30
|
+
height: number;
|
|
31
|
+
text: string;
|
|
32
|
+
textItems: TextItem[];
|
|
33
|
+
}
|
|
34
|
+
/**
 * Value resolved by `LiteParse.parse`: the per-page results plus a `text`
 * string for the document (the CLI prints this field for text output).
 */
export interface ParseResult {
|
|
35
|
+
pages: ParsedPage[];
|
|
36
|
+
text: string;
|
|
37
|
+
}
|
|
38
|
+
/**
 * One rendered page returned by `LiteParse.screenshot`. `imageBuffer` holds
 * the image bytes (PNG according to the README).
 */
export interface ScreenshotResult {
|
|
39
|
+
pageNum: number;
|
|
40
|
+
width: number;
|
|
41
|
+
height: number;
|
|
42
|
+
imageBuffer: Buffer;
|
|
43
|
+
}
|
|
44
|
+
/**
 * Parser entry point.
 *
 * - The constructor accepts an optional partial configuration.
 * - `parse` is asynchronous and takes a string, `Buffer` or `Uint8Array`.
 * - `screenshot` is declared synchronous and takes only a string input plus
 *   an optional list of page numbers.
 * - `getConfig` returns a full `LiteParseConfig`.
 *
 * NOTE(review): `_native` and `_config` are private and untyped in this
 * declaration; their roles are not visible here.
 */
export declare class LiteParse {
|
|
45
|
+
private _native;
|
|
46
|
+
private _config;
|
|
47
|
+
constructor(userConfig?: Partial<LiteParseConfig>);
|
|
48
|
+
parse(input: LiteParseInput): Promise<ParseResult>;
|
|
49
|
+
screenshot(input: string, pageNumbers?: number[]): ScreenshotResult[];
|
|
50
|
+
getConfig(): LiteParseConfig;
|
|
51
|
+
}
|
|
52
|
+
/** Options for `searchItems`: the phrase to look for and an optional case-sensitivity switch. */
export interface SearchItemsOptions {
|
|
53
|
+
phrase: string;
|
|
54
|
+
caseSensitive?: boolean;
|
|
55
|
+
}
|
|
56
|
+
/**
 * Search a list of text items using `options.phrase` and return a list of text items.
 *
 * NOTE(review): the matching rules (substring vs. whole item, phrases spanning
 * several items, the default for `caseSensitive`) are not visible in this
 * declaration file — confirm against the implementation.
 */
export declare function searchItems(items: TextItem[], options: SearchItemsOptions): TextItem[];
|
|
57
|
+
export default LiteParse;
|
|
58
|
+
//# sourceMappingURL=lib.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lib.d.ts","sourceRoot":"","sources":["../src/lib.ts"],"names":[],"mappings":"AAaA,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,MAAM,GAAG,UAAU,CAAC;AAC1D,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,MAAM,CAAC;AAE3C,MAAM,WAAW,eAAe;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,OAAO,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,GAAG,EAAE,MAAM,CAAC;IACZ,YAAY,EAAE,YAAY,CAAC;IAC3B,qBAAqB,EAAE,OAAO,CAAC;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,OAAO,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,UAAU,EAAE,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACrB;AAMD,qBAAa,SAAS;IACpB,OAAO,CAAC,OAAO,CAAkB;IACjC,OAAO,CAAC,OAAO,CAAkB;gBAErB,UAAU,GAAE,OAAO,CAAC,eAAe,CAAM;IAoC/C,KAAK,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IAWxD,UAAU,CACR,KAAK,EAAE,MAAM,EACb,WAAW,CAAC,EAAE,MAAM,EAAE,GACrB,gBAAgB,EAAE;IASrB,SAAS,IAAI,eAAe;CAG7B;AA6BD,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,wBAAgB,WAAW,CACzB,KAAK,EAAE,QAAQ,EAAE,EACjB,OAAO,EAAE,kBAAkB,GAC1B,QAAQ,EAAE,CAOZ;AAED,eAAe,SAAS,CAAC"}
|