@llamaindex/liteparse 1.5.3 → 2.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (578)
  1. package/README.md +49 -448
  2. package/dist/cli.d.ts +3 -0
  3. package/dist/cli.d.ts.map +1 -0
  4. package/dist/cli.js +87 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/lib.d.ts +58 -0
  7. package/dist/lib.d.ts.map +1 -0
  8. package/dist/lib.js +88 -0
  9. package/dist/lib.js.map +1 -0
  10. package/dist/native.d.ts +54 -0
  11. package/dist/native.d.ts.map +1 -0
  12. package/dist/native.js +70 -0
  13. package/dist/native.js.map +1 -0
  14. package/libpdfium.so +0 -0
  15. package/liteparse.linux-x64-gnu.node +0 -0
  16. package/package.json +36 -50
  17. package/LICENSE +0 -201
  18. package/dist/cli/parse.d.ts +0 -4
  19. package/dist/cli/parse.d.ts.map +0 -1
  20. package/dist/cli/parse.js +0 -450
  21. package/dist/cli/parse.js.map +0 -1
  22. package/dist/package.json +0 -90
  23. package/dist/src/conversion/convertToPdf.d.ts +0 -65
  24. package/dist/src/conversion/convertToPdf.d.ts.map +0 -1
  25. package/dist/src/conversion/convertToPdf.js +0 -405
  26. package/dist/src/conversion/convertToPdf.js.map +0 -1
  27. package/dist/src/conversion/convertToPdf.test.d.ts +0 -2
  28. package/dist/src/conversion/convertToPdf.test.d.ts.map +0 -1
  29. package/dist/src/conversion/convertToPdf.test.js +0 -327
  30. package/dist/src/conversion/convertToPdf.test.js.map +0 -1
  31. package/dist/src/core/config.d.ts +0 -4
  32. package/dist/src/core/config.d.ts.map +0 -1
  33. package/dist/src/core/config.js +0 -26
  34. package/dist/src/core/config.js.map +0 -1
  35. package/dist/src/core/config.test.d.ts +0 -2
  36. package/dist/src/core/config.test.d.ts.map +0 -1
  37. package/dist/src/core/config.test.js +0 -21
  38. package/dist/src/core/config.test.js.map +0 -1
  39. package/dist/src/core/parser.d.ts +0 -92
  40. package/dist/src/core/parser.d.ts.map +0 -1
  41. package/dist/src/core/parser.js +0 -401
  42. package/dist/src/core/parser.js.map +0 -1
  43. package/dist/src/core/parser.test.d.ts +0 -2
  44. package/dist/src/core/parser.test.d.ts.map +0 -1
  45. package/dist/src/core/parser.test.js +0 -541
  46. package/dist/src/core/parser.test.js.map +0 -1
  47. package/dist/src/core/types.d.ts +0 -370
  48. package/dist/src/core/types.d.ts.map +0 -1
  49. package/dist/src/core/types.js +0 -2
  50. package/dist/src/core/types.js.map +0 -1
  51. package/dist/src/engines/ocr/http-simple.d.ts +0 -19
  52. package/dist/src/engines/ocr/http-simple.d.ts.map +0 -1
  53. package/dist/src/engines/ocr/http-simple.js +0 -69
  54. package/dist/src/engines/ocr/http-simple.js.map +0 -1
  55. package/dist/src/engines/ocr/http-simple.test.d.ts +0 -2
  56. package/dist/src/engines/ocr/http-simple.test.d.ts.map +0 -1
  57. package/dist/src/engines/ocr/http-simple.test.js +0 -108
  58. package/dist/src/engines/ocr/http-simple.test.js.map +0 -1
  59. package/dist/src/engines/ocr/interface.d.ts +0 -15
  60. package/dist/src/engines/ocr/interface.d.ts.map +0 -1
  61. package/dist/src/engines/ocr/interface.js +0 -2
  62. package/dist/src/engines/ocr/interface.js.map +0 -1
  63. package/dist/src/engines/ocr/tesseract.d.ts +0 -20
  64. package/dist/src/engines/ocr/tesseract.d.ts.map +0 -1
  65. package/dist/src/engines/ocr/tesseract.js +0 -162
  66. package/dist/src/engines/ocr/tesseract.js.map +0 -1
  67. package/dist/src/engines/ocr/tesseract.test.d.ts +0 -2
  68. package/dist/src/engines/ocr/tesseract.test.d.ts.map +0 -1
  69. package/dist/src/engines/ocr/tesseract.test.js +0 -94
  70. package/dist/src/engines/ocr/tesseract.test.js.map +0 -1
  71. package/dist/src/engines/pdf/interface.d.ts +0 -84
  72. package/dist/src/engines/pdf/interface.d.ts.map +0 -1
  73. package/dist/src/engines/pdf/interface.js +0 -2
  74. package/dist/src/engines/pdf/interface.js.map +0 -1
  75. package/dist/src/engines/pdf/pdfium-renderer.d.ts +0 -31
  76. package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +0 -1
  77. package/dist/src/engines/pdf/pdfium-renderer.js +0 -145
  78. package/dist/src/engines/pdf/pdfium-renderer.js.map +0 -1
  79. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +0 -2
  80. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +0 -1
  81. package/dist/src/engines/pdf/pdfium-renderer.test.js +0 -109
  82. package/dist/src/engines/pdf/pdfium-renderer.test.js.map +0 -1
  83. package/dist/src/engines/pdf/pdfjs.d.ts +0 -14
  84. package/dist/src/engines/pdf/pdfjs.d.ts.map +0 -1
  85. package/dist/src/engines/pdf/pdfjs.js +0 -804
  86. package/dist/src/engines/pdf/pdfjs.js.map +0 -1
  87. package/dist/src/engines/pdf/pdfjs.test.d.ts +0 -2
  88. package/dist/src/engines/pdf/pdfjs.test.d.ts.map +0 -1
  89. package/dist/src/engines/pdf/pdfjs.test.js +0 -225
  90. package/dist/src/engines/pdf/pdfjs.test.js.map +0 -1
  91. package/dist/src/engines/pdf/pdfjsImporter.d.ts +0 -5
  92. package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +0 -1
  93. package/dist/src/engines/pdf/pdfjsImporter.js +0 -45
  94. package/dist/src/engines/pdf/pdfjsImporter.js.map +0 -1
  95. package/dist/src/index.d.ts +0 -3
  96. package/dist/src/index.d.ts.map +0 -1
  97. package/dist/src/index.js +0 -5
  98. package/dist/src/index.js.map +0 -1
  99. package/dist/src/lib.d.ts +0 -19
  100. package/dist/src/lib.d.ts.map +0 -1
  101. package/dist/src/lib.js +0 -17
  102. package/dist/src/lib.js.map +0 -1
  103. package/dist/src/output/json.d.ts +0 -10
  104. package/dist/src/output/json.d.ts.map +0 -1
  105. package/dist/src/output/json.js +0 -32
  106. package/dist/src/output/json.js.map +0 -1
  107. package/dist/src/output/json.test.d.ts +0 -2
  108. package/dist/src/output/json.test.d.ts.map +0 -1
  109. package/dist/src/output/json.test.js +0 -199
  110. package/dist/src/output/json.test.js.map +0 -1
  111. package/dist/src/output/text.d.ts +0 -10
  112. package/dist/src/output/text.d.ts.map +0 -1
  113. package/dist/src/output/text.js +0 -17
  114. package/dist/src/output/text.js.map +0 -1
  115. package/dist/src/output/text.test.d.ts +0 -2
  116. package/dist/src/output/text.test.d.ts.map +0 -1
  117. package/dist/src/output/text.test.js +0 -65
  118. package/dist/src/output/text.test.js.map +0 -1
  119. package/dist/src/processing/bbox.d.ts +0 -20
  120. package/dist/src/processing/bbox.d.ts.map +0 -1
  121. package/dist/src/processing/bbox.js +0 -258
  122. package/dist/src/processing/bbox.js.map +0 -1
  123. package/dist/src/processing/bbox.test.d.ts +0 -2
  124. package/dist/src/processing/bbox.test.d.ts.map +0 -1
  125. package/dist/src/processing/bbox.test.js +0 -334
  126. package/dist/src/processing/bbox.test.js.map +0 -1
  127. package/dist/src/processing/cleanText.d.ts +0 -6
  128. package/dist/src/processing/cleanText.d.ts.map +0 -1
  129. package/dist/src/processing/cleanText.js +0 -73
  130. package/dist/src/processing/cleanText.js.map +0 -1
  131. package/dist/src/processing/cleanText.test.d.ts +0 -2
  132. package/dist/src/processing/cleanText.test.d.ts.map +0 -1
  133. package/dist/src/processing/cleanText.test.js +0 -46
  134. package/dist/src/processing/cleanText.test.js.map +0 -1
  135. package/dist/src/processing/grid.d.ts +0 -7
  136. package/dist/src/processing/grid.d.ts.map +0 -1
  137. package/dist/src/processing/grid.js +0 -13
  138. package/dist/src/processing/grid.js.map +0 -1
  139. package/dist/src/processing/gridDebugLogger.d.ts +0 -206
  140. package/dist/src/processing/gridDebugLogger.d.ts.map +0 -1
  141. package/dist/src/processing/gridDebugLogger.js +0 -446
  142. package/dist/src/processing/gridDebugLogger.js.map +0 -1
  143. package/dist/src/processing/gridProjection.d.ts +0 -19
  144. package/dist/src/processing/gridProjection.d.ts.map +0 -1
  145. package/dist/src/processing/gridProjection.js +0 -1813
  146. package/dist/src/processing/gridProjection.js.map +0 -1
  147. package/dist/src/processing/gridProjection.test.d.ts +0 -2
  148. package/dist/src/processing/gridProjection.test.d.ts.map +0 -1
  149. package/dist/src/processing/gridProjection.test.js +0 -495
  150. package/dist/src/processing/gridProjection.test.js.map +0 -1
  151. package/dist/src/processing/gridVisualizer.d.ts +0 -14
  152. package/dist/src/processing/gridVisualizer.d.ts.map +0 -1
  153. package/dist/src/processing/gridVisualizer.js +0 -166
  154. package/dist/src/processing/gridVisualizer.js.map +0 -1
  155. package/dist/src/processing/markupUtils.d.ts +0 -7
  156. package/dist/src/processing/markupUtils.d.ts.map +0 -1
  157. package/dist/src/processing/markupUtils.js +0 -25
  158. package/dist/src/processing/markupUtils.js.map +0 -1
  159. package/dist/src/processing/markupUtils.test.d.ts +0 -2
  160. package/dist/src/processing/markupUtils.test.d.ts.map +0 -1
  161. package/dist/src/processing/markupUtils.test.js +0 -26
  162. package/dist/src/processing/markupUtils.test.js.map +0 -1
  163. package/dist/src/processing/ocrUtils.d.ts +0 -24
  164. package/dist/src/processing/ocrUtils.d.ts.map +0 -1
  165. package/dist/src/processing/ocrUtils.js +0 -79
  166. package/dist/src/processing/ocrUtils.js.map +0 -1
  167. package/dist/src/processing/octUtils.test.d.ts +0 -2
  168. package/dist/src/processing/octUtils.test.d.ts.map +0 -1
  169. package/dist/src/processing/octUtils.test.js +0 -72
  170. package/dist/src/processing/octUtils.test.js.map +0 -1
  171. package/dist/src/processing/searchItems.d.ts +0 -26
  172. package/dist/src/processing/searchItems.d.ts.map +0 -1
  173. package/dist/src/processing/searchItems.js +0 -93
  174. package/dist/src/processing/searchItems.js.map +0 -1
  175. package/dist/src/processing/searchItems.test.d.ts +0 -2
  176. package/dist/src/processing/searchItems.test.d.ts.map +0 -1
  177. package/dist/src/processing/searchItems.test.js +0 -84
  178. package/dist/src/processing/searchItems.test.js.map +0 -1
  179. package/dist/src/processing/textUtils.d.ts +0 -20
  180. package/dist/src/processing/textUtils.d.ts.map +0 -1
  181. package/dist/src/processing/textUtils.js +0 -142
  182. package/dist/src/processing/textUtils.js.map +0 -1
  183. package/dist/src/processing/textUtils.test.d.ts +0 -2
  184. package/dist/src/processing/textUtils.test.d.ts.map +0 -1
  185. package/dist/src/processing/textUtils.test.js +0 -45
  186. package/dist/src/processing/textUtils.test.js.map +0 -1
  187. package/dist/src/vendor/pdfjs/LICENSE +0 -177
  188. package/dist/src/vendor/pdfjs/README.md +0 -0
  189. package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  190. package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  191. package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  192. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  193. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  194. package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  195. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  196. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  197. package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  198. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  199. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  200. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  201. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  202. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  203. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  204. package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  205. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  206. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  207. package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  208. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  209. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  210. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  211. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  212. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  213. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  214. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  215. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  216. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  217. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  218. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  219. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  220. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  221. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  222. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  223. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  224. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  225. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  226. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  227. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  228. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  229. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  230. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  231. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  232. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  233. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  234. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  235. package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  236. package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  237. package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  238. package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  239. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  240. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  241. package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  242. package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  243. package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  244. package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  245. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  246. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  247. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  248. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  249. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  250. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  251. package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  252. package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  253. package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  254. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  255. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  256. package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  257. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  258. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  259. package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  260. package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  261. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  262. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  263. package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  264. package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  265. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  266. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  267. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  268. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  269. package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  270. package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  271. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  272. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  273. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  274. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  275. package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  276. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  277. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  278. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  279. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  280. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  281. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  282. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  283. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  284. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  285. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  286. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  287. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  288. package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  289. package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  290. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  291. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  292. package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  293. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  294. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  295. package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  296. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  297. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  298. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  299. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  300. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  301. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  302. package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  303. package/dist/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  304. package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  305. package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  306. package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  307. package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  308. package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  309. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  310. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  311. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  312. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  313. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  314. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  315. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  316. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  317. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  318. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  319. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  320. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  321. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  322. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  323. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  324. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  325. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  326. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  327. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  328. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  329. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  330. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  331. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  332. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  333. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  334. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  335. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  336. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  337. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  338. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  339. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  340. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  341. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  342. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  343. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  344. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  345. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  346. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  347. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  348. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  349. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  350. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  351. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  352. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  353. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  354. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  355. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  356. package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  357. package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  358. package/dist/src/vendor/pdfjs/jbig2.wasm +0 -0
  359. package/dist/src/vendor/pdfjs/openjpeg.wasm +0 -0
  360. package/dist/src/vendor/pdfjs/pdf.mjs +0 -33603
  361. package/dist/src/vendor/pdfjs/pdf.mjs.map +0 -1
  362. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  363. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  364. package/dist/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  365. package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  366. package/dist/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  367. package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  368. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  369. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  370. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  371. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  372. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  373. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  374. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  375. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  376. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  377. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  378. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  379. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  380. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  381. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  382. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
  383. package/src/vendor/pdfjs/LICENSE +0 -177
  384. package/src/vendor/pdfjs/README.md +0 -0
  385. package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  386. package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  387. package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  388. package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  389. package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  390. package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  391. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  392. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  393. package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  394. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  395. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  396. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  397. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  398. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  399. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  400. package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  401. package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  402. package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  403. package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  404. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  405. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  406. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  407. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  408. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  409. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  410. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  411. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  412. package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  413. package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  414. package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  415. package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  416. package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  417. package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  418. package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  419. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  420. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  421. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  422. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  423. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  424. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  425. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  426. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  427. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  428. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  429. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  430. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  431. package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  432. package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  433. package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  434. package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  435. package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  436. package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  437. package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  438. package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  439. package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  440. package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  441. package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  442. package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  443. package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  444. package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  445. package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  446. package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  447. package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  448. package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  449. package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  450. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  451. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  452. package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  453. package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  454. package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  455. package/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  456. package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  457. package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  458. package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  459. package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  460. package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  461. package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  462. package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  463. package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  464. package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  465. package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  466. package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  467. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  468. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  469. package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  470. package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  471. package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  472. package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  473. package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  474. package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  475. package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  476. package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  477. package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  478. package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  479. package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  480. package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  481. package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  482. package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  483. package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  484. package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  485. package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  486. package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  487. package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  488. package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  489. package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  490. package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  491. package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  492. package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  493. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  494. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  495. package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  496. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  497. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  498. package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  499. package/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  500. package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  501. package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  502. package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  503. package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  504. package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  505. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  506. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  507. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  508. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  509. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  510. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  511. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  512. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  513. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  514. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  515. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  516. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  517. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  518. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  519. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  520. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  521. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  522. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  523. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  524. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  525. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  526. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  527. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  528. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  529. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  530. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  531. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  532. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  533. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  534. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  535. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  536. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  537. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  538. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  539. package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  540. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  541. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  542. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  543. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  544. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  545. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  546. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  547. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  548. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  549. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  550. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  551. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  552. package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  553. package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  554. package/src/vendor/pdfjs/jbig2.wasm +0 -0
  555. package/src/vendor/pdfjs/openjpeg.wasm +0 -0
  556. package/src/vendor/pdfjs/pdf.mjs +0 -33603
  557. package/src/vendor/pdfjs/pdf.mjs.map +0 -1
  558. package/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  559. package/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  560. package/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  561. package/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  562. package/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  563. package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  564. package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  565. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  566. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  567. package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  568. package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  569. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  570. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  571. package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  572. package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  573. package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  574. package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  575. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  576. package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  577. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  578. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
package/README.md CHANGED
@@ -1,488 +1,89 @@
1
- # LiteParse
1
+ # LiteParse Node.js
2
2
 
3
- [![CI](https://github.com/run-llama/liteparse/actions/workflows/ci.yml/badge.svg)](https://github.com/run-llama/liteparse/actions/workflows/ci.yml)
4
- |
5
- [![npm version](https://img.shields.io/npm/v/@llamaindex/liteparse.svg)](https://www.npmjs.com/package/@llamaindex/liteparse)
6
- |
7
- [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
8
- |
9
- [Docs](https://developers.llamaindex.ai/liteparse/)
10
-
11
- <img src="https://github.com/user-attachments/assets/07ba6a82-6bb1-4dea-b0ef-cad7df7d1622" alt="out" width="600">
12
-
13
- LiteParse is a standalone OSS PDF parsing tool focused exclusively on **fast and light** parsing. It provides high-quality spatial text parsing with bounding boxes, without proprietary LLM features or cloud dependencies. Everything runs locally on your machine.
14
-
15
- **Hitting the limits of local parsing?**
16
- For complex documents (dense tables, multi-column layouts, charts, handwritten text, or
17
- scanned PDFs), you'll get significantly better results with [LlamaParse](https://developers.llamaindex.ai/python/cloud/llamaparse/?utm_source=github&utm_medium=liteparse),
18
- our cloud-based document parser built for production document pipelines. LlamaParse handles the
19
- hard stuff so your models see clean, structured data and markdown.
20
-
21
- > 👉 [Sign up for LlamaParse free](https://cloud.llamaindex.ai?utm_source=github&utm_medium=liteparse)
22
-
23
- ## Overview
24
-
25
- - **Fast Text Parsing**: Spatial text parsing using PDF.js
26
- - **Flexible OCR System**:
27
- - **Built-in**: Tesseract.js (zero setup, works out of the box!)
28
- - **HTTP Servers**: Plug in any OCR server (EasyOCR, PaddleOCR, custom)
29
- - **Standard API**: Simple, well-defined OCR API specification
30
- - **Screenshot Generation**: Generate high-quality page screenshots for LLM agents
31
- - **Multiple Output Formats**: JSON and Text
32
- - **Bounding Boxes**: Precise text positioning information
33
- - **Standalone Binary**: No cloud dependencies, runs entirely locally
34
- - **Multi-platform**: Linux, macOS (Intel/ARM), Windows
3
+ Node.js/TypeScript bindings for [LiteParse](https://github.com/run-llama/liteparse) — fast, lightweight PDF and document parsing with spatial text extraction.
35
4
 
36
5
  ## Installation
37
6
 
38
- ### CLI Tool
39
-
40
- #### Option 1: Global Install (Recommended)
41
-
42
- Install globally via npm to use the `lit` command anywhere:
43
-
44
- ```bash
45
- npm i -g @llamaindex/liteparse
46
- ```
47
-
48
- Then use it:
49
-
50
- ```bash
51
- lit parse document.pdf
52
- lit screenshot document.pdf
53
- ```
54
-
55
- For macOS and Linux users, `liteparse` can also be installed via `brew`:
56
-
57
- ```bash
58
- brew tap run-llama/liteparse
59
- brew install llamaindex-liteparse
60
- ```
61
-
62
- #### Option 2: Install from Source
63
-
64
- You can clone the repo and install the CLI globally from source:
65
-
66
- ```
67
- git clone https://github.com/run-llama/liteparse.git
68
- cd liteparse
69
- npm run build
70
- npm pack
71
- npm install -g ./liteparse-*.tgz
72
- ```
73
-
74
- ### Agent Skill
75
-
76
- You can use `liteparse` as an agent skill, downloading it with the `skills` CLI tool:
77
-
78
- ```bash
79
- npx skills add run-llama/llamaparse-agent-skills --skill liteparse
80
- ```
81
-
82
- Or copy-pasting the [`SKILL.md`](https://github.com/run-llama/llamaparse-agent-skills/blob/main/skills/liteparse/SKILL.md) file to your own skills setup.
83
-
84
- ## Usage
85
-
86
- ### Parse Files
87
-
88
- ```bash
89
- # Basic parsing
90
- lit parse document.pdf
91
-
92
- # Parse with specific format
93
- lit parse document.pdf --format json -o output.md
94
-
95
- # Parse specific pages
96
- lit parse document.pdf --target-pages "1-5,10,15-20"
97
-
98
- # Parse without OCR
99
- lit parse document.pdf --no-ocr
100
-
101
- # Parse a remote PDF
102
- curl -sL https://example.com/report.pdf | lit parse -
103
- ```
104
-
105
- ### Batch Parsing
106
-
107
- You can also parse an entire directory of documents:
108
-
109
7
  ```bash
110
- lit batch-parse ./input-directory ./output-directory
111
- ```
112
-
113
- ### Generate Screenshots
114
-
115
- Screenshots are essential for LLM agents to extract visual information that text alone cannot capture.
116
-
117
- ```bash
118
- # Screenshot all pages
119
- lit screenshot document.pdf -o ./screenshots
120
-
121
- # Screenshot specific pages
122
- lit screenshot document.pdf --target-pages "1,3,5" -o ./screenshots
123
-
124
- # Custom DPI
125
- lit screenshot document.pdf --dpi 300 -o ./screenshots
126
-
127
- # Screenshot page range
128
- lit screenshot document.pdf --target-pages "1-10" -o ./screenshots
8
+ npm i @llamaindex/liteparse
129
9
  ```
130
10
 
131
- ### Library Usage
11
+ This also installs the `lit` CLI command (use `npm i -g` for global access).
132
12
 
133
- Install as a dependency in your project:
134
-
135
- ```bash
136
- npm install @llamaindex/liteparse
137
- # or
138
- pnpm add @llamaindex/liteparse
139
- ```
13
+ ## Quick Start
140
14
 
141
15
  ```typescript
142
16
  import { LiteParse } from '@llamaindex/liteparse';
143
17
 
144
- const parser = new LiteParse({ ocrEnabled: true });
18
+ const parser = new LiteParse();
145
19
  const result = await parser.parse('document.pdf');
146
20
  console.log(result.text);
147
- ```
148
-
149
- #### Buffer / Uint8Array Input
150
-
151
- You can pass raw bytes directly instead of a file path, which is useful for remote files:
152
-
153
- ```typescript
154
- import { LiteParse } from '@llamaindex/liteparse';
155
- import { readFile } from 'fs/promises';
156
-
157
- const parser = new LiteParse();
158
-
159
- // From a file read
160
- const pdfBytes = await readFile('document.pdf');
161
- const result = await parser.parse(pdfBytes);
162
21
 
163
- // From an HTTP response
164
- const response = await fetch('https://example.com/document.pdf');
165
- const buffer = Buffer.from(await response.arrayBuffer());
166
- const result2 = await parser.parse(buffer);
167
- ```
168
-
169
- Non-PDF buffers (images, Office documents) are written to a temp directory for format conversion. Screenshots also work with buffer input:
170
-
171
- ```typescript
172
- const screenshots = await parser.screenshot(pdfBytes, [1, 2, 3]);
22
+ // Access structured data
23
+ for (const page of result.pages) {
24
+ console.log(`Page ${page.pageNum}: ${page.textItems.length} text items`);
25
+ }
173
26
  ```
174
27
 
175
- ### Browser Usage
176
-
177
- LiteParse's core parsing engine (PDF.js text extraction, grid projection, OCR via Tesseract.js) can run in the browser. Since the library has Node-only dependencies (sharp, fs, child_process), you'll need a bundler like Vite to swap those out with browser stubs.
178
-
179
- #### Vite Configuration
28
+ ## Configuration
180
29
 
181
- The key is a Vite plugin that redirects Node-only source files to browser-safe replacements, plus `resolve.alias` entries that stub out Node built-in modules:
30
+ All options are passed to the constructor:
182
31
 
183
32
  ```typescript
184
- // vite.config.ts
185
- import { defineConfig, type Plugin } from "vite";
186
- import { resolve, dirname } from "node:path";
187
-
188
- // Node-only files browser stubs (you write these)
189
- const FILE_REDIRECTS = [
190
- { match: /\/engines\/pdf\/pdfium-renderer(\.js|\.ts)?$/, target: "stubs/pdfium-renderer.ts" },
191
- { match: /\/engines\/pdf\/pdfjsImporter(\.js|\.ts)?$/, target: "stubs/pdfjsImporter.ts" },
192
- { match: /\/engines\/ocr\/http-simple(\.js|\.ts)?$/, target: "stubs/http-simple.ts" },
193
- { match: /\/conversion\/convertToPdf(\.js|\.ts)?$/, target: "stubs/convertToPdf.ts" },
194
- { match: /\/processing\/gridDebugLogger(\.js|\.ts)?$/, target: "stubs/gridDebugLogger.ts" },
195
- { match: /\/processing\/gridVisualizer(\.js|\.ts)?$/, target: "stubs/gridVisualizer.ts" },
196
- ];
197
-
198
- function liteparseNodeRedirects(): Plugin {
199
- return {
200
- name: "liteparse-node-redirects",
201
- enforce: "pre",
202
- async resolveId(source, importer) {
203
- if (!importer) return null;
204
- const abs = source.startsWith(".") ? resolve(dirname(importer), source) : source;
205
- for (const { match, target } of FILE_REDIRECTS) {
206
- if (match.test(abs) || match.test(source)) return resolve(target);
207
- }
208
- return null;
209
- },
210
- };
211
- }
212
-
213
- export default defineConfig({
214
- plugins: [liteparseNodeRedirects()],
215
- optimizeDeps: { include: ["tesseract.js"] },
216
- resolve: {
217
- alias: [
218
- { find: "node:fs/promises", replacement: "stubs/empty.ts" },
219
- { find: "node:fs", replacement: "stubs/empty.ts" },
220
- { find: "node:url", replacement: "stubs/empty.ts" },
221
- { find: "node:path", replacement: "stubs/empty.ts" },
222
- { find: "node:os", replacement: "stubs/empty.ts" },
223
- { find: "node:child_process", replacement: "stubs/empty.ts" },
224
- { find: /^fs$/, replacement: "stubs/empty.ts" },
225
- { find: /^path$/, replacement: "stubs/empty.ts" },
226
- { find: /^os$/, replacement: "stubs/empty.ts" },
227
- { find: /^child_process$/, replacement: "stubs/empty.ts" },
228
- { find: "form-data", replacement: "stubs/empty.ts" },
229
- { find: "axios", replacement: "stubs/empty.ts" },
230
- { find: "file-type", replacement: "stubs/file-type.ts" },
231
- ],
232
- },
33
+ const parser = new LiteParse({
34
+ ocrEnabled: true, // Enable OCR (default: true)
35
+ ocrLanguage: 'eng', // Tesseract language code
36
+ ocrServerUrl: undefined, // HTTP OCR server URL (optional)
37
+ tessdataPath: undefined, // Path to tessdata directory (optional)
38
+ maxPages: 1000, // Max pages to parse
39
+ targetPages: '1-5,10', // Specific pages (optional)
40
+ dpi: 150, // Rendering DPI
41
+ preserveVerySmallText: false, // Keep tiny text
42
+ password: undefined, // Password for protected documents
43
+ quiet: false, // Suppress progress output
44
+ numWorkers: 4, // Concurrent OCR workers
233
45
  });
234
46
  ```
235
47
 
236
- See [`scripts/browser-compat/`](scripts/browser-compat/) for a complete working example with all the stub files.
237
-
238
- #### What works in the browser
239
-
240
- - PDF parsing from `Uint8Array` input (use `file.arrayBuffer()` to get bytes from a `<input type="file">`)
241
- - OCR via Tesseract.js (runs in Web Workers, fetches language data from CDN on first use)
242
- - Text and JSON output formats
243
-
244
- #### What doesn't work
48
+ ## Parsing from Bytes
245
49
 
246
- - File path input (pass `Uint8Array` instead)
247
- - DOCX/XLSX/PPTX/image conversion (requires LibreOffice/ImageMagick)
248
- - HTTP OCR server backend
249
- - Screenshots (these use PDFium + sharp, which are native Node addons)
250
-
251
- ### CLI Options
252
-
253
- #### Parse Command
254
-
255
- ```
256
- $ lit parse --help
257
- Usage: lit parse [options] <file>
258
-
259
- Parse a document file (PDF, DOCX, XLSX, PPTX, images, etc.)
260
-
261
- Options:
262
- -o, --output <file> Output file path
263
- --format <format> Output format: json|text (default: "text")
264
- --ocr-server-url <url> HTTP OCR server URL (uses Tesseract if not provided)
265
- --no-ocr Disable OCR
266
- --ocr-language <lang> OCR language(s) (default: "en")
267
- --num-workers <n> Number of pages to OCR in parallel (default: CPU cores - 1)
268
- --max-pages <n> Max pages to parse (default: "10000")
269
- --target-pages <pages> Target pages (e.g., "1-5,10,15-20")
270
- --dpi <dpi> DPI for rendering (default: "150")
271
- --no-precise-bbox Disable precise bounding boxes
272
- --preserve-small-text Preserve very small text
273
- --password <password> Password for encrypted/protected documents
274
- --config <file> Config file (JSON)
275
- -q, --quiet Suppress progress output
276
- -h, --help display help for command
277
- ```
278
-
279
- #### Batch Parse Command
280
-
281
- ```
282
- $ lit batch-parse --help
283
- Usage: lit batch-parse [options] <input-dir> <output-dir>
284
-
285
- Parse multiple documents in batch mode (reuses PDF engine for efficiency)
286
-
287
- Options:
288
- --format <format> Output format: json|text (default: "text")
289
- --ocr-server-url <url> HTTP OCR server URL (uses Tesseract if not provided)
290
- --no-ocr Disable OCR
291
- --ocr-language <lang> OCR language(s) (default: "en")
292
- --num-workers <n> Number of pages to OCR in parallel (default: CPU cores - 1)
293
- --max-pages <n> Max pages to parse per file (default: "10000")
294
- --dpi <dpi> DPI for rendering (default: "150")
295
- --no-precise-bbox Disable precise bounding boxes
296
- --recursive Recursively search input directory
297
- --extension <ext> Only process files with this extension (e.g., ".pdf")
298
- --password <password> Password for encrypted/protected documents (applied to all files)
299
- --config <file> Config file (JSON)
300
- -q, --quiet Suppress progress output
301
- -h, --help display help for command
302
- ```
303
-
304
- #### Screenshot Command
305
-
306
- ```
307
- $ lit screenshot --help
308
- Usage: lit screenshot [options] <file>
309
-
310
- Generate screenshots of PDF pages
311
-
312
- Options:
313
- -o, --output-dir <dir> Output directory for screenshots (default: "./screenshots")
314
- --target-pages <pages> Page numbers to screenshot (e.g., "1,3,5" or "1-5")
315
- --dpi <dpi> DPI for rendering (default: "150")
316
- --format <format> Image format: png|jpg (default: "png")
317
- --password <password> Password for encrypted/protected documents
318
- --config <file> Config file (JSON)
319
- -q, --quiet Suppress progress output
320
- -h, --help display help for command
321
- ```
322
-
323
- ## OCR Setup
324
-
325
- ### Default: Tesseract.js
326
-
327
- ```bash
328
- # Tesseract is enabled by default
329
- lit parse document.pdf
330
-
331
- # Specify language
332
- lit parse document.pdf --ocr-language fra
333
-
334
- # Disable OCR
335
- lit parse document.pdf --no-ocr
336
- ```
337
-
338
- By default, Tesseract.js downloads language data from the internet on first use. For offline or air-gapped environments, set the `TESSDATA_PREFIX` environment variable to a directory containing pre-downloaded `.traineddata` files:
339
-
340
- ```bash
341
- export TESSDATA_PREFIX=/path/to/tessdata
342
- lit parse document.pdf --ocr-language eng
343
- ```
344
-
345
- You can also pass `tessdataPath` in the library config:
50
+ Pass a `Buffer` or `Uint8Array` directly — useful for HTTP responses or in-memory data:
346
51
 
347
52
  ```typescript
348
- const parser = new LiteParse({ tessdataPath: '/path/to/tessdata' });
349
- ```
350
-
351
- ### Optional: HTTP OCR Servers
352
-
353
- For higher accuracy or better performance, you can use an HTTP OCR server. We provide ready-to-use example wrappers for popular OCR engines:
354
-
355
- - [EasyOCR](ocr/easyocr/README.md)
356
- - [PaddleOCR](ocr/paddleocr/README.md)
357
-
358
- You can integrate any OCR service by implementing the simple LiteParse OCR API specification (see [`OCR_API_SPEC.md`](OCR_API_SPEC.md)).
359
-
360
- The API requires:
361
- - POST `/ocr` endpoint
362
- - Accepts `file` and `language` parameters
363
- - Returns JSON: `{ results: [{ text, bbox: [x1,y1,x2,y2], confidence }] }`
364
-
365
- See the example servers in `ocr/easyocr/` and `ocr/paddleocr/` as templates.
366
-
367
- For the complete OCR API specification, see [`OCR_API_SPEC.md`](OCR_API_SPEC.md).
368
-
369
- ## Multi-Format Input Support
370
-
371
- LiteParse supports **automatic conversion** of various document formats to PDF before parsing. This makes it unique compared to other PDF-only parsing tools!
372
-
373
- ### Supported Input Formats
374
-
375
- #### Office Documents (via LibreOffice)
376
- - **Word**: `.doc`, `.docx`, `.docm`, `.odt`, `.rtf`
377
- - **PowerPoint**: `.ppt`, `.pptx`, `.pptm`, `.odp`
378
- - **Spreadsheets**: `.xls`, `.xlsx`, `.xlsm`, `.ods`, `.csv`, `.tsv`
379
-
380
- Just install the dependency and LiteParse will automatically convert these formats to PDF for parsing:
381
-
382
- ```bash
383
- # macOS
384
- brew install --cask libreoffice
385
-
386
- # Ubuntu/Debian
387
- apt-get install libreoffice
388
-
389
- # Windows
390
- choco install libreoffice-fresh # might require admin permissions
391
- ```
392
-
393
- > _For Windows, you might need to add the path to the directory containing LibreOffice CLI executable (generally `C:\Program Files\LibreOffice\program`) to the environment variables and re-start the machine._
394
-
395
- #### Images (via ImageMagick)
396
- - **Formats**: `.jpg`, `.jpeg`, `.png`, `.gif`, `.bmp`, `.tiff`, `.webp`, `.svg`
397
-
398
- Just install ImageMagick and LiteParse will convert images to PDF for parsing (with OCR):
399
-
400
- ```bash
401
- # macOS
402
- brew install imagemagick
403
-
404
- # Ubuntu/Debian
405
- apt-get install imagemagick
53
+ import { readFile } from 'fs/promises';
406
54
 
407
- # Windows
408
- choco install imagemagick.app # might require admin permissions
55
+ const pdfBytes = await readFile('document.pdf');
56
+ const result = await parser.parse(pdfBytes);
57
+ console.log(result.text);
409
58
  ```
410
59
 
411
- ## Environment Variables
412
-
413
- | Variable | Description |
414
- |----------|-------------|
415
- | `TESSDATA_PREFIX` | Path to a directory containing Tesseract `.traineddata` files. Used for offline/air-gapped environments where Tesseract.js cannot download language data from the internet. |
416
- | `LITEPARSE_TMPDIR` | Override the temp directory used for format conversion and intermediate files. Defaults to the OS temp directory (`os.tmpdir()`). Useful in containerized or read-only filesystem environments. |
417
-
418
- ## Configuration
60
+ ## Screenshots
419
61
 
420
- You can configure parsing options via CLI flags or a JSON config file. The config file allows you to set sensible defaults and override as needed.
62
+ Generate PNG screenshots of document pages:
421
63
 
422
- ### Config File Example
423
-
424
- Create a `liteparse.config.json` file:
425
-
426
- ```json
427
- {
428
- "ocrLanguage": "en",
429
- "ocrEnabled": true,
430
- "maxPages": 1000,
431
- "dpi": 150,
432
- "outputFormat": "json",
433
- "preciseBoundingBox": true,
434
- "preserveVerySmallText": false,
435
- "password": "optional_password"
436
- }
437
- ```
438
-
439
- For HTTP OCR servers, just add `ocrServerUrl`:
440
-
441
- ```json
442
- {
443
- "ocrServerUrl": "http://localhost:8828/ocr",
444
- "ocrLanguage": "en",
445
- "outputFormat": "json"
64
+ ```typescript
65
+ const screenshots = parser.screenshot('document.pdf', [1, 2, 3]);
66
+ for (const s of screenshots) {
67
+ console.log(`Page ${s.pageNum}: ${s.width}x${s.height}`);
68
+ // s.imageBuffer contains PNG bytes
446
69
  }
447
70
  ```
448
71
 
449
- Use with:
72
+ ## Supported Formats
450
73
 
451
- ```bash
452
- lit parse document.pdf --config liteparse.config.json
453
- ```
74
+ - PDF (`.pdf`)
75
+ - Microsoft Office (`.docx`, `.xlsx`, `.pptx`, etc.) — requires LibreOffice
76
+ - OpenDocument (`.odt`, `.ods`, `.odp`) — requires LibreOffice
77
+ - Images (`.png`, `.jpg`, `.tiff`, etc.) — requires ImageMagick
78
+ - And more!
454
79
 
455
- ## Development
80
+ ## CLI
456
81
 
457
- We provide a fairly rich `AGENTS.md`/`CLAUDE.md` that we recommend using to help with development + coding agents.
82
+ The npm package includes the `lit` CLI:
458
83
 
459
84
  ```bash
460
- # Install dependencies
461
- npm install
462
-
463
- # Build TypeScript (Linux/macOs)
464
- npm run build
465
-
466
- # Build Typescript (Windows)
467
- npm run build:windows
468
-
469
- # Watch mode
470
- npm run dev
471
-
472
- # Test parsing
473
- npm test
85
+ lit parse document.pdf
86
+ lit parse document.pdf --format json -o output.json
87
+ lit screenshot document.pdf -o ./screenshots
88
+ lit batch-parse ./input ./output
474
89
  ```
475
-
476
- ## License
477
-
478
- Apache 2.0
479
-
480
- ## Credits
481
-
482
- Built on top of:
483
-
484
- - [PDF.js](https://github.com/mozilla/pdf.js) - PDF parsing engine
485
- - [Tesseract.js](https://github.com/naptha/tesseract.js) - In-process OCR engine
486
- - [EasyOCR](https://github.com/JaidedAI/EasyOCR) - HTTP OCR server (optional)
487
- - [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) - HTTP OCR server (optional)
488
- - [Sharp](https://github.com/lovell/sharp) - Image processing
package/dist/cli.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=cli.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}
package/dist/cli.js ADDED
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env node
2
+ import { program } from "commander";
3
+ import { LiteParse } from "./lib.js";
4
+ import { readFileSync } from "node:fs";
5
+ import { writeFileSync } from "node:fs";
6
/**
 * LiteParse command-line entry point.
 *
 * Registers the program metadata and the `parse` sub-command, then hands
 * `process.argv` to Commander. The `parse` action merges an optional JSON
 * config file with CLI flags (flags win), runs `LiteParse#parse` on the given
 * file, and writes the result either to `--output` or to stdout.
 */
program
    .name("liteparse")
    .description("Fast, lightweight PDF and document parsing")
    .version("2.0.0");

// Commander invokes option parsers as `fn(value, previousValue)`. Passing the
// built-in `parseInt` directly lets `previousValue` act as the radix when a
// flag is repeated (e.g. `--max-pages 5 --max-pages 10` -> parseInt("10", 5)),
// so wrap the conversions and pin the radix explicitly.
const parseIntegerOption = (value) => Number.parseInt(value, 10);
const parseFloatOption = (value) => Number.parseFloat(value);

/**
 * Reject a numeric CLI option whose argument could not be parsed.
 *
 * @param {string} flag - Flag name used in the error message (e.g. "--dpi").
 * @param {number | undefined} value - Parsed option value; `undefined` when
 *   the flag was not supplied.
 * @throws {Error} When the flag was supplied but parsed to `NaN`.
 */
function assertNumericOption(flag, value) {
    if (value !== undefined && Number.isNaN(value)) {
        throw new Error(`Invalid value for ${flag}: expected a number`);
    }
}

program
    .command("parse")
    .description("Parse a document and extract text")
    .argument("<file>", "Path to the document file")
    .option("-o, --output <file>", "Output file path")
    .option("--format <format>", 'Output format: json|text (default: "text")')
    .option("--ocr-server-url <url>", "HTTP OCR server URL")
    .option("--no-ocr", "Disable OCR")
    .option("--ocr-language <lang>", "OCR language (default: eng)")
    .option("--max-pages <n>", "Max pages to parse", parseIntegerOption)
    .option("--target-pages <pages>", 'Pages to parse (e.g., "1-5,10,15-20")')
    .option("--dpi <dpi>", "Rendering DPI", parseFloatOption)
    .option("--preserve-small-text", "Keep very small text")
    .option("--password <password>", "Password for encrypted documents")
    .option("--config <file>", "JSON config file path")
    .option("-q, --quiet", "Suppress progress output")
    .option("--num-workers <n>", "Number of concurrent OCR workers", parseIntegerOption)
    .action(async (file, opts) => {
        try {
            // Fail loudly on unparsable numbers instead of silently falling
            // back to defaults; thrown here so the catch below reports them.
            assertNumericOption("--max-pages", opts.maxPages);
            assertNumericOption("--dpi", opts.dpi);
            assertNumericOption("--num-workers", opts.numWorkers);

            const config = {};
            // Load config file if provided
            if (opts.config) {
                const fileConfig = JSON.parse(readFileSync(opts.config, "utf-8"));
                Object.assign(config, fileConfig);
            }
            // CLI options override config file. Truthiness checks are kept on
            // purpose: an absent flag (or a 0 value) leaves the config-file or
            // library default in place.
            if (opts.format)
                config.outputFormat = opts.format;
            if (opts.ocrServerUrl)
                config.ocrServerUrl = opts.ocrServerUrl;
            // `--no-ocr` makes Commander set `opts.ocr` to false; it is true otherwise.
            if (opts.ocr === false)
                config.ocrEnabled = false;
            if (opts.ocrLanguage)
                config.ocrLanguage = opts.ocrLanguage;
            if (opts.maxPages)
                config.maxPages = opts.maxPages;
            if (opts.targetPages)
                config.targetPages = opts.targetPages;
            if (opts.dpi)
                config.dpi = opts.dpi;
            if (opts.preserveSmallText)
                config.preserveVerySmallText = true;
            if (opts.password)
                config.password = opts.password;
            if (opts.quiet)
                config.quiet = true;
            if (opts.numWorkers)
                config.numWorkers = opts.numWorkers;
            // Default CLI output to text (library defaults to json)
            if (!config.outputFormat)
                config.outputFormat = "text";
            const parser = new LiteParse(config);
            const result = await parser.parse(file);
            // JSON output exposes a per-page projection; anything else emits
            // the concatenated document text.
            const output = config.outputFormat === "json"
                ? JSON.stringify({
                    pages: result.pages.map((p) => ({
                        page: p.pageNum,
                        width: p.width,
                        height: p.height,
                        text: p.text,
                        textItems: p.textItems,
                    })),
                }, null, 2)
                : result.text;
            if (opts.output) {
                writeFileSync(opts.output, output, "utf-8");
            }
            else {
                process.stdout.write(output);
            }
        }
        catch (err) {
            console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
            process.exit(1);
        }
    });
program.parse(process.argv);
87
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,SAAS,EAAwB,MAAM,UAAU,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAExC,OAAO;KACJ,IAAI,CAAC,WAAW,CAAC;KACjB,WAAW,CAAC,4CAA4C,CAAC;KACzD,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,mCAAmC,CAAC;KAChD,QAAQ,CAAC,QAAQ,EAAE,2BAA2B,CAAC;KAC/C,MAAM,CAAC,qBAAqB,EAAE,kBAAkB,CAAC;KACjD,MAAM,CAAC,mBAAmB,EAAE,4CAA4C,CAAC;KACzE,MAAM,CAAC,wBAAwB,EAAE,qBAAqB,CAAC;KACvD,MAAM,CAAC,UAAU,EAAE,aAAa,CAAC;KACjC,MAAM,CAAC,uBAAuB,EAAE,6BAA6B,CAAC;KAC9D,MAAM,CAAC,iBAAiB,EAAE,oBAAoB,EAAE,QAAQ,CAAC;KACzD,MAAM,CACL,wBAAwB,EACxB,uCAAuC,CACxC;KACA,MAAM,CAAC,aAAa,EAAE,eAAe,EAAE,UAAU,CAAC;KAClD,MAAM,CAAC,uBAAuB,EAAE,sBAAsB,CAAC;KACvD,MAAM,CAAC,uBAAuB,EAAE,kCAAkC,CAAC;KACnE,MAAM,CAAC,iBAAiB,EAAE,uBAAuB,CAAC;KAClD,MAAM,CAAC,aAAa,EAAE,0BAA0B,CAAC;KACjD,MAAM,CAAC,mBAAmB,EAAE,kCAAkC,EAAE,QAAQ,CAAC;KACzE,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,IAA6B,EAAE,EAAE;IAC5D,IAAI,CAAC;QACH,MAAM,MAAM,GAA6B,EAAE,CAAC;QAE5C,+BAA+B;QAC/B,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAC3B,YAAY,CAAC,IAAI,CAAC,MAAgB,EAAE,OAAO,CAAC,CAC7C,CAAC;YACF,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QACpC,CAAC;QAED,mCAAmC;QACnC,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,MAAyB,CAAC;QACtE,IAAI,IAAI,CAAC,YAAY;YACnB,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,YAAsB,CAAC;QACpD,IAAI,IAAI,CAAC,GAAG,KAAK,KAAK;YAAE,MAAM,CAAC,UAAU,GAAG,KAAK,CAAC;QAClD,IAAI,IAAI,CAAC,WAAW;YAAE,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAqB,CAAC;QACtE,IAAI,IAAI,CAAC,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAkB,CAAC;QAC7D,IAAI,IAAI,CAAC,WAAW;YAAE,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAqB,CAAC;QACtE,IAAI,IAAI,CAAC,GAAG;YAAE,MAAM,CAAC,GAAG,GAAG,IAAI,CAAC,GAAa,CAAC;QAC9C,IAAI,IAAI,CAAC,iBAAiB;YAAE,MAAM,CAAC,qBAAqB,GAAG,IAAI,CAAC;QAChE,IAAI,IAAI,CAAC,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAkB,CAAC;QAC7D,IAAI,IAAI,CAAC,KAAK;YAAE,MAAM,CAAC,KAA
K,GAAG,IAAI,CAAC;QACpC,IAAI,IAAI,CAAC,UAAU;YAAE,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,UAAoB,CAAC;QAEnE,wDAAwD;QACxD,IAAI,CAAC,MAAM,CAAC,YAAY;YAAE,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC;QAEvD,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAExC,MAAM,MAAM,GACV,MAAM,CAAC,YAAY,KAAK,MAAM;YAC5B,CAAC,CAAC,IAAI,CAAC,SAAS,CACZ;gBACE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC9B,IAAI,EAAE,CAAC,CAAC,OAAO;oBACf,KAAK,EAAE,CAAC,CAAC,KAAK;oBACd,MAAM,EAAE,CAAC,CAAC,MAAM;oBAChB,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,SAAS,EAAE,CAAC,CAAC,SAAS;iBACvB,CAAC,CAAC;aACJ,EACD,IAAI,EACJ,CAAC,CACF;YACH,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC;QAElB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,aAAa,CAAC,IAAI,CAAC,MAAgB,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QACxD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CACX,UAAU,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7D,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}