@llamaindex/liteparse 1.5.2 → 2.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (578) hide show
  1. package/README.md +50 -373
  2. package/dist/cli.d.ts +3 -0
  3. package/dist/cli.d.ts.map +1 -0
  4. package/dist/cli.js +87 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/lib.d.ts +58 -0
  7. package/dist/lib.d.ts.map +1 -0
  8. package/dist/lib.js +88 -0
  9. package/dist/lib.js.map +1 -0
  10. package/dist/native.d.ts +54 -0
  11. package/dist/native.d.ts.map +1 -0
  12. package/dist/native.js +70 -0
  13. package/dist/native.js.map +1 -0
  14. package/libpdfium.so +0 -0
  15. package/liteparse.linux-x64-gnu.node +0 -0
  16. package/package.json +36 -50
  17. package/LICENSE +0 -201
  18. package/dist/cli/parse.d.ts +0 -4
  19. package/dist/cli/parse.d.ts.map +0 -1
  20. package/dist/cli/parse.js +0 -450
  21. package/dist/cli/parse.js.map +0 -1
  22. package/dist/package.json +0 -90
  23. package/dist/src/conversion/convertToPdf.d.ts +0 -65
  24. package/dist/src/conversion/convertToPdf.d.ts.map +0 -1
  25. package/dist/src/conversion/convertToPdf.js +0 -405
  26. package/dist/src/conversion/convertToPdf.js.map +0 -1
  27. package/dist/src/conversion/convertToPdf.test.d.ts +0 -2
  28. package/dist/src/conversion/convertToPdf.test.d.ts.map +0 -1
  29. package/dist/src/conversion/convertToPdf.test.js +0 -327
  30. package/dist/src/conversion/convertToPdf.test.js.map +0 -1
  31. package/dist/src/core/config.d.ts +0 -4
  32. package/dist/src/core/config.d.ts.map +0 -1
  33. package/dist/src/core/config.js +0 -26
  34. package/dist/src/core/config.js.map +0 -1
  35. package/dist/src/core/config.test.d.ts +0 -2
  36. package/dist/src/core/config.test.d.ts.map +0 -1
  37. package/dist/src/core/config.test.js +0 -21
  38. package/dist/src/core/config.test.js.map +0 -1
  39. package/dist/src/core/parser.d.ts +0 -92
  40. package/dist/src/core/parser.d.ts.map +0 -1
  41. package/dist/src/core/parser.js +0 -401
  42. package/dist/src/core/parser.js.map +0 -1
  43. package/dist/src/core/parser.test.d.ts +0 -2
  44. package/dist/src/core/parser.test.d.ts.map +0 -1
  45. package/dist/src/core/parser.test.js +0 -541
  46. package/dist/src/core/parser.test.js.map +0 -1
  47. package/dist/src/core/types.d.ts +0 -370
  48. package/dist/src/core/types.d.ts.map +0 -1
  49. package/dist/src/core/types.js +0 -2
  50. package/dist/src/core/types.js.map +0 -1
  51. package/dist/src/engines/ocr/http-simple.d.ts +0 -19
  52. package/dist/src/engines/ocr/http-simple.d.ts.map +0 -1
  53. package/dist/src/engines/ocr/http-simple.js +0 -69
  54. package/dist/src/engines/ocr/http-simple.js.map +0 -1
  55. package/dist/src/engines/ocr/http-simple.test.d.ts +0 -2
  56. package/dist/src/engines/ocr/http-simple.test.d.ts.map +0 -1
  57. package/dist/src/engines/ocr/http-simple.test.js +0 -108
  58. package/dist/src/engines/ocr/http-simple.test.js.map +0 -1
  59. package/dist/src/engines/ocr/interface.d.ts +0 -15
  60. package/dist/src/engines/ocr/interface.d.ts.map +0 -1
  61. package/dist/src/engines/ocr/interface.js +0 -2
  62. package/dist/src/engines/ocr/interface.js.map +0 -1
  63. package/dist/src/engines/ocr/tesseract.d.ts +0 -20
  64. package/dist/src/engines/ocr/tesseract.d.ts.map +0 -1
  65. package/dist/src/engines/ocr/tesseract.js +0 -161
  66. package/dist/src/engines/ocr/tesseract.js.map +0 -1
  67. package/dist/src/engines/ocr/tesseract.test.d.ts +0 -2
  68. package/dist/src/engines/ocr/tesseract.test.d.ts.map +0 -1
  69. package/dist/src/engines/ocr/tesseract.test.js +0 -94
  70. package/dist/src/engines/ocr/tesseract.test.js.map +0 -1
  71. package/dist/src/engines/pdf/interface.d.ts +0 -84
  72. package/dist/src/engines/pdf/interface.d.ts.map +0 -1
  73. package/dist/src/engines/pdf/interface.js +0 -2
  74. package/dist/src/engines/pdf/interface.js.map +0 -1
  75. package/dist/src/engines/pdf/pdfium-renderer.d.ts +0 -31
  76. package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +0 -1
  77. package/dist/src/engines/pdf/pdfium-renderer.js +0 -145
  78. package/dist/src/engines/pdf/pdfium-renderer.js.map +0 -1
  79. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +0 -2
  80. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +0 -1
  81. package/dist/src/engines/pdf/pdfium-renderer.test.js +0 -109
  82. package/dist/src/engines/pdf/pdfium-renderer.test.js.map +0 -1
  83. package/dist/src/engines/pdf/pdfjs.d.ts +0 -14
  84. package/dist/src/engines/pdf/pdfjs.d.ts.map +0 -1
  85. package/dist/src/engines/pdf/pdfjs.js +0 -799
  86. package/dist/src/engines/pdf/pdfjs.js.map +0 -1
  87. package/dist/src/engines/pdf/pdfjs.test.d.ts +0 -2
  88. package/dist/src/engines/pdf/pdfjs.test.d.ts.map +0 -1
  89. package/dist/src/engines/pdf/pdfjs.test.js +0 -225
  90. package/dist/src/engines/pdf/pdfjs.test.js.map +0 -1
  91. package/dist/src/engines/pdf/pdfjsImporter.d.ts +0 -5
  92. package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +0 -1
  93. package/dist/src/engines/pdf/pdfjsImporter.js +0 -45
  94. package/dist/src/engines/pdf/pdfjsImporter.js.map +0 -1
  95. package/dist/src/index.d.ts +0 -3
  96. package/dist/src/index.d.ts.map +0 -1
  97. package/dist/src/index.js +0 -5
  98. package/dist/src/index.js.map +0 -1
  99. package/dist/src/lib.d.ts +0 -19
  100. package/dist/src/lib.d.ts.map +0 -1
  101. package/dist/src/lib.js +0 -17
  102. package/dist/src/lib.js.map +0 -1
  103. package/dist/src/output/json.d.ts +0 -10
  104. package/dist/src/output/json.d.ts.map +0 -1
  105. package/dist/src/output/json.js +0 -32
  106. package/dist/src/output/json.js.map +0 -1
  107. package/dist/src/output/json.test.d.ts +0 -2
  108. package/dist/src/output/json.test.d.ts.map +0 -1
  109. package/dist/src/output/json.test.js +0 -199
  110. package/dist/src/output/json.test.js.map +0 -1
  111. package/dist/src/output/text.d.ts +0 -10
  112. package/dist/src/output/text.d.ts.map +0 -1
  113. package/dist/src/output/text.js +0 -17
  114. package/dist/src/output/text.js.map +0 -1
  115. package/dist/src/output/text.test.d.ts +0 -2
  116. package/dist/src/output/text.test.d.ts.map +0 -1
  117. package/dist/src/output/text.test.js +0 -65
  118. package/dist/src/output/text.test.js.map +0 -1
  119. package/dist/src/processing/bbox.d.ts +0 -20
  120. package/dist/src/processing/bbox.d.ts.map +0 -1
  121. package/dist/src/processing/bbox.js +0 -258
  122. package/dist/src/processing/bbox.js.map +0 -1
  123. package/dist/src/processing/bbox.test.d.ts +0 -2
  124. package/dist/src/processing/bbox.test.d.ts.map +0 -1
  125. package/dist/src/processing/bbox.test.js +0 -334
  126. package/dist/src/processing/bbox.test.js.map +0 -1
  127. package/dist/src/processing/cleanText.d.ts +0 -6
  128. package/dist/src/processing/cleanText.d.ts.map +0 -1
  129. package/dist/src/processing/cleanText.js +0 -73
  130. package/dist/src/processing/cleanText.js.map +0 -1
  131. package/dist/src/processing/cleanText.test.d.ts +0 -2
  132. package/dist/src/processing/cleanText.test.d.ts.map +0 -1
  133. package/dist/src/processing/cleanText.test.js +0 -46
  134. package/dist/src/processing/cleanText.test.js.map +0 -1
  135. package/dist/src/processing/grid.d.ts +0 -7
  136. package/dist/src/processing/grid.d.ts.map +0 -1
  137. package/dist/src/processing/grid.js +0 -13
  138. package/dist/src/processing/grid.js.map +0 -1
  139. package/dist/src/processing/gridDebugLogger.d.ts +0 -206
  140. package/dist/src/processing/gridDebugLogger.d.ts.map +0 -1
  141. package/dist/src/processing/gridDebugLogger.js +0 -446
  142. package/dist/src/processing/gridDebugLogger.js.map +0 -1
  143. package/dist/src/processing/gridProjection.d.ts +0 -19
  144. package/dist/src/processing/gridProjection.d.ts.map +0 -1
  145. package/dist/src/processing/gridProjection.js +0 -1813
  146. package/dist/src/processing/gridProjection.js.map +0 -1
  147. package/dist/src/processing/gridProjection.test.d.ts +0 -2
  148. package/dist/src/processing/gridProjection.test.d.ts.map +0 -1
  149. package/dist/src/processing/gridProjection.test.js +0 -495
  150. package/dist/src/processing/gridProjection.test.js.map +0 -1
  151. package/dist/src/processing/gridVisualizer.d.ts +0 -14
  152. package/dist/src/processing/gridVisualizer.d.ts.map +0 -1
  153. package/dist/src/processing/gridVisualizer.js +0 -166
  154. package/dist/src/processing/gridVisualizer.js.map +0 -1
  155. package/dist/src/processing/markupUtils.d.ts +0 -7
  156. package/dist/src/processing/markupUtils.d.ts.map +0 -1
  157. package/dist/src/processing/markupUtils.js +0 -25
  158. package/dist/src/processing/markupUtils.js.map +0 -1
  159. package/dist/src/processing/markupUtils.test.d.ts +0 -2
  160. package/dist/src/processing/markupUtils.test.d.ts.map +0 -1
  161. package/dist/src/processing/markupUtils.test.js +0 -26
  162. package/dist/src/processing/markupUtils.test.js.map +0 -1
  163. package/dist/src/processing/ocrUtils.d.ts +0 -24
  164. package/dist/src/processing/ocrUtils.d.ts.map +0 -1
  165. package/dist/src/processing/ocrUtils.js +0 -79
  166. package/dist/src/processing/ocrUtils.js.map +0 -1
  167. package/dist/src/processing/octUtils.test.d.ts +0 -2
  168. package/dist/src/processing/octUtils.test.d.ts.map +0 -1
  169. package/dist/src/processing/octUtils.test.js +0 -72
  170. package/dist/src/processing/octUtils.test.js.map +0 -1
  171. package/dist/src/processing/searchItems.d.ts +0 -26
  172. package/dist/src/processing/searchItems.d.ts.map +0 -1
  173. package/dist/src/processing/searchItems.js +0 -93
  174. package/dist/src/processing/searchItems.js.map +0 -1
  175. package/dist/src/processing/searchItems.test.d.ts +0 -2
  176. package/dist/src/processing/searchItems.test.d.ts.map +0 -1
  177. package/dist/src/processing/searchItems.test.js +0 -84
  178. package/dist/src/processing/searchItems.test.js.map +0 -1
  179. package/dist/src/processing/textUtils.d.ts +0 -20
  180. package/dist/src/processing/textUtils.d.ts.map +0 -1
  181. package/dist/src/processing/textUtils.js +0 -142
  182. package/dist/src/processing/textUtils.js.map +0 -1
  183. package/dist/src/processing/textUtils.test.d.ts +0 -2
  184. package/dist/src/processing/textUtils.test.d.ts.map +0 -1
  185. package/dist/src/processing/textUtils.test.js +0 -45
  186. package/dist/src/processing/textUtils.test.js.map +0 -1
  187. package/dist/src/vendor/pdfjs/LICENSE +0 -177
  188. package/dist/src/vendor/pdfjs/README.md +0 -0
  189. package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  190. package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  191. package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  192. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  193. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  194. package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  195. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  196. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  197. package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  198. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  199. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  200. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  201. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  202. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  203. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  204. package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  205. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  206. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  207. package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  208. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  209. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  210. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  211. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  212. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  213. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  214. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  215. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  216. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  217. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  218. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  219. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  220. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  221. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  222. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  223. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  224. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  225. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  226. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  227. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  228. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  229. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  230. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  231. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  232. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  233. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  234. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  235. package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  236. package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  237. package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  238. package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  239. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  240. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  241. package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  242. package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  243. package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  244. package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  245. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  246. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  247. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  248. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  249. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  250. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  251. package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  252. package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  253. package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  254. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  255. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  256. package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  257. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  258. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  259. package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  260. package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  261. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  262. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  263. package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  264. package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  265. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  266. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  267. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  268. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  269. package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  270. package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  271. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  272. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  273. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  274. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  275. package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  276. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  277. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  278. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  279. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  280. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  281. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  282. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  283. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  284. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  285. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  286. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  287. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  288. package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  289. package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  290. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  291. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  292. package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  293. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  294. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  295. package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  296. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  297. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  298. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  299. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  300. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  301. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  302. package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  303. package/dist/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  304. package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  305. package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  306. package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  307. package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  308. package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  309. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  310. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  311. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  312. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  313. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  314. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  315. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  316. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  317. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  318. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  319. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  320. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  321. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  322. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  323. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  324. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  325. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  326. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  327. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  328. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  329. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  330. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  331. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  332. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  333. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  334. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  335. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  336. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  337. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  338. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  339. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  340. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  341. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  342. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  343. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  344. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  345. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  346. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  347. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  348. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  349. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  350. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  351. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  352. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  353. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  354. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  355. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  356. package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  357. package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  358. package/dist/src/vendor/pdfjs/jbig2.wasm +0 -0
  359. package/dist/src/vendor/pdfjs/openjpeg.wasm +0 -0
  360. package/dist/src/vendor/pdfjs/pdf.mjs +0 -33603
  361. package/dist/src/vendor/pdfjs/pdf.mjs.map +0 -1
  362. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  363. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  364. package/dist/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  365. package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  366. package/dist/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  367. package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  368. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  369. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  370. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  371. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  372. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  373. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  374. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  375. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  376. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  377. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  378. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  379. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  380. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  381. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  382. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
  383. package/src/vendor/pdfjs/LICENSE +0 -177
  384. package/src/vendor/pdfjs/README.md +0 -0
  385. package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  386. package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  387. package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  388. package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  389. package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  390. package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  391. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  392. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  393. package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  394. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  395. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  396. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  397. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  398. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  399. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  400. package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  401. package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  402. package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  403. package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  404. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  405. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  406. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  407. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  408. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  409. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  410. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  411. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  412. package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  413. package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  414. package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  415. package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  416. package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  417. package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  418. package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  419. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  420. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  421. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  422. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  423. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  424. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  425. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  426. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  427. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  428. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  429. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  430. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  431. package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  432. package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  433. package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  434. package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  435. package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  436. package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  437. package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  438. package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  439. package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  440. package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  441. package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  442. package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  443. package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  444. package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  445. package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  446. package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  447. package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  448. package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  449. package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  450. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  451. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  452. package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  453. package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  454. package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  455. package/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  456. package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  457. package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  458. package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  459. package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  460. package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  461. package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  462. package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  463. package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  464. package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  465. package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  466. package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  467. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  468. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  469. package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  470. package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  471. package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  472. package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  473. package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  474. package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  475. package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  476. package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  477. package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  478. package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  479. package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  480. package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  481. package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  482. package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  483. package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  484. package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  485. package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  486. package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  487. package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  488. package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  489. package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  490. package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  491. package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  492. package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  493. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  494. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  495. package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  496. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  497. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  498. package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  499. package/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  500. package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  501. package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  502. package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  503. package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  504. package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  505. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  506. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  507. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  508. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  509. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  510. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  511. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  512. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  513. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  514. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  515. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  516. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  517. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  518. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  519. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  520. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  521. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  522. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  523. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  524. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  525. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  526. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  527. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  528. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  529. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  530. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  531. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  532. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  533. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  534. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  535. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  536. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  537. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  538. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  539. package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  540. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  541. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  542. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  543. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  544. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  545. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  546. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  547. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  548. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  549. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  550. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  551. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  552. package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  553. package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  554. package/src/vendor/pdfjs/jbig2.wasm +0 -0
  555. package/src/vendor/pdfjs/openjpeg.wasm +0 -0
  556. package/src/vendor/pdfjs/pdf.mjs +0 -33603
  557. package/src/vendor/pdfjs/pdf.mjs.map +0 -1
  558. package/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  559. package/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  560. package/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  561. package/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  562. package/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  563. package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  564. package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  565. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  566. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  567. package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  568. package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  569. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  570. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  571. package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  572. package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  573. package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  574. package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  575. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  576. package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  577. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  578. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
package/README.md CHANGED
@@ -1,412 +1,89 @@
1
- # LiteParse
1
+ # LiteParse Node.js
2
2
 
3
- [![CI](https://github.com/run-llama/liteparse/actions/workflows/ci.yml/badge.svg)](https://github.com/run-llama/liteparse/actions/workflows/ci.yml)
4
- |
5
- [![npm version](https://img.shields.io/npm/v/@llamaindex/liteparse.svg)](https://www.npmjs.com/package/@llamaindex/liteparse)
6
- |
7
- [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
8
- |
9
- [Docs](https://developers.llamaindex.ai/liteparse/)
10
-
11
- <img src="https://github.com/user-attachments/assets/07ba6a82-6bb1-4dea-b0ef-cad7df7d1622" alt="out" width="600">
12
-
13
- LiteParse is a standalone OSS PDF parsing tool focused exclusively on **fast and light** parsing. It provides high-quality spatial text parsing with bounding boxes, without proprietary LLM features or cloud dependencies. Everything runs locally on your machine.
14
-
15
- **Hitting the limits of local parsing?**
16
- For complex documents (dense tables, multi-column layouts, charts, handwritten text, or
17
- scanned PDFs), you'll get significantly better results with [LlamaParse](https://developers.llamaindex.ai/python/cloud/llamaparse/?utm_source=github&utm_medium=liteparse),
18
- our cloud-based document parser built for production document pipelines. LlamaParse handles the
19
- hard stuff so your models see clean, structured data and markdown.
20
-
21
- > 👉 [Sign up for LlamaParse free](https://cloud.llamaindex.ai?utm_source=github&utm_medium=liteparse)
22
-
23
- ## Overview
24
-
25
- - **Fast Text Parsing**: Spatial text parsing using PDF.js
26
- - **Flexible OCR System**:
27
- - **Built-in**: Tesseract.js (zero setup, works out of the box!)
28
- - **HTTP Servers**: Plug in any OCR server (EasyOCR, PaddleOCR, custom)
29
- - **Standard API**: Simple, well-defined OCR API specification
30
- - **Screenshot Generation**: Generate high-quality page screenshots for LLM agents
31
- - **Multiple Output Formats**: JSON and Text
32
- - **Bounding Boxes**: Precise text positioning information
33
- - **Standalone Binary**: No cloud dependencies, runs entirely locally
34
- - **Multi-platform**: Linux, macOS (Intel/ARM), Windows
3
+ Node.js/TypeScript bindings for [LiteParse](https://github.com/run-llama/liteparse) — fast, lightweight PDF and document parsing with spatial text extraction.
35
4
 
36
5
  ## Installation
37
6
 
38
- ### CLI Tool
39
-
40
- #### Option 1: Global Install (Recommended)
41
-
42
- Install globally via npm to use the `lit` command anywhere:
43
-
44
- ```bash
45
- npm i -g @llamaindex/liteparse
46
- ```
47
-
48
- Then use it:
49
-
50
- ```bash
51
- lit parse document.pdf
52
- lit screenshot document.pdf
53
- ```
54
-
55
- For macOS and Linux users, `liteparse` can also be installed via `brew`:
56
-
57
- ```bash
58
- brew tap run-llama/liteparse
59
- brew install llamaindex-liteparse
60
- ```
61
-
62
- #### Option 2: Install from Source
63
-
64
- You can clone the repo and install the CLI globally from source:
65
-
66
- ```
67
- git clone https://github.com/run-llama/liteparse.git
68
- cd liteparse
69
- npm run build
70
- npm pack
71
- npm install -g ./liteparse-*.tgz
72
- ```
73
-
74
- ### Agent Skill
75
-
76
- You can use `liteparse` as an agent skill, downloading it with the `skills` CLI tool:
77
-
78
- ```bash
79
- npx skills add run-llama/llamaparse-agent-skills --skill liteparse
80
- ```
81
-
82
- Or copy-pasting the [`SKILL.md`](https://github.com/run-llama/llamaparse-agent-skills/blob/main/skills/liteparse/SKILL.md) file to your own skills setup.
83
-
84
- ## Usage
85
-
86
- ### Parse Files
87
-
88
- ```bash
89
- # Basic parsing
90
- lit parse document.pdf
91
-
92
- # Parse with specific format
93
- lit parse document.pdf --format json -o output.md
94
-
95
- # Parse specific pages
96
- lit parse document.pdf --target-pages "1-5,10,15-20"
97
-
98
- # Parse without OCR
99
- lit parse document.pdf --no-ocr
100
-
101
- # Parse a remote PDF
102
- curl -sL https://example.com/report.pdf | lit parse -
103
- ```
104
-
105
- ### Batch Parsing
106
-
107
- You can also parse an entire directory of documents:
108
-
109
7
  ```bash
110
- lit batch-parse ./input-directory ./output-directory
8
+ npm i @llamaindex/liteparse
111
9
  ```
112
10
 
113
- ### Generate Screenshots
11
+ This also installs the `lit` CLI command (use `npm i -g` for global access).
114
12
 
115
- Screenshots are essential for LLM agents to extract visual information that text alone cannot capture.
13
+ ## Quick Start
116
14
 
117
- ```bash
118
- # Screenshot all pages
119
- lit screenshot document.pdf -o ./screenshots
120
-
121
- # Screenshot specific pages
122
- lit screenshot document.pdf --target-pages "1,3,5" -o ./screenshots
15
+ ```typescript
16
+ import { LiteParse } from '@llamaindex/liteparse';
123
17
 
124
- # Custom DPI
125
- lit screenshot document.pdf --dpi 300 -o ./screenshots
18
+ const parser = new LiteParse();
19
+ const result = await parser.parse('document.pdf');
20
+ console.log(result.text);
126
21
 
127
- # Screenshot page range
128
- lit screenshot document.pdf --target-pages "1-10" -o ./screenshots
22
+ // Access structured data
23
+ for (const page of result.pages) {
24
+ console.log(`Page ${page.pageNum}: ${page.textItems.length} text items`);
25
+ }
129
26
  ```
130
27
 
131
- ### Library Usage
132
-
133
- Install as a dependency in your project:
28
+ ## Configuration
134
29
 
135
- ```bash
136
- npm install @llamaindex/liteparse
137
- # or
138
- pnpm add @llamaindex/liteparse
139
- ```
30
+ All options are passed to the constructor:
140
31
 
141
32
  ```typescript
142
- import { LiteParse } from '@llamaindex/liteparse';
143
-
144
- const parser = new LiteParse({ ocrEnabled: true });
145
- const result = await parser.parse('document.pdf');
146
- console.log(result.text);
33
+ const parser = new LiteParse({
34
+ ocrEnabled: true, // Enable OCR (default: true)
35
+ ocrLanguage: 'eng', // Tesseract language code
36
+ ocrServerUrl: undefined, // HTTP OCR server URL (optional)
37
+ tessdataPath: undefined, // Path to tessdata directory (optional)
38
+ maxPages: 1000, // Max pages to parse
39
+ targetPages: '1-5,10', // Specific pages (optional)
40
+ dpi: 150, // Rendering DPI
41
+ preserveVerySmallText: false, // Keep tiny text
42
+ password: undefined, // Password for protected documents
43
+ quiet: false, // Suppress progress output
44
+ numWorkers: 4, // Concurrent OCR workers
45
+ });
147
46
  ```
148
47
 
149
- #### Buffer / Uint8Array Input
48
+ ## Parsing from Bytes
150
49
 
151
- You can pass raw bytes directly instead of a file path, which is useful for remote files:
50
+ Pass a `Buffer` or `Uint8Array` directly — useful for HTTP responses or in-memory data:
152
51
 
153
52
  ```typescript
154
- import { LiteParse } from '@llamaindex/liteparse';
155
53
  import { readFile } from 'fs/promises';
156
54
 
157
- const parser = new LiteParse();
158
-
159
- // From a file read
160
55
  const pdfBytes = await readFile('document.pdf');
161
56
  const result = await parser.parse(pdfBytes);
162
-
163
- // From an HTTP response
164
- const response = await fetch('https://example.com/document.pdf');
165
- const buffer = Buffer.from(await response.arrayBuffer());
166
- const result2 = await parser.parse(buffer);
167
- ```
168
-
169
- Non-PDF buffers (images, Office documents) are written to a temp directory for format conversion. Screenshots also work with buffer input:
170
-
171
- ```typescript
172
- const screenshots = await parser.screenshot(pdfBytes, [1, 2, 3]);
173
- ```
174
-
175
- ### CLI Options
176
-
177
- #### Parse Command
178
-
179
- ```
180
- $ lit parse --help
181
- Usage: lit parse [options] <file>
182
-
183
- Parse a document file (PDF, DOCX, XLSX, PPTX, images, etc.)
184
-
185
- Options:
186
- -o, --output <file> Output file path
187
- --format <format> Output format: json|text (default: "text")
188
- --ocr-server-url <url> HTTP OCR server URL (uses Tesseract if not provided)
189
- --no-ocr Disable OCR
190
- --ocr-language <lang> OCR language(s) (default: "en")
191
- --num-workers <n> Number of pages to OCR in parallel (default: CPU cores - 1)
192
- --max-pages <n> Max pages to parse (default: "10000")
193
- --target-pages <pages> Target pages (e.g., "1-5,10,15-20")
194
- --dpi <dpi> DPI for rendering (default: "150")
195
- --no-precise-bbox Disable precise bounding boxes
196
- --preserve-small-text Preserve very small text
197
- --password <password> Password for encrypted/protected documents
198
- --config <file> Config file (JSON)
199
- -q, --quiet Suppress progress output
200
- -h, --help display help for command
201
- ```
202
-
203
- #### Batch Parse Command
204
-
205
- ```
206
- $ lit batch-parse --help
207
- Usage: lit batch-parse [options] <input-dir> <output-dir>
208
-
209
- Parse multiple documents in batch mode (reuses PDF engine for efficiency)
210
-
211
- Options:
212
- --format <format> Output format: json|text (default: "text")
213
- --ocr-server-url <url> HTTP OCR server URL (uses Tesseract if not provided)
214
- --no-ocr Disable OCR
215
- --ocr-language <lang> OCR language(s) (default: "en")
216
- --num-workers <n> Number of pages to OCR in parallel (default: CPU cores - 1)
217
- --max-pages <n> Max pages to parse per file (default: "10000")
218
- --dpi <dpi> DPI for rendering (default: "150")
219
- --no-precise-bbox Disable precise bounding boxes
220
- --recursive Recursively search input directory
221
- --extension <ext> Only process files with this extension (e.g., ".pdf")
222
- --password <password> Password for encrypted/protected documents (applied to all files)
223
- --config <file> Config file (JSON)
224
- -q, --quiet Suppress progress output
225
- -h, --help display help for command
226
- ```
227
-
228
- #### Screenshot Command
229
-
230
- ```
231
- $ lit screenshot --help
232
- Usage: lit screenshot [options] <file>
233
-
234
- Generate screenshots of PDF pages
235
-
236
- Options:
237
- -o, --output-dir <dir> Output directory for screenshots (default: "./screenshots")
238
- --target-pages <pages> Page numbers to screenshot (e.g., "1,3,5" or "1-5")
239
- --dpi <dpi> DPI for rendering (default: "150")
240
- --format <format> Image format: png|jpg (default: "png")
241
- --password <password> Password for encrypted/protected documents
242
- --config <file> Config file (JSON)
243
- -q, --quiet Suppress progress output
244
- -h, --help display help for command
245
- ```
246
-
247
- ## OCR Setup
248
-
249
- ### Default: Tesseract.js
250
-
251
- ```bash
252
- # Tesseract is enabled by default
253
- lit parse document.pdf
254
-
255
- # Specify language
256
- lit parse document.pdf --ocr-language fra
257
-
258
- # Disable OCR
259
- lit parse document.pdf --no-ocr
57
+ console.log(result.text);
260
58
  ```
261
59
 
262
- By default, Tesseract.js downloads language data from the internet on first use. For offline or air-gapped environments, set the `TESSDATA_PREFIX` environment variable to a directory containing pre-downloaded `.traineddata` files:
263
-
264
- ```bash
265
- export TESSDATA_PREFIX=/path/to/tessdata
266
- lit parse document.pdf --ocr-language eng
267
- ```
60
+ ## Screenshots
268
61
 
269
- You can also pass `tessdataPath` in the library config:
62
+ Generate PNG screenshots of document pages:
270
63
 
271
64
  ```typescript
272
- const parser = new LiteParse({ tessdataPath: '/path/to/tessdata' });
273
- ```
274
-
275
- ### Optional: HTTP OCR Servers
276
-
277
- For higher accuracy or better performance, you can use an HTTP OCR server. We provide ready-to-use example wrappers for popular OCR engines:
278
-
279
- - [EasyOCR](ocr/easyocr/README.md)
280
- - [PaddleOCR](ocr/paddleocr/README.md)
281
-
282
- You can integrate any OCR service by implementing the simple LiteParse OCR API specification (see [`OCR_API_SPEC.md`](OCR_API_SPEC.md)).
283
-
284
- The API requires:
285
- - POST `/ocr` endpoint
286
- - Accepts `file` and `language` parameters
287
- - Returns JSON: `{ results: [{ text, bbox: [x1,y1,x2,y2], confidence }] }`
288
-
289
- See the example servers in `ocr/easyocr/` and `ocr/paddleocr/` as templates.
290
-
291
- For the complete OCR API specification, see [`OCR_API_SPEC.md`](OCR_API_SPEC.md).
292
-
293
- ## Multi-Format Input Support
294
-
295
- LiteParse supports **automatic conversion** of various document formats to PDF before parsing. This makes it unique compared to other PDF-only parsing tools!
296
-
297
- ### Supported Input Formats
298
-
299
- #### Office Documents (via LibreOffice)
300
- - **Word**: `.doc`, `.docx`, `.docm`, `.odt`, `.rtf`
301
- - **PowerPoint**: `.ppt`, `.pptx`, `.pptm`, `.odp`
302
- - **Spreadsheets**: `.xls`, `.xlsx`, `.xlsm`, `.ods`, `.csv`, `.tsv`
303
-
304
- Just install the dependency and LiteParse will automatically convert these formats to PDF for parsing:
305
-
306
- ```bash
307
- # macOS
308
- brew install --cask libreoffice
309
-
310
- # Ubuntu/Debian
311
- apt-get install libreoffice
312
-
313
- # Windows
314
- choco install libreoffice-fresh # might require admin permissions
315
- ```
316
-
317
- > _For Windows, you might need to add the path to the directory containing LibreOffice CLI executable (generally `C:\Program Files\LibreOffice\program`) to the environment variables and re-start the machine._
318
-
319
- #### Images (via ImageMagick)
320
- - **Formats**: `.jpg`, `.jpeg`, `.png`, `.gif`, `.bmp`, `.tiff`, `.webp`, `.svg`
321
-
322
- Just install ImageMagick and LiteParse will convert images to PDF for parsing (with OCR):
323
-
324
- ```bash
325
- # macOS
326
- brew install imagemagick
327
-
328
- # Ubuntu/Debian
329
- apt-get install imagemagick
330
-
331
- # Windows
332
- choco install imagemagick.app # might require admin permissions
333
- ```
334
-
335
- ## Environment Variables
336
-
337
- | Variable | Description |
338
- |----------|-------------|
339
- | `TESSDATA_PREFIX` | Path to a directory containing Tesseract `.traineddata` files. Used for offline/air-gapped environments where Tesseract.js cannot download language data from the internet. |
340
- | `LITEPARSE_TMPDIR` | Override the temp directory used for format conversion and intermediate files. Defaults to the OS temp directory (`os.tmpdir()`). Useful in containerized or read-only filesystem environments. |
341
-
342
- ## Configuration
343
-
344
- You can configure parsing options via CLI flags or a JSON config file. The config file allows you to set sensible defaults and override as needed.
345
-
346
- ### Config File Example
347
-
348
- Create a `liteparse.config.json` file:
349
-
350
- ```json
351
- {
352
- "ocrLanguage": "en",
353
- "ocrEnabled": true,
354
- "maxPages": 1000,
355
- "dpi": 150,
356
- "outputFormat": "json",
357
- "preciseBoundingBox": true,
358
- "preserveVerySmallText": false,
359
- "password": "optional_password"
65
+ const screenshots = parser.screenshot('document.pdf', [1, 2, 3]);
66
+ for (const s of screenshots) {
67
+ console.log(`Page ${s.pageNum}: ${s.width}x${s.height}`);
68
+ // s.imageBuffer contains PNG bytes
360
69
  }
361
70
  ```
362
71
 
363
- For HTTP OCR servers, just add `ocrServerUrl`:
72
+ ## Supported Formats
364
73
 
365
- ```json
366
- {
367
- "ocrServerUrl": "http://localhost:8828/ocr",
368
- "ocrLanguage": "en",
369
- "outputFormat": "json"
370
- }
371
- ```
74
+ - PDF (`.pdf`)
75
+ - Microsoft Office (`.docx`, `.xlsx`, `.pptx`, etc.) — requires LibreOffice
76
+ - OpenDocument (`.odt`, `.ods`, `.odp`) — requires LibreOffice
77
+ - Images (`.png`, `.jpg`, `.tiff`, etc.) — requires ImageMagick
78
+ - And more!
372
79
 
373
- Use with:
80
+ ## CLI
374
81
 
375
- ```bash
376
- lit parse document.pdf --config liteparse.config.json
377
- ```
378
-
379
- ## Development
380
-
381
- We provide a fairly rich `AGENTS.md`/`CLAUDE.md` that we recommend using to help with development + coding agents.
82
+ The npm package includes the `lit` CLI:
382
83
 
383
84
  ```bash
384
- # Install dependencies
385
- npm install
386
-
387
- # Build TypeScript (Linux/macOs)
388
- npm run build
389
-
390
- # Build Typescript (Windows)
391
- npm run build:windows
392
-
393
- # Watch mode
394
- npm run dev
395
-
396
- # Test parsing
397
- npm test
85
+ lit parse document.pdf
86
+ lit parse document.pdf --format json -o output.json
87
+ lit screenshot document.pdf -o ./screenshots
88
+ lit batch-parse ./input ./output
398
89
  ```
399
-
400
- ## License
401
-
402
- Apache 2.0
403
-
404
- ## Credits
405
-
406
- Built on top of:
407
-
408
- - [PDF.js](https://github.com/mozilla/pdf.js) - PDF parsing engine
409
- - [Tesseract.js](https://github.com/naptha/tesseract.js) - In-process OCR engine
410
- - [EasyOCR](https://github.com/JaidedAI/EasyOCR) - HTTP OCR server (optional)
411
- - [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) - HTTP OCR server (optional)
412
- - [Sharp](https://github.com/lovell/sharp) - Image processing
package/dist/cli.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=cli.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}
package/dist/cli.js ADDED
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env node
2
+ import { program } from "commander";
3
+ import { LiteParse } from "./lib.js";
4
+ import { readFileSync } from "node:fs";
5
+ import { writeFileSync } from "node:fs";
6
+ program
7
+ .name("liteparse")
8
+ .description("Fast, lightweight PDF and document parsing")
9
+ .version("2.0.0");
10
+ program
11
+ .command("parse")
12
+ .description("Parse a document and extract text")
13
+ .argument("<file>", "Path to the document file")
14
+ .option("-o, --output <file>", "Output file path")
15
+ .option("--format <format>", 'Output format: json|text (default: "text")')
16
+ .option("--ocr-server-url <url>", "HTTP OCR server URL")
17
+ .option("--no-ocr", "Disable OCR")
18
+ .option("--ocr-language <lang>", "OCR language (default: eng)")
19
+ .option("--max-pages <n>", "Max pages to parse", parseInt)
20
+ .option("--target-pages <pages>", 'Pages to parse (e.g., "1-5,10,15-20")')
21
+ .option("--dpi <dpi>", "Rendering DPI", parseFloat)
22
+ .option("--preserve-small-text", "Keep very small text")
23
+ .option("--password <password>", "Password for encrypted documents")
24
+ .option("--config <file>", "JSON config file path")
25
+ .option("-q, --quiet", "Suppress progress output")
26
+ .option("--num-workers <n>", "Number of concurrent OCR workers", parseInt)
27
+ .action(async (file, opts) => {
28
+ try {
29
+ const config = {};
30
+ // Load config file if provided
31
+ if (opts.config) {
32
+ const fileConfig = JSON.parse(readFileSync(opts.config, "utf-8"));
33
+ Object.assign(config, fileConfig);
34
+ }
35
+ // CLI options override config file
36
+ if (opts.format)
37
+ config.outputFormat = opts.format;
38
+ if (opts.ocrServerUrl)
39
+ config.ocrServerUrl = opts.ocrServerUrl;
40
+ if (opts.ocr === false)
41
+ config.ocrEnabled = false;
42
+ if (opts.ocrLanguage)
43
+ config.ocrLanguage = opts.ocrLanguage;
44
+ if (opts.maxPages)
45
+ config.maxPages = opts.maxPages;
46
+ if (opts.targetPages)
47
+ config.targetPages = opts.targetPages;
48
+ if (opts.dpi)
49
+ config.dpi = opts.dpi;
50
+ if (opts.preserveSmallText)
51
+ config.preserveVerySmallText = true;
52
+ if (opts.password)
53
+ config.password = opts.password;
54
+ if (opts.quiet)
55
+ config.quiet = true;
56
+ if (opts.numWorkers)
57
+ config.numWorkers = opts.numWorkers;
58
+ // Default CLI output to text (library defaults to json)
59
+ if (!config.outputFormat)
60
+ config.outputFormat = "text";
61
+ const parser = new LiteParse(config);
62
+ const result = await parser.parse(file);
63
+ const output = config.outputFormat === "json"
64
+ ? JSON.stringify({
65
+ pages: result.pages.map((p) => ({
66
+ page: p.pageNum,
67
+ width: p.width,
68
+ height: p.height,
69
+ text: p.text,
70
+ textItems: p.textItems,
71
+ })),
72
+ }, null, 2)
73
+ : result.text;
74
+ if (opts.output) {
75
+ writeFileSync(opts.output, output, "utf-8");
76
+ }
77
+ else {
78
+ process.stdout.write(output);
79
+ }
80
+ }
81
+ catch (err) {
82
+ console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
83
+ process.exit(1);
84
+ }
85
+ });
86
+ program.parse(process.argv);
87
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,SAAS,EAAwB,MAAM,UAAU,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAExC,OAAO;KACJ,IAAI,CAAC,WAAW,CAAC;KACjB,WAAW,CAAC,4CAA4C,CAAC;KACzD,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,mCAAmC,CAAC;KAChD,QAAQ,CAAC,QAAQ,EAAE,2BAA2B,CAAC;KAC/C,MAAM,CAAC,qBAAqB,EAAE,kBAAkB,CAAC;KACjD,MAAM,CAAC,mBAAmB,EAAE,4CAA4C,CAAC;KACzE,MAAM,CAAC,wBAAwB,EAAE,qBAAqB,CAAC;KACvD,MAAM,CAAC,UAAU,EAAE,aAAa,CAAC;KACjC,MAAM,CAAC,uBAAuB,EAAE,6BAA6B,CAAC;KAC9D,MAAM,CAAC,iBAAiB,EAAE,oBAAoB,EAAE,QAAQ,CAAC;KACzD,MAAM,CACL,wBAAwB,EACxB,uCAAuC,CACxC;KACA,MAAM,CAAC,aAAa,EAAE,eAAe,EAAE,UAAU,CAAC;KAClD,MAAM,CAAC,uBAAuB,EAAE,sBAAsB,CAAC;KACvD,MAAM,CAAC,uBAAuB,EAAE,kCAAkC,CAAC;KACnE,MAAM,CAAC,iBAAiB,EAAE,uBAAuB,CAAC;KAClD,MAAM,CAAC,aAAa,EAAE,0BAA0B,CAAC;KACjD,MAAM,CAAC,mBAAmB,EAAE,kCAAkC,EAAE,QAAQ,CAAC;KACzE,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,IAA6B,EAAE,EAAE;IAC5D,IAAI,CAAC;QACH,MAAM,MAAM,GAA6B,EAAE,CAAC;QAE5C,+BAA+B;QAC/B,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAC3B,YAAY,CAAC,IAAI,CAAC,MAAgB,EAAE,OAAO,CAAC,CAC7C,CAAC;YACF,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QACpC,CAAC;QAED,mCAAmC;QACnC,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,MAAyB,CAAC;QACtE,IAAI,IAAI,CAAC,YAAY;YACnB,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,YAAsB,CAAC;QACpD,IAAI,IAAI,CAAC,GAAG,KAAK,KAAK;YAAE,MAAM,CAAC,UAAU,GAAG,KAAK,CAAC;QAClD,IAAI,IAAI,CAAC,WAAW;YAAE,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAqB,CAAC;QACtE,IAAI,IAAI,CAAC,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAkB,CAAC;QAC7D,IAAI,IAAI,CAAC,WAAW;YAAE,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAqB,CAAC;QACtE,IAAI,IAAI,CAAC,GAAG;YAAE,MAAM,CAAC,GAAG,GAAG,IAAI,CAAC,GAAa,CAAC;QAC9C,IAAI,IAAI,CAAC,iBAAiB;YAAE,MAAM,CAAC,qBAAqB,GAAG,IAAI,CAAC;QAChE,IAAI,IAAI,CAAC,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAkB,CAAC;QAC7D,IAAI,IAAI,CAAC,KAAK;YAAE,MAAM,CAAC,KAA
K,GAAG,IAAI,CAAC;QACpC,IAAI,IAAI,CAAC,UAAU;YAAE,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,UAAoB,CAAC;QAEnE,wDAAwD;QACxD,IAAI,CAAC,MAAM,CAAC,YAAY;YAAE,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC;QAEvD,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAExC,MAAM,MAAM,GACV,MAAM,CAAC,YAAY,KAAK,MAAM;YAC5B,CAAC,CAAC,IAAI,CAAC,SAAS,CACZ;gBACE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC9B,IAAI,EAAE,CAAC,CAAC,OAAO;oBACf,KAAK,EAAE,CAAC,CAAC,KAAK;oBACd,MAAM,EAAE,CAAC,CAAC,MAAM;oBAChB,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,SAAS,EAAE,CAAC,CAAC,SAAS;iBACvB,CAAC,CAAC;aACJ,EACD,IAAI,EACJ,CAAC,CACF;YACH,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC;QAElB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,aAAa,CAAC,IAAI,CAAC,MAAgB,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QACxD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CACX,UAAU,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7D,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
package/dist/lib.d.ts ADDED
@@ -0,0 +1,58 @@
1
+ export type LiteParseInput = string | Buffer | Uint8Array;
2
+ export type OutputFormat = "json" | "text";
3
+ export interface LiteParseConfig {
4
+ ocrLanguage: string;
5
+ ocrEnabled: boolean;
6
+ ocrServerUrl?: string;
7
+ tessdataPath?: string;
8
+ maxPages: number;
9
+ targetPages?: string;
10
+ dpi: number;
11
+ outputFormat: OutputFormat;
12
+ preserveVerySmallText: boolean;
13
+ password?: string;
14
+ quiet: boolean;
15
+ numWorkers: number;
16
+ }
17
+ export interface TextItem {
18
+ text: string;
19
+ x: number;
20
+ y: number;
21
+ width: number;
22
+ height: number;
23
+ fontName?: string;
24
+ fontSize?: number;
25
+ confidence?: number;
26
+ }
27
+ export interface ParsedPage {
28
+ pageNum: number;
29
+ width: number;
30
+ height: number;
31
+ text: string;
32
+ textItems: TextItem[];
33
+ }
34
+ export interface ParseResult {
35
+ pages: ParsedPage[];
36
+ text: string;
37
+ }
38
+ export interface ScreenshotResult {
39
+ pageNum: number;
40
+ width: number;
41
+ height: number;
42
+ imageBuffer: Buffer;
43
+ }
44
+ export declare class LiteParse {
45
+ private _native;
46
+ private _config;
47
+ constructor(userConfig?: Partial<LiteParseConfig>);
48
+ parse(input: LiteParseInput): Promise<ParseResult>;
49
+ screenshot(input: string, pageNumbers?: number[]): ScreenshotResult[];
50
+ getConfig(): LiteParseConfig;
51
+ }
52
+ export interface SearchItemsOptions {
53
+ phrase: string;
54
+ caseSensitive?: boolean;
55
+ }
56
+ export declare function searchItems(items: TextItem[], options: SearchItemsOptions): TextItem[];
57
+ export default LiteParse;
58
+ //# sourceMappingURL=lib.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"lib.d.ts","sourceRoot":"","sources":["../src/lib.ts"],"names":[],"mappings":"AAaA,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,MAAM,GAAG,UAAU,CAAC;AAC1D,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,MAAM,CAAC;AAE3C,MAAM,WAAW,eAAe;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,OAAO,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,GAAG,EAAE,MAAM,CAAC;IACZ,YAAY,EAAE,YAAY,CAAC;IAC3B,qBAAqB,EAAE,OAAO,CAAC;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,OAAO,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,UAAU,EAAE,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACrB;AAMD,qBAAa,SAAS;IACpB,OAAO,CAAC,OAAO,CAAkB;IACjC,OAAO,CAAC,OAAO,CAAkB;gBAErB,UAAU,GAAE,OAAO,CAAC,eAAe,CAAM;IAoC/C,KAAK,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IAWxD,UAAU,CACR,KAAK,EAAE,MAAM,EACb,WAAW,CAAC,EAAE,MAAM,EAAE,GACrB,gBAAgB,EAAE;IASrB,SAAS,IAAI,eAAe;CAG7B;AA6BD,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,wBAAgB,WAAW,CACzB,KAAK,EAAE,QAAQ,EAAE,EACjB,OAAO,EAAE,kBAAkB,GAC1B,QAAQ,EAAE,CAOZ;AAED,eAAe,SAAS,CAAC"}