@llamaindex/liteparse 1.5.2 → 2.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (578)
  1. package/README.md +50 -373
  2. package/dist/cli.d.ts +3 -0
  3. package/dist/cli.d.ts.map +1 -0
  4. package/dist/cli.js +87 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/lib.d.ts +58 -0
  7. package/dist/lib.d.ts.map +1 -0
  8. package/dist/lib.js +88 -0
  9. package/dist/lib.js.map +1 -0
  10. package/dist/native.d.ts +54 -0
  11. package/dist/native.d.ts.map +1 -0
  12. package/dist/native.js +70 -0
  13. package/dist/native.js.map +1 -0
  14. package/libpdfium.so +0 -0
  15. package/liteparse.linux-x64-gnu.node +0 -0
  16. package/package.json +36 -50
  17. package/LICENSE +0 -201
  18. package/dist/cli/parse.d.ts +0 -4
  19. package/dist/cli/parse.d.ts.map +0 -1
  20. package/dist/cli/parse.js +0 -450
  21. package/dist/cli/parse.js.map +0 -1
  22. package/dist/package.json +0 -90
  23. package/dist/src/conversion/convertToPdf.d.ts +0 -65
  24. package/dist/src/conversion/convertToPdf.d.ts.map +0 -1
  25. package/dist/src/conversion/convertToPdf.js +0 -405
  26. package/dist/src/conversion/convertToPdf.js.map +0 -1
  27. package/dist/src/conversion/convertToPdf.test.d.ts +0 -2
  28. package/dist/src/conversion/convertToPdf.test.d.ts.map +0 -1
  29. package/dist/src/conversion/convertToPdf.test.js +0 -327
  30. package/dist/src/conversion/convertToPdf.test.js.map +0 -1
  31. package/dist/src/core/config.d.ts +0 -4
  32. package/dist/src/core/config.d.ts.map +0 -1
  33. package/dist/src/core/config.js +0 -26
  34. package/dist/src/core/config.js.map +0 -1
  35. package/dist/src/core/config.test.d.ts +0 -2
  36. package/dist/src/core/config.test.d.ts.map +0 -1
  37. package/dist/src/core/config.test.js +0 -21
  38. package/dist/src/core/config.test.js.map +0 -1
  39. package/dist/src/core/parser.d.ts +0 -92
  40. package/dist/src/core/parser.d.ts.map +0 -1
  41. package/dist/src/core/parser.js +0 -401
  42. package/dist/src/core/parser.js.map +0 -1
  43. package/dist/src/core/parser.test.d.ts +0 -2
  44. package/dist/src/core/parser.test.d.ts.map +0 -1
  45. package/dist/src/core/parser.test.js +0 -541
  46. package/dist/src/core/parser.test.js.map +0 -1
  47. package/dist/src/core/types.d.ts +0 -370
  48. package/dist/src/core/types.d.ts.map +0 -1
  49. package/dist/src/core/types.js +0 -2
  50. package/dist/src/core/types.js.map +0 -1
  51. package/dist/src/engines/ocr/http-simple.d.ts +0 -19
  52. package/dist/src/engines/ocr/http-simple.d.ts.map +0 -1
  53. package/dist/src/engines/ocr/http-simple.js +0 -69
  54. package/dist/src/engines/ocr/http-simple.js.map +0 -1
  55. package/dist/src/engines/ocr/http-simple.test.d.ts +0 -2
  56. package/dist/src/engines/ocr/http-simple.test.d.ts.map +0 -1
  57. package/dist/src/engines/ocr/http-simple.test.js +0 -108
  58. package/dist/src/engines/ocr/http-simple.test.js.map +0 -1
  59. package/dist/src/engines/ocr/interface.d.ts +0 -15
  60. package/dist/src/engines/ocr/interface.d.ts.map +0 -1
  61. package/dist/src/engines/ocr/interface.js +0 -2
  62. package/dist/src/engines/ocr/interface.js.map +0 -1
  63. package/dist/src/engines/ocr/tesseract.d.ts +0 -20
  64. package/dist/src/engines/ocr/tesseract.d.ts.map +0 -1
  65. package/dist/src/engines/ocr/tesseract.js +0 -161
  66. package/dist/src/engines/ocr/tesseract.js.map +0 -1
  67. package/dist/src/engines/ocr/tesseract.test.d.ts +0 -2
  68. package/dist/src/engines/ocr/tesseract.test.d.ts.map +0 -1
  69. package/dist/src/engines/ocr/tesseract.test.js +0 -94
  70. package/dist/src/engines/ocr/tesseract.test.js.map +0 -1
  71. package/dist/src/engines/pdf/interface.d.ts +0 -84
  72. package/dist/src/engines/pdf/interface.d.ts.map +0 -1
  73. package/dist/src/engines/pdf/interface.js +0 -2
  74. package/dist/src/engines/pdf/interface.js.map +0 -1
  75. package/dist/src/engines/pdf/pdfium-renderer.d.ts +0 -31
  76. package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +0 -1
  77. package/dist/src/engines/pdf/pdfium-renderer.js +0 -145
  78. package/dist/src/engines/pdf/pdfium-renderer.js.map +0 -1
  79. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +0 -2
  80. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +0 -1
  81. package/dist/src/engines/pdf/pdfium-renderer.test.js +0 -109
  82. package/dist/src/engines/pdf/pdfium-renderer.test.js.map +0 -1
  83. package/dist/src/engines/pdf/pdfjs.d.ts +0 -14
  84. package/dist/src/engines/pdf/pdfjs.d.ts.map +0 -1
  85. package/dist/src/engines/pdf/pdfjs.js +0 -799
  86. package/dist/src/engines/pdf/pdfjs.js.map +0 -1
  87. package/dist/src/engines/pdf/pdfjs.test.d.ts +0 -2
  88. package/dist/src/engines/pdf/pdfjs.test.d.ts.map +0 -1
  89. package/dist/src/engines/pdf/pdfjs.test.js +0 -225
  90. package/dist/src/engines/pdf/pdfjs.test.js.map +0 -1
  91. package/dist/src/engines/pdf/pdfjsImporter.d.ts +0 -5
  92. package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +0 -1
  93. package/dist/src/engines/pdf/pdfjsImporter.js +0 -45
  94. package/dist/src/engines/pdf/pdfjsImporter.js.map +0 -1
  95. package/dist/src/index.d.ts +0 -3
  96. package/dist/src/index.d.ts.map +0 -1
  97. package/dist/src/index.js +0 -5
  98. package/dist/src/index.js.map +0 -1
  99. package/dist/src/lib.d.ts +0 -19
  100. package/dist/src/lib.d.ts.map +0 -1
  101. package/dist/src/lib.js +0 -17
  102. package/dist/src/lib.js.map +0 -1
  103. package/dist/src/output/json.d.ts +0 -10
  104. package/dist/src/output/json.d.ts.map +0 -1
  105. package/dist/src/output/json.js +0 -32
  106. package/dist/src/output/json.js.map +0 -1
  107. package/dist/src/output/json.test.d.ts +0 -2
  108. package/dist/src/output/json.test.d.ts.map +0 -1
  109. package/dist/src/output/json.test.js +0 -199
  110. package/dist/src/output/json.test.js.map +0 -1
  111. package/dist/src/output/text.d.ts +0 -10
  112. package/dist/src/output/text.d.ts.map +0 -1
  113. package/dist/src/output/text.js +0 -17
  114. package/dist/src/output/text.js.map +0 -1
  115. package/dist/src/output/text.test.d.ts +0 -2
  116. package/dist/src/output/text.test.d.ts.map +0 -1
  117. package/dist/src/output/text.test.js +0 -65
  118. package/dist/src/output/text.test.js.map +0 -1
  119. package/dist/src/processing/bbox.d.ts +0 -20
  120. package/dist/src/processing/bbox.d.ts.map +0 -1
  121. package/dist/src/processing/bbox.js +0 -258
  122. package/dist/src/processing/bbox.js.map +0 -1
  123. package/dist/src/processing/bbox.test.d.ts +0 -2
  124. package/dist/src/processing/bbox.test.d.ts.map +0 -1
  125. package/dist/src/processing/bbox.test.js +0 -334
  126. package/dist/src/processing/bbox.test.js.map +0 -1
  127. package/dist/src/processing/cleanText.d.ts +0 -6
  128. package/dist/src/processing/cleanText.d.ts.map +0 -1
  129. package/dist/src/processing/cleanText.js +0 -73
  130. package/dist/src/processing/cleanText.js.map +0 -1
  131. package/dist/src/processing/cleanText.test.d.ts +0 -2
  132. package/dist/src/processing/cleanText.test.d.ts.map +0 -1
  133. package/dist/src/processing/cleanText.test.js +0 -46
  134. package/dist/src/processing/cleanText.test.js.map +0 -1
  135. package/dist/src/processing/grid.d.ts +0 -7
  136. package/dist/src/processing/grid.d.ts.map +0 -1
  137. package/dist/src/processing/grid.js +0 -13
  138. package/dist/src/processing/grid.js.map +0 -1
  139. package/dist/src/processing/gridDebugLogger.d.ts +0 -206
  140. package/dist/src/processing/gridDebugLogger.d.ts.map +0 -1
  141. package/dist/src/processing/gridDebugLogger.js +0 -446
  142. package/dist/src/processing/gridDebugLogger.js.map +0 -1
  143. package/dist/src/processing/gridProjection.d.ts +0 -19
  144. package/dist/src/processing/gridProjection.d.ts.map +0 -1
  145. package/dist/src/processing/gridProjection.js +0 -1813
  146. package/dist/src/processing/gridProjection.js.map +0 -1
  147. package/dist/src/processing/gridProjection.test.d.ts +0 -2
  148. package/dist/src/processing/gridProjection.test.d.ts.map +0 -1
  149. package/dist/src/processing/gridProjection.test.js +0 -495
  150. package/dist/src/processing/gridProjection.test.js.map +0 -1
  151. package/dist/src/processing/gridVisualizer.d.ts +0 -14
  152. package/dist/src/processing/gridVisualizer.d.ts.map +0 -1
  153. package/dist/src/processing/gridVisualizer.js +0 -166
  154. package/dist/src/processing/gridVisualizer.js.map +0 -1
  155. package/dist/src/processing/markupUtils.d.ts +0 -7
  156. package/dist/src/processing/markupUtils.d.ts.map +0 -1
  157. package/dist/src/processing/markupUtils.js +0 -25
  158. package/dist/src/processing/markupUtils.js.map +0 -1
  159. package/dist/src/processing/markupUtils.test.d.ts +0 -2
  160. package/dist/src/processing/markupUtils.test.d.ts.map +0 -1
  161. package/dist/src/processing/markupUtils.test.js +0 -26
  162. package/dist/src/processing/markupUtils.test.js.map +0 -1
  163. package/dist/src/processing/ocrUtils.d.ts +0 -24
  164. package/dist/src/processing/ocrUtils.d.ts.map +0 -1
  165. package/dist/src/processing/ocrUtils.js +0 -79
  166. package/dist/src/processing/ocrUtils.js.map +0 -1
  167. package/dist/src/processing/octUtils.test.d.ts +0 -2
  168. package/dist/src/processing/octUtils.test.d.ts.map +0 -1
  169. package/dist/src/processing/octUtils.test.js +0 -72
  170. package/dist/src/processing/octUtils.test.js.map +0 -1
  171. package/dist/src/processing/searchItems.d.ts +0 -26
  172. package/dist/src/processing/searchItems.d.ts.map +0 -1
  173. package/dist/src/processing/searchItems.js +0 -93
  174. package/dist/src/processing/searchItems.js.map +0 -1
  175. package/dist/src/processing/searchItems.test.d.ts +0 -2
  176. package/dist/src/processing/searchItems.test.d.ts.map +0 -1
  177. package/dist/src/processing/searchItems.test.js +0 -84
  178. package/dist/src/processing/searchItems.test.js.map +0 -1
  179. package/dist/src/processing/textUtils.d.ts +0 -20
  180. package/dist/src/processing/textUtils.d.ts.map +0 -1
  181. package/dist/src/processing/textUtils.js +0 -142
  182. package/dist/src/processing/textUtils.js.map +0 -1
  183. package/dist/src/processing/textUtils.test.d.ts +0 -2
  184. package/dist/src/processing/textUtils.test.d.ts.map +0 -1
  185. package/dist/src/processing/textUtils.test.js +0 -45
  186. package/dist/src/processing/textUtils.test.js.map +0 -1
  187. package/dist/src/vendor/pdfjs/LICENSE +0 -177
  188. package/dist/src/vendor/pdfjs/README.md +0 -0
  189. package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  190. package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  191. package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  192. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  193. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  194. package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  195. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  196. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  197. package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  198. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  199. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  200. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  201. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  202. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  203. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  204. package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  205. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  206. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  207. package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  208. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  209. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  210. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  211. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  212. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  213. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  214. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  215. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  216. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  217. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  218. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  219. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  220. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  221. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  222. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  223. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  224. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  225. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  226. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  227. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  228. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  229. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  230. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  231. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  232. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  233. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  234. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  235. package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  236. package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  237. package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  238. package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  239. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  240. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  241. package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  242. package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  243. package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  244. package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  245. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  246. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  247. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  248. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  249. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  250. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  251. package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  252. package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  253. package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  254. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  255. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  256. package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  257. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  258. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  259. package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  260. package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  261. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  262. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  263. package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  264. package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  265. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  266. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  267. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  268. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  269. package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  270. package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  271. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  272. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  273. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  274. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  275. package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  276. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  277. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  278. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  279. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  280. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  281. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  282. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  283. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  284. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  285. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  286. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  287. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  288. package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  289. package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  290. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  291. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  292. package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  293. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  294. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  295. package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  296. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  297. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  298. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  299. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  300. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  301. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  302. package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  303. package/dist/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  304. package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  305. package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  306. package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  307. package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  308. package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  309. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  310. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  311. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  312. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  313. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  314. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  315. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  316. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  317. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  318. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  319. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  320. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  321. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  322. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  323. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  324. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  325. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  326. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  327. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  328. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  329. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  330. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  331. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  332. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  333. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  334. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  335. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  336. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  337. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  338. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  339. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  340. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  341. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  342. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  343. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  344. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  345. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  346. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  347. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  348. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  349. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  350. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  351. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  352. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  353. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  354. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  355. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  356. package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  357. package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  358. package/dist/src/vendor/pdfjs/jbig2.wasm +0 -0
  359. package/dist/src/vendor/pdfjs/openjpeg.wasm +0 -0
  360. package/dist/src/vendor/pdfjs/pdf.mjs +0 -33603
  361. package/dist/src/vendor/pdfjs/pdf.mjs.map +0 -1
  362. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  363. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  364. package/dist/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  365. package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  366. package/dist/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  367. package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  368. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  369. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  370. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  371. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  372. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  373. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  374. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  375. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  376. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  377. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  378. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  379. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  380. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  381. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  382. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
  383. package/src/vendor/pdfjs/LICENSE +0 -177
  384. package/src/vendor/pdfjs/README.md +0 -0
  385. package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  386. package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  387. package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  388. package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  389. package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  390. package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  391. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  392. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  393. package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  394. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  395. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  396. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  397. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  398. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  399. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  400. package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  401. package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  402. package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  403. package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  404. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  405. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  406. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  407. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  408. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  409. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  410. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  411. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  412. package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  413. package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  414. package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  415. package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  416. package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  417. package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  418. package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  419. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  420. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  421. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  422. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  423. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  424. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  425. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  426. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  427. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  428. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  429. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  430. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  431. package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  432. package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  433. package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  434. package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  435. package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  436. package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  437. package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  438. package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  439. package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  440. package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  441. package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  442. package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  443. package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  444. package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  445. package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  446. package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  447. package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  448. package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  449. package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  450. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  451. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  452. package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  453. package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  454. package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  455. package/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  456. package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  457. package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  458. package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  459. package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  460. package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  461. package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  462. package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  463. package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  464. package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  465. package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  466. package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  467. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  468. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  469. package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  470. package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  471. package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  472. package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  473. package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  474. package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  475. package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  476. package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  477. package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  478. package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  479. package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  480. package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  481. package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  482. package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  483. package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  484. package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  485. package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  486. package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  487. package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  488. package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  489. package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  490. package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  491. package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  492. package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  493. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  494. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  495. package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  496. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  497. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  498. package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  499. package/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  500. package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  501. package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  502. package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  503. package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  504. package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  505. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  506. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  507. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  508. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  509. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  510. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  511. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  512. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  513. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  514. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  515. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  516. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  517. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  518. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  519. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  520. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  521. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  522. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  523. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  524. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  525. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  526. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  527. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  528. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  529. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  530. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  531. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  532. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  533. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  534. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  535. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  536. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  537. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  538. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  539. package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  540. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  541. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  542. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  543. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  544. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  545. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  546. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  547. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  548. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  549. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  550. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  551. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  552. package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  553. package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  554. package/src/vendor/pdfjs/jbig2.wasm +0 -0
  555. package/src/vendor/pdfjs/openjpeg.wasm +0 -0
  556. package/src/vendor/pdfjs/pdf.mjs +0 -33603
  557. package/src/vendor/pdfjs/pdf.mjs.map +0 -1
  558. package/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  559. package/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  560. package/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  561. package/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  562. package/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  563. package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  564. package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  565. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  566. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  567. package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  568. package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  569. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  570. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  571. package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  572. package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  573. package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  574. package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  575. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  576. package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  577. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  578. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
@@ -1,161 +0,0 @@
1
- import { createWorker, createScheduler } from "tesseract.js";
2
- export class TesseractEngine {
3
- name = "tesseract";
4
- scheduler;
5
- workers = [];
6
- currentLanguage;
7
- concurrency;
8
- tessdataPath;
9
- constructor(concurrency = 4, tessdataPath) {
10
- this.concurrency = concurrency;
11
- // Use explicit path, then TESSDATA_PREFIX env var, then let tesseract.js default (CDN)
12
- this.tessdataPath = tessdataPath || process.env.TESSDATA_PREFIX || undefined;
13
- }
14
- async initialize(language = "eng") {
15
- if (this.scheduler && this.currentLanguage === language) {
16
- return; // Already initialized for this language
17
- }
18
- // Clean up existing scheduler and workers if language changed
19
- await this.terminate();
20
- // Create scheduler
21
- this.scheduler = createScheduler();
22
- // Build worker options for local tessdata support
23
- const workerOptions = {};
24
- if (this.tessdataPath) {
25
- workerOptions.langPath = this.tessdataPath;
26
- workerOptions.cachePath = this.tessdataPath;
27
- workerOptions.gzip = false; // Pre-cached files are not gzipped
28
- }
29
- workerOptions.errorHandler = () => {
30
- // Let createWorker reject so LiteParse can convert the failure into
31
- // an actionable initialization error instead of crashing the process.
32
- };
33
- // Create worker pool
34
- for (let i = 0; i < this.concurrency; i++) {
35
- let worker;
36
- try {
37
- worker = await createWorker(language, 1, Object.keys(workerOptions).length > 0 ? workerOptions : undefined);
38
- }
39
- catch (error) {
40
- // Clean up any workers already created
41
- await this.terminate();
42
- const message = error instanceof Error ? error.message : String(error);
43
- // Provide actionable guidance for common failures
44
- if (message.includes("fetch") ||
45
- message.includes("network") ||
46
- message.includes("ENOTFOUND") ||
47
- message.includes("ERR_INVALID_URL")) {
48
- throw new Error(`Tesseract failed to download language data for "${language}". ` +
49
- `This usually means the machine has no internet access. ` +
50
- `To fix this, either:\n` +
51
- ` 1. Set the TESSDATA_PREFIX env var to a directory containing ${language}.traineddata\n` +
52
- ` 2. Use --ocr-server-url to use an external OCR server instead\n` +
53
- ` 3. Use --no-ocr to disable OCR entirely`, {
54
- cause: error,
55
- });
56
- }
57
- if (message.includes("traineddata") ||
58
- message.includes("TESSDATA") ||
59
- message.includes("loading language")) {
60
- throw new Error(`Tesseract failed to load language data for "${language}": ${message}\n` +
61
- `Ensure ${language}.traineddata exists in your tessdata directory and set ` +
62
- `the TESSDATA_PREFIX env var accordingly.`, {
63
- cause: error,
64
- });
65
- }
66
- throw new Error(`Tesseract OCR initialization failed: ${message}`, { cause: error });
67
- }
68
- if (!worker) {
69
- await this.terminate();
70
- throw new Error("Tesseract worker not initialized");
71
- }
72
- this.workers.push(worker);
73
- this.scheduler.addWorker(worker);
74
- }
75
- this.currentLanguage = language;
76
- }
77
- async recognize(image, options) {
78
- // Handle language - tesseract.js uses language codes like 'eng', 'fra', 'deu'
79
- const language = this.normalizeLanguage(Array.isArray(options.language) ? options.language[0] : options.language);
80
- // Initialize scheduler if needed
81
- await this.initialize(language);
82
- if (!this.scheduler) {
83
- throw new Error("Tesseract scheduler not initialized");
84
- }
85
- try {
86
- // Recognize text from image using scheduler
87
- // tesseract.js accepts string (path/URL) or Buffer/Uint8Array
88
- // In tesseract.js v6+, we need to enable blocks output to get word-level data
89
- const { data: { blocks }, } = await this.scheduler.addJob("recognize", image, options.correctRotation ? { rotateAuto: true } : {}, { blocks: true });
90
- // Extract words from hierarchical blocks structure: blocks → paragraphs → lines → words
91
- const results = [];
92
- for (const block of blocks || []) {
93
- for (const paragraph of block.paragraphs || []) {
94
- for (const line of paragraph.lines || []) {
95
- for (const word of line.words || []) {
96
- results.push({
97
- text: word.text,
98
- bbox: [word.bbox.x0, word.bbox.y0, word.bbox.x1, word.bbox.y1],
99
- confidence: word.confidence / 100, // Tesseract returns 0-100, we want 0-1
100
- });
101
- }
102
- }
103
- }
104
- }
105
- // Filter out low confidence results (below 30%)
106
- return results.filter((r) => r.confidence > 0.3);
107
- }
108
- catch (error) {
109
- const label = typeof image === "string" ? image : "<buffer>";
110
- console.error(`\nTesseract OCR error for ${label}:`, error);
111
- return [];
112
- }
113
- }
114
- async recognizeBatch(images, options) {
115
- // Handle language
116
- const language = this.normalizeLanguage(Array.isArray(options.language) ? options.language[0] : options.language);
117
- // Initialize scheduler if needed
118
- await this.initialize(language);
119
- if (!this.scheduler) {
120
- throw new Error("Tesseract scheduler not initialized");
121
- }
122
- // Process all images in parallel - scheduler handles distribution
123
- const jobs = images.map((image) => this.recognize(image, options));
124
- return Promise.all(jobs);
125
- }
126
- async terminate() {
127
- if (this.scheduler) {
128
- await this.scheduler.terminate();
129
- this.scheduler = undefined;
130
- }
131
- this.workers = [];
132
- this.currentLanguage = undefined;
133
- }
134
- /**
135
- * Normalize language codes to Tesseract format
136
- * Common mappings: en->eng, fr->fra, de->deu, es->spa, zh->chi_sim, ja->jpn
137
- */
138
- normalizeLanguage(lang) {
139
- const languageMap = {
140
- en: "eng",
141
- fr: "fra",
142
- de: "deu",
143
- es: "spa",
144
- it: "ita",
145
- pt: "por",
146
- ru: "rus",
147
- zh: "chi_sim",
148
- "zh-cn": "chi_sim",
149
- "zh-tw": "chi_tra",
150
- ja: "jpn",
151
- ko: "kor",
152
- ar: "ara",
153
- hi: "hin",
154
- th: "tha",
155
- vi: "vie",
156
- };
157
- const normalized = lang.toLowerCase().trim();
158
- return languageMap[normalized] || normalized;
159
- }
160
- }
161
- //# sourceMappingURL=tesseract.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"tesseract.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,eAAe,EAAqB,MAAM,cAAc,CAAC;AAGhF,MAAM,OAAO,eAAe;IAC1B,IAAI,GAAG,WAAW,CAAC;IACX,SAAS,CAAa;IACtB,OAAO,GAAa,EAAE,CAAC;IACvB,eAAe,CAAU;IACzB,WAAW,CAAS;IACpB,YAAY,CAAU;IAE9B,YAAY,cAAsB,CAAC,EAAE,YAAqB;QACxD,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,uFAAuF;QACvF,IAAI,CAAC,YAAY,GAAG,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,SAAS,CAAC;IAC/E,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,WAAmB,KAAK;QACvC,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,KAAK,QAAQ,EAAE,CAAC;YACxD,OAAO,CAAC,wCAAwC;QAClD,CAAC;QAED,8DAA8D;QAC9D,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QAEvB,mBAAmB;QACnB,IAAI,CAAC,SAAS,GAAG,eAAe,EAAE,CAAC;QAEnC,kDAAkD;QAClD,MAAM,aAAa,GAA4B,EAAE,CAAC;QAClD,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,aAAa,CAAC,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC;YAC3C,aAAa,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC;YAC5C,aAAa,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,mCAAmC;QACjE,CAAC;QACD,aAAa,CAAC,YAAY,GAAG,GAAG,EAAE;YAChC,oEAAoE;YACpE,sEAAsE;QACxE,CAAC,CAAC;QAEF,qBAAqB;QACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,IAAI,MAAc,CAAC;YACnB,IAAI,CAAC;gBACH,MAAM,GAAG,MAAM,YAAY,CACzB,QAAQ,EACR,CAAC,EACD,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAClE,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,uCAAuC;gBACvC,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;gBACvB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAEvE,kDAAkD;gBAClD,IACE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;oBACzB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC;oBAC3B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;oBAC7B,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EACnC,CAAC;oBACD,MAAM,IAAI,KAAK,CACb,mDAAmD,QAAQ,KAAK;wBAC9D,yDAAyD;wBACzD,wBAAwB;wBACxB,kEAAkE,QAAQ,gBAAgB;wBAC1F,mEAAmE;wBACnE,2CAA2C,EAC7C;wBACE,KAAK,EAAE,KAAK;qBACb,CACF,CAAC;gBACJ,CAAC;gBACD,IACE,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC;oBAC/B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;oBAC5B,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EACpC,CAAC
;oBACD,MAAM,IAAI,KAAK,CACb,+CAA+C,QAAQ,MAAM,OAAO,IAAI;wBACtE,UAAU,QAAQ,yDAAyD;wBAC3E,0CAA0C,EAC5C;wBACE,KAAK,EAAE,KAAK;qBACb,CACF,CAAC;gBACJ,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YACvF,CAAC;YACD,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;YACtD,CAAC;YACD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,KAAsB,EAAE,OAAmB;QACzD,8EAA8E;QAC9E,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,CAAC;YACH,4CAA4C;YAC5C,8DAA8D;YAC9D,8EAA8E;YAC9E,MAAM,EACJ,IAAI,EAAE,EAAE,MAAM,EAAE,GACjB,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAC7B,WAAW,EACX,KAAK,EACL,OAAO,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EACnD,EAAE,MAAM,EAAE,IAAI,EAAE,CACjB,CAAC;YAEF,wFAAwF;YACxF,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,KAAK,MAAM,KAAK,IAAI,MAAM,IAAI,EAAE,EAAE,CAAC;gBACjC,KAAK,MAAM,SAAS,IAAI,KAAK,CAAC,UAAU,IAAI,EAAE,EAAE,CAAC;oBAC/C,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;wBACzC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;4BACpC,OAAO,CAAC,IAAI,CAAC;gCACX,IAAI,EAAE,IAAI,CAAC,IAAI;gCACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;gCACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;6BAC3E,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gDAAgD;YAChD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC;QACnD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,KAAK,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC;YAC7D,O
AAO,CAAC,KAAK,CAAC,6BAA6B,KAAK,GAAG,EAAE,KAAK,CAAC,CAAC;YAC5D,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,MAA2B,EAAE,OAAmB;QACnE,kBAAkB;QAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,kEAAkE;QAClE,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC;QAEnE,OAAO,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,SAAS;QACb,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;YACjC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC7B,CAAC;QACD,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;QAClB,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;IACnC,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,IAAY;QACpC,MAAM,WAAW,GAA2B;YAC1C,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,SAAS;YACb,OAAO,EAAE,SAAS;YAClB,OAAO,EAAE,SAAS;YAClB,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;SACV,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAC7C,OAAO,WAAW,CAAC,UAAU,CAAC,IAAI,UAAU,CAAC;IAC/C,CAAC;CACF"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=tesseract.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"tesseract.test.d.ts","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.test.ts"],"names":[],"mappings":""}
@@ -1,94 +0,0 @@
1
- import { vi, describe, it, expect } from "vitest";
2
- // In tesseract.js v6+, words are nested in blocks → paragraphs → lines → words
3
- const mockWords = [
4
- {
5
- text: "Hello",
6
- confidence: 95,
7
- bbox: { x0: 0, y0: 0, x1: 50, y1: 20 },
8
- },
9
- {
10
- text: "World",
11
- confidence: 92,
12
- bbox: { x0: 60, y0: 0, x1: 120, y1: 20 },
13
- },
14
- ];
15
- const mockTesseractResult = {
16
- data: {
17
- text: "Hello World",
18
- blocks: [
19
- {
20
- paragraphs: [
21
- {
22
- lines: [
23
- {
24
- words: mockWords,
25
- },
26
- ],
27
- },
28
- ],
29
- },
30
- ],
31
- confidence: 93,
32
- },
33
- };
34
- const mockResults = mockWords.map((word) => ({
35
- text: word.text,
36
- bbox: [word.bbox.x0, word.bbox.y0, word.bbox.x1, word.bbox.y1],
37
- confidence: word.confidence / 100, // Tesseract returns 0-100, we want 0-1
38
- }));
39
- const mockTesseractWorker = {
40
- terminate: vi.fn(async () => { }),
41
- recognize: vi.fn(async () => {
42
- return mockTesseractResult;
43
- }),
44
- };
45
- vi.mock("tesseract.js", async () => {
46
- const actual = await vi.importActual("tesseract.js");
47
- return {
48
- ...actual,
49
- createWorker: vi.fn(
50
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
51
- async (language, _num, options) => {
52
- if (language == "it" || language == "ita") {
53
- return;
54
- }
55
- if (language == "offline" || language == "fetchfail") {
56
- options?.errorHandler?.("TypeError: fetch failed");
57
- throw new Error("TypeError: fetch failed");
58
- }
59
- return mockTesseractWorker;
60
- }),
61
- };
62
- });
63
- import { TesseractEngine } from "./tesseract";
64
- describe("test Tesseract OCR (single image)", () => {
65
- it("test engine success", async () => {
66
- const engine = new TesseractEngine();
67
- expect(engine.name).toBe("tesseract");
68
- const result = await engine.recognize("cat.png", { language: "en" });
69
- expect(result).toStrictEqual(mockResults);
70
- });
71
- it("test engine failure (failed to initialize)", async () => {
72
- const engine = new TesseractEngine();
73
- expect(engine.name).toBe("tesseract");
74
- await expect(engine.recognize("cat.png", { language: "it" })).rejects.toThrow("Tesseract worker not initialized");
75
- });
76
- it("test engine failure (fetch failed) returns actionable guidance", async () => {
77
- const engine = new TesseractEngine();
78
- await expect(engine.recognize("cat.png", { language: "offline" })).rejects.toThrow('Tesseract failed to download language data for "offline"');
79
- });
80
- });
81
- describe("test OCR simple HTTP server (batch)", () => {
82
- it("test engine success", async () => {
83
- const engine = new TesseractEngine();
84
- expect(engine.name).toBe("tesseract");
85
- const result = await engine.recognizeBatch(["cat.png", "dog.png"], { language: "en" });
86
- expect(result).toStrictEqual([mockResults, mockResults]);
87
- });
88
- it("test engine failure (failed to initialize)", async () => {
89
- const engine = new TesseractEngine();
90
- expect(engine.name).toBe("tesseract");
91
- await expect(engine.recognizeBatch(["cat.png", "dog.png"], { language: "it" })).rejects.toThrow("Tesseract worker not initialized");
92
- });
93
- });
94
- //# sourceMappingURL=tesseract.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"tesseract.test.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAElD,+EAA+E;AAC/E,MAAM,SAAS,GAAG;IAChB;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE;KACvC;IACD;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,EAAE,EAAE;KACzC;CACF,CAAC;AAEF,MAAM,mBAAmB,GAAG;IAC1B,IAAI,EAAE;QACJ,IAAI,EAAE,aAAa;QACnB,MAAM,EAAE;YACN;gBACE,UAAU,EAAE;oBACV;wBACE,KAAK,EAAE;4BACL;gCACE,KAAK,EAAE,SAAS;6BACjB;yBACF;qBACF;iBACF;aACF;SACF;QACD,UAAU,EAAE,EAAE;KACf;CACF,CAAC;AAEF,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC3C,IAAI,EAAE,IAAI,CAAC,IAAI;IACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;IACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;CAC3E,CAAC,CAAC,CAAC;AAEJ,MAAM,mBAAmB,GAAG;IAC1B,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,GAAE,CAAC,CAAC;IAChC,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;QAC1B,OAAO,mBAAmB,CAAC;IAC7B,CAAC,CAAC;CACH,CAAC;AAEF,EAAE,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,IAAI,EAAE;IACjC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,YAAY,CAAgC,cAAc,CAAC,CAAC;IACpF,OAAO;QACL,GAAG,MAAM;QACT,YAAY,EAAE,EAAE,CAAC,EAAE;QACjB,8DAA8D;QAC9D,KAAK,EAAE,QAAgB,EAAE,IAAY,EAAE,OAA+C,EAAE,EAAE;YACxF,IAAI,QAAQ,IAAI,IAAI,IAAI,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC1C,OAAO;YACT,CAAC;YACD,IAAI,QAAQ,IAAI,SAAS,IAAI,QAAQ,IAAI,WAAW,EAAE,CAAC;gBACrD,OAAO,EAAE,YAAY,EAAE,CAAC,yBAAyB,CAAC,CAAC;gBACnD,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAC7C,CAAC;YACD,OAAO,mBAAmB,CAAC;QAC7B,CAAC,CACF;KACF,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,QAAQ,CAAC,mCAAmC,EAAE,GAAG,EAAE;IACjD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAA
C,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACrE,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC3E,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gEAAgE,EAAE,KAAK,IAAI,EAAE;QAC9E,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChF,0DAA0D,CAC3D,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,qCAAqC,EAAE,GAAG,EAAE;IACnD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACvF,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC7F,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,84 +0,0 @@
1
- import { TextItem } from "../../core/types.js";
2
- /** Options for page extraction */
3
- export interface ExtractOptions {
4
- /** Whether to extract embedded image bounds (needed for OCR). Default: true */
5
- extractImages?: boolean;
6
- }
7
- export interface PdfEngine {
8
- name: string;
9
- loadDocument(input: string | Uint8Array, password?: string): Promise<PdfDocument>;
10
- extractPage(doc: PdfDocument, pageNum: number, options?: ExtractOptions): Promise<PageData>;
11
- extractAllPages(doc: PdfDocument, maxPages?: number, targetPages?: string, options?: ExtractOptions): Promise<PageData[]>;
12
- renderPageImage(doc: PdfDocument, pageNum: number, dpi: number, password?: string): Promise<Buffer>;
13
- close(doc: PdfDocument): Promise<void>;
14
- }
15
- export interface PdfDocument {
16
- numPages: number;
17
- data: Uint8Array;
18
- metadata?: unknown;
19
- }
20
- /** Bounding box region */
21
- export interface BoundingBox {
22
- x: number;
23
- y: number;
24
- width: number;
25
- height: number;
26
- }
27
- export interface PageData {
28
- pageNum: number;
29
- width: number;
30
- height: number;
31
- textItems: TextItem[];
32
- images: Image[];
33
- annotations?: Annotation[];
34
- /** Bounding boxes of garbled text that was filtered out (for targeted OCR) */
35
- garbledTextRegions?: BoundingBox[];
36
- }
37
- export interface Path {
38
- type: "rectangle" | "line" | "curve";
39
- points: number[][];
40
- color?: string;
41
- width?: number;
42
- }
43
- export interface Image {
44
- x: number;
45
- y: number;
46
- width: number;
47
- height: number;
48
- data?: Buffer;
49
- coords?: {
50
- x: number;
51
- y: number;
52
- w: number;
53
- h: number;
54
- };
55
- scaleFactor?: number;
56
- originalOrientationAngle?: number;
57
- type?: string;
58
- ocrRaw?: EasyOcrResultLine[];
59
- ocrParsed?: Array<{
60
- x: number;
61
- y: number;
62
- w: number;
63
- h: number;
64
- confidence: number;
65
- text: string;
66
- }>;
67
- }
68
- export type EasyOcrResultLine = [
69
- [
70
- [number, number],
71
- [number, number],
72
- [number, number],
73
- [number, number]
74
- ],
75
- string,
76
- string | number
77
- ];
78
- export interface Annotation {
79
- type: string;
80
- subtype?: string;
81
- url?: string;
82
- rect: number[];
83
- }
84
- //# sourceMappingURL=interface.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"interface.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/interface.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAE/C,kCAAkC;AAClC,MAAM,WAAW,cAAc;IAC7B,+EAA+E;IAC/E,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAClF,WAAW,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5F,eAAe,CACb,GAAG,EAAE,WAAW,EAChB,QAAQ,CAAC,EAAE,MAAM,EACjB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;IACvB,eAAe,CACb,GAAG,EAAE,WAAW,EAChB,OAAO,EAAE,MAAM,EACf,GAAG,EAAE,MAAM,EACX,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,CAAC;IACnB,KAAK,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACxC;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,0BAA0B;AAC1B,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,WAAW,CAAC,EAAE,UAAU,EAAE,CAAC;IAC3B,8EAA8E;IAC9E,kBAAkB,CAAC,EAAE,WAAW,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,IAAI;IACnB,IAAI,EAAE,WAAW,GAAG,MAAM,GAAG,OAAO,CAAC;IACrC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,KAAK;IACpB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACxD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wBAAwB,CAAC,EAAE,MAAM,CAAC;IAClC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAC7B,SAAS,CAAC,EAAE,KAAK,CAAC;QAChB,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,
MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;CACJ;AAGD,MAAM,MAAM,iBAAiB,GAAG;IAC9B;QAAC,CAAC,MAAM,EAAE,MAAM,CAAC;QAAE,CAAC,MAAM,EAAE,MAAM,CAAC;QAAE,CAAC,MAAM,EAAE,MAAM,CAAC;QAAE,CAAC,MAAM,EAAE,MAAM,CAAC;KAAC;IACxE,MAAM;IACN,MAAM,GAAG,MAAM;CAChB,CAAC;AAEF,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,EAAE,CAAC;CAChB"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=interface.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"interface.js","sourceRoot":"","sources":["../../../../src/engines/pdf/interface.ts"],"names":[],"mappings":""}
@@ -1,31 +0,0 @@
1
- /**
2
- * PDFium-based PDF screenshot renderer
3
- * Uses native PDFium library for high-quality, fast screenshots
4
- */
5
- export declare class PdfiumRenderer {
6
- private pdfium;
7
- private cachedDocument;
8
- init(): Promise<void>;
9
- /**
10
- * Pre-load a PDF document so that subsequent per-page calls
11
- * (`renderPageToBuffer`, `extractImageBounds`) reuse it instead
12
- * of re-parsing the file on every invocation.
13
- */
14
- loadDocument(pdfInput: string | Buffer | Uint8Array, password?: string): Promise<void>;
15
- closeDocument(): void;
16
- private getOrLoadDocument;
17
- renderPageToBuffer(pdfInput: string | Buffer | Uint8Array, pageNumber: number, dpi?: number, password?: string): Promise<Buffer>;
18
- /**
19
- * Extract bounding boxes of all embedded images on a page.
20
- * Uses PDFium's low-level WASM API to iterate page objects and read image bounds.
21
- * Returns coordinates in viewport space (Y-down, origin top-left) in PDF points.
22
- */
23
- extractImageBounds(pdfInput: string | Buffer | Uint8Array, pageNumber: number, password?: string): Promise<Array<{
24
- x: number;
25
- y: number;
26
- width: number;
27
- height: number;
28
- }>>;
29
- close(): Promise<void>;
30
- }
31
- //# sourceMappingURL=pdfium-renderer.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"pdfium-renderer.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.ts"],"names":[],"mappings":"AAmCA;;;GAGG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,MAAM,CAA8B;IAC5C,OAAO,CAAC,cAAc,CAA+B;IAE/C,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAM3B;;;;OAIG;IACG,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQ5F,aAAa,IAAI,IAAI;YAOP,iBAAiB;IAczB,kBAAkB,CACtB,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,EACtC,UAAU,EAAE,MAAM,EAClB,GAAG,GAAE,MAAY,EACjB,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC;IAmClB;;;;OAIG;IACG,kBAAkB,CACtB,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,EACtC,UAAU,EAAE,MAAM,EAClB,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,KAAK,CAAC;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IA6DpE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAO7B"}
@@ -1,145 +0,0 @@
1
- import { PDFiumLibrary } from "@hyzyla/pdfium";
2
- import sharp from "sharp";
3
- import { promises as fs } from "fs";
4
- /** Minimum image dimension in PDF points to be considered for OCR */
5
- const MIN_IMAGE_SIZE_PT = 25;
6
- /** Images covering more than this fraction of the page are treated as backgrounds */
7
- const MAX_IMAGE_PAGE_COVERAGE = 0.9;
8
- /**
9
- * PDFium-based PDF screenshot renderer
10
- * Uses native PDFium library for high-quality, fast screenshots
11
- */
12
- export class PdfiumRenderer {
13
- pdfium = null;
14
- cachedDocument = null;
15
- async init() {
16
- if (!this.pdfium) {
17
- this.pdfium = await PDFiumLibrary.init();
18
- }
19
- }
20
- /**
21
- * Pre-load a PDF document so that subsequent per-page calls
22
- * (`renderPageToBuffer`, `extractImageBounds`) reuse it instead
23
- * of re-parsing the file on every invocation.
24
- */
25
- async loadDocument(pdfInput, password) {
26
- await this.init();
27
- this.closeDocument();
28
- const pdfBuffer = typeof pdfInput === "string" ? await fs.readFile(pdfInput) : Buffer.from(pdfInput);
29
- this.cachedDocument = await this.pdfium.loadDocument(pdfBuffer, password);
30
- }
31
- closeDocument() {
32
- if (this.cachedDocument) {
33
- this.cachedDocument.destroy();
34
- this.cachedDocument = null;
35
- }
36
- }
37
- async getOrLoadDocument(pdfInput, password) {
38
- if (this.cachedDocument) {
39
- return { document: this.cachedDocument, isTemporary: false };
40
- }
41
- await this.init();
42
- const pdfBuffer = typeof pdfInput === "string" ? await fs.readFile(pdfInput) : Buffer.from(pdfInput);
43
- const document = await this.pdfium.loadDocument(pdfBuffer, password);
44
- return { document, isTemporary: true };
45
- }
46
- async renderPageToBuffer(pdfInput, pageNumber, dpi = 150, password) {
47
- const { document, isTemporary } = await this.getOrLoadDocument(pdfInput, password);
48
- try {
49
- const page = document.getPage(pageNumber - 1);
50
- const scale = dpi / 72;
51
- const image = await page.render({
52
- scale,
53
- render: async (options) => {
54
- return await sharp(options.data, {
55
- raw: {
56
- width: options.width,
57
- height: options.height,
58
- channels: 4, // RGBA
59
- },
60
- })
61
- .png({
62
- compressionLevel: 6,
63
- })
64
- .withMetadata({
65
- density: dpi,
66
- })
67
- .toBuffer();
68
- },
69
- });
70
- return Buffer.from(image.data);
71
- }
72
- finally {
73
- if (isTemporary) {
74
- document.destroy();
75
- }
76
- }
77
- }
78
- /**
79
- * Extract bounding boxes of all embedded images on a page.
80
- * Uses PDFium's low-level WASM API to iterate page objects and read image bounds.
81
- * Returns coordinates in viewport space (Y-down, origin top-left) in PDF points.
82
- */
83
- async extractImageBounds(pdfInput, pageNumber, password) {
84
- const { document, isTemporary } = await this.getOrLoadDocument(pdfInput, password);
85
- try {
86
- const page = document.getPage(pageNumber - 1);
87
- const results = [];
88
- const mod = page.module;
89
- const pagePtr = page.pageIdx;
90
- if (!mod || !mod._FPDFPageObj_GetBounds) {
91
- return results;
92
- }
93
- const pageWidth = mod._FPDF_GetPageWidthF(pagePtr);
94
- const pageHeight = mod._FPDF_GetPageHeightF(pagePtr);
95
- for (const obj of page.objects()) {
96
- if (obj.type !== "image")
97
- continue;
98
- const objHandle = obj.objectIdx;
99
- if (!objHandle)
100
- continue;
101
- const ptr = mod._malloc(16);
102
- try {
103
- const ok = mod._FPDFPageObj_GetBounds(objHandle, ptr, ptr + 4, ptr + 8, ptr + 12);
104
- if (!ok)
105
- continue;
106
- const buf = mod.HEAPU8.buffer;
107
- const view = new DataView(buf);
108
- const left = view.getFloat32(ptr, true);
109
- const bottom = view.getFloat32(ptr + 4, true);
110
- const right = view.getFloat32(ptr + 8, true);
111
- const top = view.getFloat32(ptr + 12, true);
112
- const w = right - left;
113
- const h = top - bottom;
114
- if (w < MIN_IMAGE_SIZE_PT || h < MIN_IMAGE_SIZE_PT)
115
- continue;
116
- if (w > pageWidth * MAX_IMAGE_PAGE_COVERAGE && h > pageHeight * MAX_IMAGE_PAGE_COVERAGE)
117
- continue;
118
- results.push({
119
- x: left,
120
- y: pageHeight - top,
121
- width: w,
122
- height: h,
123
- });
124
- }
125
- finally {
126
- mod._free(ptr);
127
- }
128
- }
129
- return results;
130
- }
131
- finally {
132
- if (isTemporary) {
133
- document.destroy();
134
- }
135
- }
136
- }
137
- async close() {
138
- this.closeDocument();
139
- if (this.pdfium) {
140
- this.pdfium.destroy();
141
- this.pdfium = null;
142
- }
143
- }
144
- }
145
- //# sourceMappingURL=pdfium-renderer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"pdfium-renderer.js","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAgD,MAAM,gBAAgB,CAAC;AAC7F,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,IAAI,CAAC;AAsBpC,qEAAqE;AACrE,MAAM,iBAAiB,GAAG,EAAE,CAAC;AAC7B,qFAAqF;AACrF,MAAM,uBAAuB,GAAG,GAAG,CAAC;AAQpC;;;GAGG;AACH,MAAM,OAAO,cAAc;IACjB,MAAM,GAAyB,IAAI,CAAC;IACpC,cAAc,GAA0B,IAAI,CAAC;IAErD,KAAK,CAAC,IAAI;QACR,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,IAAI,CAAC,MAAM,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,CAAC;QAC3C,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,YAAY,CAAC,QAAsC,EAAE,QAAiB;QAC1E,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAClB,IAAI,CAAC,aAAa,EAAE,CAAC;QACrB,MAAM,SAAS,GACb,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrF,IAAI,CAAC,cAAc,GAAG,MAAM,IAAI,CAAC,MAAO,CAAC,YAAY,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAC7E,CAAC;IAED,aAAa;QACX,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,CAAC;YAC9B,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC;QAC7B,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,iBAAiB,CAC7B,QAAsC,EACtC,QAAiB;QAEjB,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,OAAO,EAAE,QAAQ,EAAE,IAAI,CAAC,cAAc,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;QAC/D,CAAC;QACD,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAClB,MAAM,SAAS,GACb,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAO,CAAC,YAAY,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACtE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,kBAAkB,CACtB,QAAsC,EACtC,UAAkB,EAClB,MAAc,GAAG,EACjB,QAAiB;QAEjB,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAEnF,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;YAC9C,MAAM,KAAK,GAAG,GAAG,GAAG,EAAE,CAAC;YAEvB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC;gBAC9B,KAAK;gBACL,MAAM,EAAE,KAAK,EAAE,OAAgC,EAAE,EAAE;oBACjD,OAAO,MAAM,KAAK,CAAC,OAAO,CAAC,IA
AI,EAAE;wBAC/B,GAAG,EAAE;4BACH,KAAK,EAAE,OAAO,CAAC,KAAK;4BACpB,MAAM,EAAE,OAAO,CAAC,MAAM;4BACtB,QAAQ,EAAE,CAAC,EAAE,OAAO;yBACrB;qBACF,CAAC;yBACC,GAAG,CAAC;wBACH,gBAAgB,EAAE,CAAC;qBACpB,CAAC;yBACD,YAAY,CAAC;wBACZ,OAAO,EAAE,GAAG;qBACb,CAAC;yBACD,QAAQ,EAAE,CAAC;gBAChB,CAAC;aACF,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC;gBAAS,CAAC;YACT,IAAI,WAAW,EAAE,CAAC;gBAChB,QAAQ,CAAC,OAAO,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,kBAAkB,CACtB,QAAsC,EACtC,UAAkB,EAClB,QAAiB;QAEjB,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAEnF,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAkC,CAAC;YAC/E,MAAM,OAAO,GAAmE,EAAE,CAAC;YAEnF,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;YACxB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAE7B,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,sBAAsB,EAAE,CAAC;gBACxC,OAAO,OAAO,CAAC;YACjB,CAAC;YAED,MAAM,SAAS,GAAG,GAAG,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;YACnD,MAAM,UAAU,GAAG,GAAG,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC;YAErD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE,CAAC;gBACjC,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO;oBAAE,SAAS;gBAEnC,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,CAAC;gBAChC,IAAI,CAAC,SAAS;oBAAE,SAAS;gBAEzB,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;gBAC5B,IAAI,CAAC;oBACH,MAAM,EAAE,GAAG,GAAG,CAAC,sBAAsB,CAAC,SAAS,EAAE,GAAG,EAAE,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,EAAE,CAAC,CAAC;oBAClF,IAAI,CAAC,EAAE;wBAAE,SAAS;oBAElB,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC;oBAC9B,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,GAAG,CAAC,CAAC;oBAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;oBACxC,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;oBAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;oBAC7C,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;oBAE5C,MAAM,CAAC,GAAG,KAAK,GAAG,IAAI,CAAC;oBACvB,MAAM,CAAC,GAAG,GAAG,GAAG,MAAM,CAAC;oBAEvB,IAAI,CAAC,GAAG,iBAAiB,IAAI,CAAC,GAAG,iBAAiB;wBAAE,SAAS;oBAC7D,IAAI,CAAC,GAAG,
SAAS,GAAG,uBAAuB,IAAI,CAAC,GAAG,UAAU,GAAG,uBAAuB;wBACrF,SAAS;oBAEX,OAAO,CAAC,IAAI,CAAC;wBACX,CAAC,EAAE,IAAI;wBACP,CAAC,EAAE,UAAU,GAAG,GAAG;wBACnB,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,CAAC;qBACV,CAAC,CAAC;gBACL,CAAC;wBAAS,CAAC;oBACT,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBACjB,CAAC;YACH,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;gBAAS,CAAC;YACT,IAAI,WAAW,EAAE,CAAC;gBAChB,QAAQ,CAAC,OAAO,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,aAAa,EAAE,CAAC;QACrB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACtB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;CACF"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=pdfium-renderer.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"pdfium-renderer.test.d.ts","sourceRoot":"","sources":["../../../../src/engines/pdf/pdfium-renderer.test.ts"],"names":[],"mappings":""}