@llamaindex/liteparse 1.5.3 → 2.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (578) hide show
  1. package/README.md +49 -448
  2. package/dist/cli.d.ts +3 -0
  3. package/dist/cli.d.ts.map +1 -0
  4. package/dist/cli.js +87 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/lib.d.ts +58 -0
  7. package/dist/lib.d.ts.map +1 -0
  8. package/dist/lib.js +88 -0
  9. package/dist/lib.js.map +1 -0
  10. package/dist/native.d.ts +54 -0
  11. package/dist/native.d.ts.map +1 -0
  12. package/dist/native.js +70 -0
  13. package/dist/native.js.map +1 -0
  14. package/libpdfium.so +0 -0
  15. package/liteparse.linux-x64-gnu.node +0 -0
  16. package/package.json +36 -50
  17. package/LICENSE +0 -201
  18. package/dist/cli/parse.d.ts +0 -4
  19. package/dist/cli/parse.d.ts.map +0 -1
  20. package/dist/cli/parse.js +0 -450
  21. package/dist/cli/parse.js.map +0 -1
  22. package/dist/package.json +0 -90
  23. package/dist/src/conversion/convertToPdf.d.ts +0 -65
  24. package/dist/src/conversion/convertToPdf.d.ts.map +0 -1
  25. package/dist/src/conversion/convertToPdf.js +0 -405
  26. package/dist/src/conversion/convertToPdf.js.map +0 -1
  27. package/dist/src/conversion/convertToPdf.test.d.ts +0 -2
  28. package/dist/src/conversion/convertToPdf.test.d.ts.map +0 -1
  29. package/dist/src/conversion/convertToPdf.test.js +0 -327
  30. package/dist/src/conversion/convertToPdf.test.js.map +0 -1
  31. package/dist/src/core/config.d.ts +0 -4
  32. package/dist/src/core/config.d.ts.map +0 -1
  33. package/dist/src/core/config.js +0 -26
  34. package/dist/src/core/config.js.map +0 -1
  35. package/dist/src/core/config.test.d.ts +0 -2
  36. package/dist/src/core/config.test.d.ts.map +0 -1
  37. package/dist/src/core/config.test.js +0 -21
  38. package/dist/src/core/config.test.js.map +0 -1
  39. package/dist/src/core/parser.d.ts +0 -92
  40. package/dist/src/core/parser.d.ts.map +0 -1
  41. package/dist/src/core/parser.js +0 -401
  42. package/dist/src/core/parser.js.map +0 -1
  43. package/dist/src/core/parser.test.d.ts +0 -2
  44. package/dist/src/core/parser.test.d.ts.map +0 -1
  45. package/dist/src/core/parser.test.js +0 -541
  46. package/dist/src/core/parser.test.js.map +0 -1
  47. package/dist/src/core/types.d.ts +0 -370
  48. package/dist/src/core/types.d.ts.map +0 -1
  49. package/dist/src/core/types.js +0 -2
  50. package/dist/src/core/types.js.map +0 -1
  51. package/dist/src/engines/ocr/http-simple.d.ts +0 -19
  52. package/dist/src/engines/ocr/http-simple.d.ts.map +0 -1
  53. package/dist/src/engines/ocr/http-simple.js +0 -69
  54. package/dist/src/engines/ocr/http-simple.js.map +0 -1
  55. package/dist/src/engines/ocr/http-simple.test.d.ts +0 -2
  56. package/dist/src/engines/ocr/http-simple.test.d.ts.map +0 -1
  57. package/dist/src/engines/ocr/http-simple.test.js +0 -108
  58. package/dist/src/engines/ocr/http-simple.test.js.map +0 -1
  59. package/dist/src/engines/ocr/interface.d.ts +0 -15
  60. package/dist/src/engines/ocr/interface.d.ts.map +0 -1
  61. package/dist/src/engines/ocr/interface.js +0 -2
  62. package/dist/src/engines/ocr/interface.js.map +0 -1
  63. package/dist/src/engines/ocr/tesseract.d.ts +0 -20
  64. package/dist/src/engines/ocr/tesseract.d.ts.map +0 -1
  65. package/dist/src/engines/ocr/tesseract.js +0 -162
  66. package/dist/src/engines/ocr/tesseract.js.map +0 -1
  67. package/dist/src/engines/ocr/tesseract.test.d.ts +0 -2
  68. package/dist/src/engines/ocr/tesseract.test.d.ts.map +0 -1
  69. package/dist/src/engines/ocr/tesseract.test.js +0 -94
  70. package/dist/src/engines/ocr/tesseract.test.js.map +0 -1
  71. package/dist/src/engines/pdf/interface.d.ts +0 -84
  72. package/dist/src/engines/pdf/interface.d.ts.map +0 -1
  73. package/dist/src/engines/pdf/interface.js +0 -2
  74. package/dist/src/engines/pdf/interface.js.map +0 -1
  75. package/dist/src/engines/pdf/pdfium-renderer.d.ts +0 -31
  76. package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +0 -1
  77. package/dist/src/engines/pdf/pdfium-renderer.js +0 -145
  78. package/dist/src/engines/pdf/pdfium-renderer.js.map +0 -1
  79. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +0 -2
  80. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +0 -1
  81. package/dist/src/engines/pdf/pdfium-renderer.test.js +0 -109
  82. package/dist/src/engines/pdf/pdfium-renderer.test.js.map +0 -1
  83. package/dist/src/engines/pdf/pdfjs.d.ts +0 -14
  84. package/dist/src/engines/pdf/pdfjs.d.ts.map +0 -1
  85. package/dist/src/engines/pdf/pdfjs.js +0 -804
  86. package/dist/src/engines/pdf/pdfjs.js.map +0 -1
  87. package/dist/src/engines/pdf/pdfjs.test.d.ts +0 -2
  88. package/dist/src/engines/pdf/pdfjs.test.d.ts.map +0 -1
  89. package/dist/src/engines/pdf/pdfjs.test.js +0 -225
  90. package/dist/src/engines/pdf/pdfjs.test.js.map +0 -1
  91. package/dist/src/engines/pdf/pdfjsImporter.d.ts +0 -5
  92. package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +0 -1
  93. package/dist/src/engines/pdf/pdfjsImporter.js +0 -45
  94. package/dist/src/engines/pdf/pdfjsImporter.js.map +0 -1
  95. package/dist/src/index.d.ts +0 -3
  96. package/dist/src/index.d.ts.map +0 -1
  97. package/dist/src/index.js +0 -5
  98. package/dist/src/index.js.map +0 -1
  99. package/dist/src/lib.d.ts +0 -19
  100. package/dist/src/lib.d.ts.map +0 -1
  101. package/dist/src/lib.js +0 -17
  102. package/dist/src/lib.js.map +0 -1
  103. package/dist/src/output/json.d.ts +0 -10
  104. package/dist/src/output/json.d.ts.map +0 -1
  105. package/dist/src/output/json.js +0 -32
  106. package/dist/src/output/json.js.map +0 -1
  107. package/dist/src/output/json.test.d.ts +0 -2
  108. package/dist/src/output/json.test.d.ts.map +0 -1
  109. package/dist/src/output/json.test.js +0 -199
  110. package/dist/src/output/json.test.js.map +0 -1
  111. package/dist/src/output/text.d.ts +0 -10
  112. package/dist/src/output/text.d.ts.map +0 -1
  113. package/dist/src/output/text.js +0 -17
  114. package/dist/src/output/text.js.map +0 -1
  115. package/dist/src/output/text.test.d.ts +0 -2
  116. package/dist/src/output/text.test.d.ts.map +0 -1
  117. package/dist/src/output/text.test.js +0 -65
  118. package/dist/src/output/text.test.js.map +0 -1
  119. package/dist/src/processing/bbox.d.ts +0 -20
  120. package/dist/src/processing/bbox.d.ts.map +0 -1
  121. package/dist/src/processing/bbox.js +0 -258
  122. package/dist/src/processing/bbox.js.map +0 -1
  123. package/dist/src/processing/bbox.test.d.ts +0 -2
  124. package/dist/src/processing/bbox.test.d.ts.map +0 -1
  125. package/dist/src/processing/bbox.test.js +0 -334
  126. package/dist/src/processing/bbox.test.js.map +0 -1
  127. package/dist/src/processing/cleanText.d.ts +0 -6
  128. package/dist/src/processing/cleanText.d.ts.map +0 -1
  129. package/dist/src/processing/cleanText.js +0 -73
  130. package/dist/src/processing/cleanText.js.map +0 -1
  131. package/dist/src/processing/cleanText.test.d.ts +0 -2
  132. package/dist/src/processing/cleanText.test.d.ts.map +0 -1
  133. package/dist/src/processing/cleanText.test.js +0 -46
  134. package/dist/src/processing/cleanText.test.js.map +0 -1
  135. package/dist/src/processing/grid.d.ts +0 -7
  136. package/dist/src/processing/grid.d.ts.map +0 -1
  137. package/dist/src/processing/grid.js +0 -13
  138. package/dist/src/processing/grid.js.map +0 -1
  139. package/dist/src/processing/gridDebugLogger.d.ts +0 -206
  140. package/dist/src/processing/gridDebugLogger.d.ts.map +0 -1
  141. package/dist/src/processing/gridDebugLogger.js +0 -446
  142. package/dist/src/processing/gridDebugLogger.js.map +0 -1
  143. package/dist/src/processing/gridProjection.d.ts +0 -19
  144. package/dist/src/processing/gridProjection.d.ts.map +0 -1
  145. package/dist/src/processing/gridProjection.js +0 -1813
  146. package/dist/src/processing/gridProjection.js.map +0 -1
  147. package/dist/src/processing/gridProjection.test.d.ts +0 -2
  148. package/dist/src/processing/gridProjection.test.d.ts.map +0 -1
  149. package/dist/src/processing/gridProjection.test.js +0 -495
  150. package/dist/src/processing/gridProjection.test.js.map +0 -1
  151. package/dist/src/processing/gridVisualizer.d.ts +0 -14
  152. package/dist/src/processing/gridVisualizer.d.ts.map +0 -1
  153. package/dist/src/processing/gridVisualizer.js +0 -166
  154. package/dist/src/processing/gridVisualizer.js.map +0 -1
  155. package/dist/src/processing/markupUtils.d.ts +0 -7
  156. package/dist/src/processing/markupUtils.d.ts.map +0 -1
  157. package/dist/src/processing/markupUtils.js +0 -25
  158. package/dist/src/processing/markupUtils.js.map +0 -1
  159. package/dist/src/processing/markupUtils.test.d.ts +0 -2
  160. package/dist/src/processing/markupUtils.test.d.ts.map +0 -1
  161. package/dist/src/processing/markupUtils.test.js +0 -26
  162. package/dist/src/processing/markupUtils.test.js.map +0 -1
  163. package/dist/src/processing/ocrUtils.d.ts +0 -24
  164. package/dist/src/processing/ocrUtils.d.ts.map +0 -1
  165. package/dist/src/processing/ocrUtils.js +0 -79
  166. package/dist/src/processing/ocrUtils.js.map +0 -1
  167. package/dist/src/processing/octUtils.test.d.ts +0 -2
  168. package/dist/src/processing/octUtils.test.d.ts.map +0 -1
  169. package/dist/src/processing/octUtils.test.js +0 -72
  170. package/dist/src/processing/octUtils.test.js.map +0 -1
  171. package/dist/src/processing/searchItems.d.ts +0 -26
  172. package/dist/src/processing/searchItems.d.ts.map +0 -1
  173. package/dist/src/processing/searchItems.js +0 -93
  174. package/dist/src/processing/searchItems.js.map +0 -1
  175. package/dist/src/processing/searchItems.test.d.ts +0 -2
  176. package/dist/src/processing/searchItems.test.d.ts.map +0 -1
  177. package/dist/src/processing/searchItems.test.js +0 -84
  178. package/dist/src/processing/searchItems.test.js.map +0 -1
  179. package/dist/src/processing/textUtils.d.ts +0 -20
  180. package/dist/src/processing/textUtils.d.ts.map +0 -1
  181. package/dist/src/processing/textUtils.js +0 -142
  182. package/dist/src/processing/textUtils.js.map +0 -1
  183. package/dist/src/processing/textUtils.test.d.ts +0 -2
  184. package/dist/src/processing/textUtils.test.d.ts.map +0 -1
  185. package/dist/src/processing/textUtils.test.js +0 -45
  186. package/dist/src/processing/textUtils.test.js.map +0 -1
  187. package/dist/src/vendor/pdfjs/LICENSE +0 -177
  188. package/dist/src/vendor/pdfjs/README.md +0 -0
  189. package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  190. package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  191. package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  192. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  193. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  194. package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  195. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  196. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  197. package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  198. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  199. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  200. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  201. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  202. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  203. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  204. package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  205. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  206. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  207. package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  208. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  209. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  210. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  211. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  212. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  213. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  214. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  215. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  216. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  217. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  218. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  219. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  220. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  221. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  222. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  223. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  224. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  225. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  226. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  227. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  228. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  229. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  230. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  231. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  232. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  233. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  234. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  235. package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  236. package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  237. package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  238. package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  239. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  240. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  241. package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  242. package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  243. package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  244. package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  245. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  246. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  247. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  248. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  249. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  250. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  251. package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  252. package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  253. package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  254. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  255. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  256. package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  257. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  258. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  259. package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  260. package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  261. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  262. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  263. package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  264. package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  265. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  266. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  267. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  268. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  269. package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  270. package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  271. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  272. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  273. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  274. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  275. package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  276. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  277. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  278. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  279. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  280. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  281. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  282. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  283. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  284. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  285. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  286. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  287. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  288. package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  289. package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  290. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  291. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  292. package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  293. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  294. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  295. package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  296. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  297. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  298. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  299. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  300. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  301. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  302. package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  303. package/dist/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  304. package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  305. package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  306. package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  307. package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  308. package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  309. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  310. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  311. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  312. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  313. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  314. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  315. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  316. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  317. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  318. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  319. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  320. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  321. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  322. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  323. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  324. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  325. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  326. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  327. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  328. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  329. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  330. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  331. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  332. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  333. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  334. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  335. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  336. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  337. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  338. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  339. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  340. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  341. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  342. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  343. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  344. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  345. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  346. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  347. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  348. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  349. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  350. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  351. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  352. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  353. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  354. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  355. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  356. package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  357. package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  358. package/dist/src/vendor/pdfjs/jbig2.wasm +0 -0
  359. package/dist/src/vendor/pdfjs/openjpeg.wasm +0 -0
  360. package/dist/src/vendor/pdfjs/pdf.mjs +0 -33603
  361. package/dist/src/vendor/pdfjs/pdf.mjs.map +0 -1
  362. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  363. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  364. package/dist/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  365. package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  366. package/dist/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  367. package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  368. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  369. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  370. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  371. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  372. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  373. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  374. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  375. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  376. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  377. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  378. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  379. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  380. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  381. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  382. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
  383. package/src/vendor/pdfjs/LICENSE +0 -177
  384. package/src/vendor/pdfjs/README.md +0 -0
  385. package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  386. package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  387. package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  388. package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  389. package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  390. package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  391. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  392. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  393. package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  394. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  395. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  396. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  397. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  398. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  399. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  400. package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  401. package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  402. package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  403. package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  404. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  405. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  406. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  407. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  408. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  409. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  410. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  411. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  412. package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  413. package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  414. package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  415. package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  416. package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  417. package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  418. package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  419. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  420. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  421. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  422. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  423. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  424. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  425. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  426. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  427. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  428. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  429. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  430. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  431. package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  432. package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  433. package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  434. package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  435. package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  436. package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  437. package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  438. package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  439. package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  440. package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  441. package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  442. package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  443. package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  444. package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  445. package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  446. package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  447. package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  448. package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  449. package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  450. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  451. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  452. package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  453. package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  454. package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  455. package/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  456. package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  457. package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  458. package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  459. package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  460. package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  461. package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  462. package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  463. package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  464. package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  465. package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  466. package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  467. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  468. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  469. package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  470. package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  471. package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  472. package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  473. package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  474. package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  475. package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  476. package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  477. package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  478. package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  479. package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  480. package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  481. package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  482. package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  483. package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  484. package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  485. package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  486. package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  487. package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  488. package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  489. package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  490. package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  491. package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  492. package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  493. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  494. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  495. package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  496. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  497. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  498. package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  499. package/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  500. package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  501. package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  502. package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  503. package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  504. package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  505. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  506. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  507. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  508. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  509. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  510. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  511. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  512. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  513. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  514. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  515. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  516. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  517. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  518. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  519. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  520. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  521. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  522. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  523. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  524. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  525. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  526. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  527. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  528. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  529. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  530. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  531. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  532. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  533. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  534. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  535. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  536. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  537. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  538. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  539. package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  540. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  541. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  542. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  543. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  544. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  545. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  546. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  547. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  548. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  549. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  550. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  551. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  552. package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  553. package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  554. package/src/vendor/pdfjs/jbig2.wasm +0 -0
  555. package/src/vendor/pdfjs/openjpeg.wasm +0 -0
  556. package/src/vendor/pdfjs/pdf.mjs +0 -33603
  557. package/src/vendor/pdfjs/pdf.mjs.map +0 -1
  558. package/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  559. package/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  560. package/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  561. package/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  562. package/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  563. package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  564. package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  565. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  566. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  567. package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  568. package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  569. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  570. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  571. package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  572. package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  573. package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  574. package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  575. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  576. package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  577. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  578. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
@@ -1,199 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { buildJSON, formatJSON } from "./json";
3
- const results = [
4
- { text: "Hello World", bbox: [10, 20, 200, 40], confidence: 0.98 },
5
- { text: "Sample text", bbox: [10, 50, 180, 70], confidence: 0.85 },
6
- { text: "Page footer", bbox: [10, 750, 300, 770], confidence: 0.76 },
7
- ];
8
- const textItems = results
9
- .filter((r) => r.confidence > 0.1) // Filter low confidence
10
- .filter((r) => {
11
- // Filter out OCR text that already exists in native PDF text
12
- const ocrText = r.text.trim().toLowerCase();
13
- return ocrText.length > 0;
14
- })
15
- .map((r) => ({
16
- str: r.text,
17
- x: r.bbox[0],
18
- y: r.bbox[1],
19
- width: r.bbox[2] - r.bbox[0],
20
- height: r.bbox[3] - r.bbox[1],
21
- w: r.bbox[2] - r.bbox[0],
22
- h: r.bbox[3] - r.bbox[1],
23
- fontName: "OCR",
24
- fontSize: r.bbox[3] - r.bbox[1],
25
- }));
26
- const textItemsJSON = results.map((r) => ({
27
- text: r.text,
28
- x: r.bbox[0],
29
- y: r.bbox[1],
30
- width: r.bbox[2] - r.bbox[0],
31
- height: r.bbox[3] - r.bbox[1],
32
- fontName: "OCR",
33
- fontSize: r.bbox[3] - r.bbox[1],
34
- confidence: 1.0,
35
- }));
36
- const pages = [
37
- {
38
- pageNum: 1,
39
- width: 612,
40
- height: 792,
41
- text: "Sample text for page 1",
42
- textItems: textItems,
43
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
44
- },
45
- {
46
- pageNum: 2,
47
- width: 612,
48
- height: 792,
49
- text: "Sample text for page 2",
50
- textItems: textItems,
51
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
52
- },
53
- {
54
- pageNum: 3,
55
- width: 612,
56
- height: 792,
57
- text: "Sample text for page 3",
58
- textItems: textItems,
59
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
60
- },
61
- {
62
- pageNum: 4,
63
- width: 612,
64
- height: 792,
65
- text: "Sample text for page 4",
66
- textItems: textItems,
67
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
68
- },
69
- {
70
- pageNum: 5,
71
- width: 612,
72
- height: 792,
73
- text: "Sample text for page 5",
74
- textItems: textItems,
75
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
76
- },
77
- ];
78
- // Native PDF text item (confidence defaults to 1.0)
79
- const nativeTextItem = {
80
- str: "Native text",
81
- x: 10,
82
- y: 20,
83
- width: 100,
84
- height: 15,
85
- w: 100,
86
- h: 15,
87
- fontName: "Helvetica",
88
- fontSize: 12,
89
- confidence: 1.0,
90
- };
91
- // OCR text item (confidence from OCR engine)
92
- const ocrTextItem = {
93
- str: "OCR detected text",
94
- x: 10,
95
- y: 50,
96
- width: 150,
97
- height: 20,
98
- w: 150,
99
- h: 20,
100
- fontName: "OCR",
101
- fontSize: 20,
102
- confidence: 0.95,
103
- };
104
- const mixedPage = {
105
- pageNum: 1,
106
- width: 612,
107
- height: 792,
108
- text: "Native text\nOCR detected text",
109
- textItems: [nativeTextItem, ocrTextItem],
110
- boundingBoxes: [],
111
- };
112
- const pagesJSON = {
113
- pages: [
114
- {
115
- page: 1,
116
- width: 612,
117
- height: 792,
118
- text: "Sample text for page 1",
119
- textItems: textItemsJSON,
120
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
121
- },
122
- {
123
- page: 2,
124
- width: 612,
125
- height: 792,
126
- text: "Sample text for page 2",
127
- textItems: textItemsJSON,
128
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
129
- },
130
- {
131
- page: 3,
132
- width: 612,
133
- height: 792,
134
- text: "Sample text for page 3",
135
- textItems: textItemsJSON,
136
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
137
- },
138
- {
139
- page: 4,
140
- width: 612,
141
- height: 792,
142
- text: "Sample text for page 4",
143
- textItems: textItemsJSON,
144
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
145
- },
146
- {
147
- page: 5,
148
- width: 612,
149
- height: 792,
150
- text: "Sample text for page 5",
151
- textItems: textItemsJSON,
152
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
153
- },
154
- ],
155
- };
156
- const parseResult = {
157
- pages: pages,
158
- text: "hello world",
159
- json: undefined,
160
- };
161
- describe("test json utilities", () => {
162
- it("test buildJSON", () => {
163
- const result = buildJSON(pages);
164
- expect(result).toStrictEqual(pagesJSON);
165
- });
166
- it("test formatJSON", () => {
167
- const result = formatJSON(parseResult);
168
- expect(result).toBe(JSON.stringify(pagesJSON, null, 2));
169
- });
170
- });
171
- describe("confidence field", () => {
172
- it("includes OCR confidence score for OCR items", () => {
173
- const result = buildJSON([mixedPage]);
174
- const items = result.pages[0].textItems;
175
- const ocrItem = items.find((i) => i.text === "OCR detected text");
176
- expect(ocrItem?.confidence).toBe(0.95);
177
- });
178
- it("defaults to 1.0 for native PDF items", () => {
179
- const result = buildJSON([mixedPage]);
180
- const items = result.pages[0].textItems;
181
- const nativeItem = items.find((i) => i.text === "Native text");
182
- expect(nativeItem?.confidence).toBe(1.0);
183
- });
184
- it("preserves confidence of 0.0", () => {
185
- const zeroConfidenceItem = { ...ocrTextItem, confidence: 0.0 };
186
- const page = { ...mixedPage, textItems: [zeroConfidenceItem] };
187
- const result = buildJSON([page]);
188
- const item = result.pages[0].textItems[0];
189
- expect(item.confidence).toBe(0.0);
190
- });
191
- it("defaults to 1.0 when confidence is undefined", () => {
192
- const { confidence: _confidence, ...itemWithoutConfidence } = nativeTextItem;
193
- const page = { ...mixedPage, textItems: [itemWithoutConfidence] };
194
- const result = buildJSON([page]);
195
- const item = result.pages[0].textItems[0];
196
- expect(item.confidence).toBe(1.0);
197
- });
198
- });
199
- //# sourceMappingURL=json.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"json.test.js","sourceRoot":"","sources":["../../../src/output/json.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAE/C,MAAM,OAAO,GAAG;IACd,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE;IAClE,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE;IAClE,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE;CACrE,CAAC;AAEF,MAAM,SAAS,GAAG,OAAO;KACtB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC,wBAAwB;KAC1D,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;IACZ,6DAA6D;IAC7D,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC5C,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;AAC5B,CAAC,CAAC;KACD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACX,GAAG,EAAE,CAAC,CAAC,IAAI;IACX,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACZ,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACZ,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5B,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7B,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACxB,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACxB,QAAQ,EAAE,KAAK;IACf,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;CAChC,CAAC,CAAC,CAAC;AAEN,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACxC,IAAI,EAAE,CAAC,CAAC,IAAI;IACZ,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACZ,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACZ,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5B,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7B,QAAQ,EAAE,KAAK;IACf,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC/B,UAAU,EAAE,GAAG;CAChB,CAAC,CAAC,CAAC;AAEJ,MAAM,KAAK,GAAG;IACZ;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,SAAS;QACpB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;IACD;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,SAAS;QACpB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;IACD;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,SAAS;QACpB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;IACD;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,SAAS;QACpB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;IACD;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,SAAS;QACpB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;CACF,CAAC;AAEF,oDAAoD;AACpD,MAAM,cAAc,GAAG;IACrB,GAAG,EAAE,aAAa;IAClB,CAAC,EAAE,EAAE;IACL,CAAC,EAAE,EAAE;IACL,KAAK,EAAE,GAAG;IACV,MAAM,EAAE,EAAE;IACV,CAAC,EAAE,GAAG;IACN,CAAC,EAAE,EAAE;IACL,QAAQ,EAAE,WAAW;IACrB,QAAQ,EAAE,EAAE;IACZ,UAAU,EAAE,GAAG;CAChB,CAAC;AAEF,6CAA6C;AAC7C,MAAM,WAAW,GAAG;IAClB,GAAG,EAAE,mBAAmB;IACxB,CAAC,EAAE,EAAE;IACL,CAAC,EAAE,EAAE;IACL,KAAK,EAAE,GAAG;IACV,MAAM,EAAE,EAAE;IACV,CAAC,EAAE,GAAG;IACN,CAAC,EAAE,EAAE;IACL,QAAQ,EAAE,KAAK;IACf,QAAQ,EAAE,EAAE;IACZ,UAAU,EAAE,IAAI;CACjB,CAAC;AAEF,MAAM,SAAS,GAAG;IAChB,OAAO,EAAE,CAAC;IACV,KAAK,EAAE,GAAG;IACV,MAAM,EAAE,GAAG;IACX,IAAI,EAAE,gCAAgC;IACtC,SAAS,EAAE,CAAC,cAAc,EAAE,WAAW,CAAC;IACxC,aAAa,EAAE,EAAE;CAClB,CAAC;AAEF,MAAM,SAAS,GAAG;IAChB,KAAK,EAAE;QACL;YACE,IAAI,EAAE,CAAC;YACP,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,GAAG;YACX,IAAI,EAAE,wBAAwB;YAC9B,SAAS,EAAE,aAAa;YACxB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;SACpD;QACD;YACE,IAAI,EAAE,CAAC;YACP,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,GAAG;YACX,IAAI,EAAE,wBAAwB;YAC9B,SAAS,EAAE,aAAa;YACxB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;SACpD;QACD;YACE,IAAI,EAAE,CAAC;YACP,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,GAAG;YACX,IAAI,EAAE,wBAAwB;YAC9B,SAAS,EAAE,aAAa;YACxB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;SACpD;QACD;YACE,IAAI,EAAE,CAAC;YACP,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,GAAG;YACX,IAAI,EAAE,wBAAwB;YAC9B,SAAS,EAAE,aAAa;YACxB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;SACpD;QACD;YACE,IAAI,EAAE,CAAC;YACP,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,GAAG;YACX,IAAI,EAAE,wBAAwB;YAC9B,SAAS,EAAE,aAAa;YACxB,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;SACpD;KACF;CACF,CAAC;AAEF,MAAM,WAAW,GAAG;IAClB,KAAK,EAAE,KAAK;IACZ,IAAI,EAAE,aAAa;IACnB,IAAI,EAAE,SAAS;CAChB,CAAC;AAEF,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,EAAE,CAAC,gBAAgB,EAAE,GAAG,EAAE;QACxB,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;QAChC,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iBAAiB,EAAE,GAAG,EAAE;QACzB,MAAM,MAAM,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;QACtC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACxC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,mBAAmB,CAAC,CAAC;QAClE,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;QACtC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACxC,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,aAAa,CAAC,CAAC;QAC/D,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,kBAAkB,GAAG,EAAE,GAAG,WAAW,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;QAC/D,MAAM,IAAI,GAAG,EAAE,GAAG,SAAS,EAAE,SAAS,EAAE,CAAC,kBAAkB,CAAC,EAAE,CAAC;QAC/D,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QACjC,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,GAAG,qBAAqB,EAAE,GAAG,cAAc,CAAC;QAC7E,MAAM,IAAI,GAAG,EAAE,GAAG,SAAS,EAAE,SAAS,EAAE,CAAC,qBAAqB,CAAC,EAAE,CAAC;QAClE,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QACjC,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,10 +0,0 @@
1
- import { ParseResult, ParsedPage } from "../core/types.js";
2
- /**
3
- * Format pages as plain text
4
- */
5
- export declare function formatText(result: ParseResult): string;
6
- /**
7
- * Format single page as text
8
- */
9
- export declare function formatPageText(page: ParsedPage): string;
10
- //# sourceMappingURL=text.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/output/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAE3D;;GAEG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,WAAW,GAAG,MAAM,CAOtD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAEvD"}
@@ -1,17 +0,0 @@
1
- /**
2
- * Format pages as plain text
3
- */
4
- export function formatText(result) {
5
- const pageTexts = result.pages.map((page) => {
6
- const header = `\n--- Page ${page.pageNum} ---\n`;
7
- return header + page.text;
8
- });
9
- return pageTexts.join("\n\n");
10
- }
11
- /**
12
- * Format single page as text
13
- */
14
- export function formatPageText(page) {
15
- return page.text;
16
- }
17
- //# sourceMappingURL=text.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"text.js","sourceRoot":"","sources":["../../../src/output/text.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,MAAmB;IAC5C,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QAC1C,MAAM,MAAM,GAAG,cAAc,IAAI,CAAC,OAAO,QAAQ,CAAC;QAClD,OAAO,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,OAAO,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAgB;IAC7C,OAAO,IAAI,CAAC,IAAI,CAAC;AACnB,CAAC"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=text.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"text.test.d.ts","sourceRoot":"","sources":["../../../src/output/text.test.ts"],"names":[],"mappings":""}
@@ -1,65 +0,0 @@
1
- import { describe, expect, it } from "vitest";
2
- import { formatPageText, formatText } from "./text";
3
- const pages = [
4
- {
5
- pageNum: 1,
6
- width: 612,
7
- height: 792,
8
- text: "Sample text for page 1",
9
- textItems: [],
10
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
11
- },
12
- {
13
- pageNum: 2,
14
- width: 612,
15
- height: 792,
16
- text: "Sample text for page 2",
17
- textItems: [],
18
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
19
- },
20
- {
21
- pageNum: 3,
22
- width: 612,
23
- height: 792,
24
- text: "Sample text for page 3",
25
- textItems: [],
26
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
27
- },
28
- {
29
- pageNum: 4,
30
- width: 612,
31
- height: 792,
32
- text: "Sample text for page 4",
33
- textItems: [],
34
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
35
- },
36
- {
37
- pageNum: 5,
38
- width: 612,
39
- height: 792,
40
- text: "Sample text for page 5",
41
- textItems: [],
42
- boundingBoxes: [{ x1: 0, y1: 0, x2: 300, y2: 400 }],
43
- },
44
- ];
45
- const parseResult = {
46
- pages: pages,
47
- text: "hello world",
48
- json: undefined,
49
- };
50
- describe("test text utilites", () => {
51
- it("test formatText", () => {
52
- const result = formatText(parseResult);
53
- expect(result).toBe(pages
54
- .map((page) => {
55
- const header = `\n--- Page ${page.pageNum} ---\n`;
56
- return header + page.text;
57
- })
58
- .join("\n\n"));
59
- });
60
- it("test formatPageText", () => {
61
- const result = formatPageText(pages[0]);
62
- expect(result).toBe(pages[0].text);
63
- });
64
- });
65
- //# sourceMappingURL=text.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"text.test.js","sourceRoot":"","sources":["../../../src/output/text.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpD,MAAM,KAAK,GAAG;IACZ;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,EAAE;QACb,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;IACD;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,EAAE;QACb,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;IACD;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,EAAE;QACb,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;IACD;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,EAAE;QACb,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;IACD;QACE,OAAO,EAAE,CAAC;QACV,KAAK,EAAE,GAAG;QACV,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,wBAAwB;QAC9B,SAAS,EAAE,EAAE;QACb,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;KACpD;CACF,CAAC;AAEF,MAAM,WAAW,GAAG;IAClB,KAAK,EAAE,KAAK;IACZ,IAAI,EAAE,aAAa;IACnB,IAAI,EAAE,SAAS;CAChB,CAAC;AAEF,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,EAAE,CAAC,iBAAiB,EAAE,GAAG,EAAE;QACzB,MAAM,MAAM,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CACjB,KAAK;aACF,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACZ,MAAM,MAAM,GAAG,cAAc,IAAI,CAAC,OAAO,QAAQ,CAAC;YAClD,OAAO,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC;QAC5B,CAAC,CAAC;aACD,IAAI,CAAC,MAAM,CAAC,CAChB,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,20 +0,0 @@
1
- import { TextItem, BoundingBox, ProjectionTextBox, LiteParseConfig } from "../core/types.js";
2
- import { PageData, Image } from "../engines/pdf/interface.js";
3
- /**
4
- * Filters images that should not be OCR'd based on various criteria.
5
- * Returns the filtered array of images that should be processed.
6
- */
7
- export declare function filterImagesForOCR(images: Image[], page: {
8
- width: number;
9
- height: number;
10
- }): Image[];
11
- /**
12
- * Build projection text boxes from page data, including OCR results
13
- * This is the complete implementation from buildBbox.ts
14
- */
15
- export declare function buildBbox(pageData: PageData, config: LiteParseConfig): ProjectionTextBox[];
16
- /**
17
- * Build bounding boxes from text items
18
- */
19
- export declare function buildBoundingBoxes(textItems: TextItem[]): BoundingBox[];
20
- //# sourceMappingURL=bbox.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"bbox.d.ts","sourceRoot":"","sources":["../../../src/processing/bbox.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,EACR,WAAW,EACX,iBAAiB,EAEjB,eAAe,EAChB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,6BAA6B,CAAC;AAgC9D;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,KAAK,EAAE,EACf,IAAI,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACtC,KAAK,EAAE,CA2DT;AAsFD;;;GAGG;AACH,wBAAgB,SAAS,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,eAAe,GAAG,iBAAiB,EAAE,CAmH1F;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,WAAW,EAAE,CAiBvE"}
@@ -1,258 +0,0 @@
1
- import { parseImageOcrBlocks } from "./ocrUtils.js";
2
- import { cleanOcrTableArtifacts } from "./textUtils.js";
3
- const OCR_CONFIDENCE_THRESHOLD = 0.1;
4
- /**
5
- * Minimum overlap ratio (0-1) required to consider an OCR block as duplicate of existing text.
6
- * An OCR block is filtered out if:
7
- * - Total overlap with all text items covers more than this ratio of the OCR block area
8
- * - OR the OCR block covers more than this ratio of any single text item
9
- */
10
- const OCR_OVERLAP_THRESHOLD = 0.5;
11
- /**
12
- * Maximum number of embedded images to process for OCR per page.
13
- * Keeps the largest images when limit is exceeded.
14
- */
15
- const MAX_IMAGES_PER_PAGE = 10;
16
- /**
17
- * Minimum image dimensions for OCR processing
18
- */
19
- const MIN_IMAGE_DIMENSION = 12;
20
- const MIN_IMAGE_AREA = 200;
21
- /**
22
- * Minimum rendered image dimensions for OCR processing
23
- */
24
- const MIN_RENDERED_DIMENSION = 6;
25
- const MIN_RENDERED_AREA = 200;
26
- /**
27
- * Filters images that should not be OCR'd based on various criteria.
28
- * Returns the filtered array of images that should be processed.
29
- */
30
- export function filterImagesForOCR(images, page) {
31
- // Filter images that start with g_ or pattern_ (generated/pattern images)
32
- let filtered = images.filter((image) => !image.type?.startsWith("g_") && !image.type?.startsWith("pattern_"));
33
- // Limit to max images per page, keeping the largest ones
34
- if (filtered.length > MAX_IMAGES_PER_PAGE) {
35
- filtered.sort((a, b) => b.width * b.height - a.width * a.height);
36
- filtered = filtered.slice(0, MAX_IMAGES_PER_PAGE);
37
- }
38
- // Apply additional filtering criteria
39
- filtered = filtered.filter((image) => {
40
- // Ignore layout extracted images
41
- if (image.type?.includes("layout_")) {
42
- return false;
43
- }
44
- // Get image coords (use image dimensions if coords not set)
45
- const coords = image.coords || {
46
- x: image.x,
47
- y: image.y,
48
- w: image.width,
49
- h: image.height,
50
- };
51
- // Skip images that are out of viewport
52
- if (coords.x + coords.w < 0 || // left of page
53
- coords.y + coords.h < 0 || // above page
54
- coords.x > page.width || // right of page
55
- coords.y > page.height // below page
56
- ) {
57
- return false;
58
- }
59
- // Skip small images (raw dimensions)
60
- if (image.width < MIN_IMAGE_DIMENSION ||
61
- image.height < MIN_IMAGE_DIMENSION ||
62
- image.width * image.height < MIN_IMAGE_AREA) {
63
- return false;
64
- }
65
- // Skip images that render too small in the viewport
66
- if (coords.w < MIN_RENDERED_DIMENSION ||
67
- coords.h < MIN_RENDERED_DIMENSION ||
68
- coords.w * coords.h < MIN_RENDERED_AREA) {
69
- return false;
70
- }
71
- return true;
72
- });
73
- return filtered;
74
- }
75
- /**
76
- * Checks if two bounding boxes overlap and returns the overlap area.
77
- */
78
- function getOverlapArea(box1, box2) {
79
- const left = Math.max(box1.x, box2.x);
80
- const right = Math.min(box1.x + box1.w, box2.x + box2.w);
81
- const top = Math.max(box1.y, box2.y);
82
- const bottom = Math.min(box1.y + box1.h, box2.y + box2.h);
83
- if (left >= right || top >= bottom) {
84
- return 0;
85
- }
86
- return (right - left) * (bottom - top);
87
- }
88
- /**
89
- * Filters out OCR blocks that significantly overlap with already-extracted text items.
90
- * This prevents duplicate text when both document text extraction and OCR detect the same content.
91
- * Prefers document-extracted text over OCR text.
92
- *
93
- * An OCR block is rejected if:
94
- * 1. The total overlap with all text items covers more than 50% of the OCR block area
95
- * 2. OR the OCR block covers more than 50% of any single text item's area
96
- */
97
- function filterOcrBlocksOverlappingWithText(ocrBlocks, textItems) {
98
- if (!textItems.length || !ocrBlocks.length) {
99
- return ocrBlocks;
100
- }
101
- return ocrBlocks.filter((ocrBlock) => {
102
- const ocrBox = {
103
- x: ocrBlock.x,
104
- y: ocrBlock.y,
105
- w: ocrBlock.w,
106
- h: ocrBlock.h,
107
- };
108
- const ocrArea = ocrBlock.w * ocrBlock.h;
109
- if (ocrArea <= 0) {
110
- return false;
111
- }
112
- let totalOverlapArea = 0;
113
- // Check overlap with each text item
114
- for (const textItem of textItems) {
115
- const textBox = {
116
- x: textItem.x,
117
- y: textItem.y,
118
- w: textItem.w,
119
- h: textItem.h,
120
- };
121
- const textItemArea = textItem.w * textItem.h;
122
- const overlapArea = getOverlapArea(ocrBox, textBox);
123
- if (overlapArea > 0) {
124
- // Accumulate total overlap for condition 1
125
- totalOverlapArea += overlapArea;
126
- // Condition 2: Reject if OCR block covers more than 50% of any single text item
127
- if (textItemArea > 0 && overlapArea / textItemArea >= OCR_OVERLAP_THRESHOLD) {
128
- return false;
129
- }
130
- }
131
- }
132
- // Condition 1: Reject if total overlap covers more than 50% of the OCR block
133
- const totalOverlapRatio = totalOverlapArea / ocrArea;
134
- if (totalOverlapRatio >= OCR_OVERLAP_THRESHOLD) {
135
- return false;
136
- }
137
- return true;
138
- });
139
- }
140
- /**
141
- * Build projection text boxes from page data, including OCR results
142
- * This is the complete implementation from buildBbox.ts
143
- */
144
- export function buildBbox(pageData, config) {
145
- const lines = [];
146
- // Process all extracted text items
147
- for (const item of pageData.textItems) {
148
- const line = {
149
- x: item.x,
150
- y: item.y,
151
- rx: item.rx || 0,
152
- ry: item.ry || 0,
153
- w: Math.round(item.w || item.width),
154
- h: Math.round(item.h || item.height),
155
- r: item.r || 0,
156
- str: item.str,
157
- strLength: [...item.str].length, // Handle multi-byte characters correctly
158
- pageBbox: {
159
- x: item.x,
160
- y: item.y,
161
- w: item.w || item.width,
162
- h: item.h || item.height,
163
- },
164
- vgap: item.vgap,
165
- isPlaceholder: item.isPlaceholder,
166
- };
167
- lines.push(line);
168
- }
169
- // Process OCR data if images are present
170
- if (pageData.images.length && config.ocrEnabled) {
171
- // Filter images that should be processed for OCR
172
- const imagesToProcess = filterImagesForOCR(pageData.images, {
173
- width: pageData.width,
174
- height: pageData.height,
175
- });
176
- // Collect text item bounding boxes for overlap checking
177
- const textItemBoxes = pageData.textItems.map((item) => ({
178
- x: item.x,
179
- y: item.y,
180
- w: item.w || item.width,
181
- h: item.h || item.height,
182
- }));
183
- // Collect text content for content-based deduplication (normalized lowercase)
184
- const existingTextContent = new Set(pageData.textItems.map((item) => item.str.trim().toLowerCase()).filter((s) => s.length > 0));
185
- for (const image of imagesToProcess) {
186
- // Parse OCR blocks from image
187
- let ocrData = parseImageOcrBlocks(image);
188
- // Filter by confidence threshold
189
- ocrData = ocrData.filter((block) => parseFloat(block.confidence.toString()) >= OCR_CONFIDENCE_THRESHOLD);
190
- // Filter out OCR blocks that overlap with already-extracted text
191
- ocrData = filterOcrBlocksOverlappingWithText(ocrData, textItemBoxes);
192
- // Filter out OCR blocks whose text content already exists in native PDF text
193
- // This catches duplicates that are at different positions (e.g., watermarks, repeated headers)
194
- ocrData = ocrData.filter((block) => {
195
- const ocrText = block.c.trim().toLowerCase();
196
- return ocrText.length > 0 && !existingTextContent.has(ocrText);
197
- });
198
- const ocrParsed = [];
199
- for (const block of ocrData) {
200
- const confidenceRounded = Math.round(parseFloat(block.confidence.toString()) * 1000) / 1000;
201
- // Clean OCR artifacts from table border misreads
202
- const cleanedText = cleanOcrTableArtifacts(block.c);
203
- // Skip if cleaning removed all content
204
- if (cleanedText.length === 0) {
205
- continue;
206
- }
207
- const line = {
208
- fromOCR: true,
209
- x: block.x,
210
- y: block.y,
211
- w: block.w,
212
- h: block.h,
213
- r: image.originalOrientationAngle || 0,
214
- str: cleanedText,
215
- strLength: [...cleanedText].length,
216
- pageBbox: {
217
- x: block.x,
218
- y: block.y,
219
- w: block.w,
220
- h: block.h,
221
- },
222
- };
223
- lines.push(line);
224
- ocrParsed.push({
225
- x: block.rx,
226
- y: block.ry,
227
- w: block.rw,
228
- h: block.rh,
229
- confidence: confidenceRounded,
230
- text: cleanedText,
231
- });
232
- }
233
- if (ocrParsed.length) {
234
- image.ocrParsed = ocrParsed;
235
- }
236
- }
237
- }
238
- return lines;
239
- }
240
- /**
241
- * Build bounding boxes from text items
242
- */
243
- export function buildBoundingBoxes(textItems) {
244
- const bboxes = [];
245
- for (const item of textItems) {
246
- if (item.str.trim() === "") {
247
- continue;
248
- }
249
- bboxes.push({
250
- x1: item.x,
251
- y1: item.y,
252
- x2: item.x + (item.w || item.width),
253
- y2: item.y + (item.h || item.height),
254
- });
255
- }
256
- return bboxes;
257
- }
258
- //# sourceMappingURL=bbox.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"bbox.js","sourceRoot":"","sources":["../../../src/processing/bbox.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,mBAAmB,EAAY,MAAM,eAAe,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AAExD,MAAM,wBAAwB,GAAG,GAAG,CAAC;AAErC;;;;;GAKG;AACH,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC;;;GAGG;AACH,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAE/B;;GAEG;AACH,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAC/B,MAAM,cAAc,GAAG,GAAG,CAAC;AAE3B;;GAEG;AACH,MAAM,sBAAsB,GAAG,CAAC,CAAC;AACjC,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,MAAe,EACf,IAAuC;IAEvC,0EAA0E;IAC1E,IAAI,QAAQ,GAAG,MAAM,CAAC,MAAM,CAC1B,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,UAAU,CAAC,UAAU,CAAC,CAChF,CAAC;IAEF,yDAAyD;IACzD,IAAI,QAAQ,CAAC,MAAM,GAAG,mBAAmB,EAAE,CAAC;QAC1C,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;QACjE,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAC;IACpD,CAAC;IAED,sCAAsC;IACtC,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QACnC,iCAAiC;QACjC,IAAI,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YACpC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,4DAA4D;QAC5D,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI;YAC7B,CAAC,EAAE,KAAK,CAAC,CAAC;YACV,CAAC,EAAE,KAAK,CAAC,CAAC;YACV,CAAC,EAAE,KAAK,CAAC,KAAK;YACd,CAAC,EAAE,KAAK,CAAC,MAAM;SAChB,CAAC;QAEF,uCAAuC;QACvC,IACE,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,IAAI,eAAe;YAC1C,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,IAAI,aAAa;YACxC,MAAM,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,IAAI,gBAAgB;YACzC,MAAM,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa;UACpC,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC;QAED,qCAAqC;QACrC,IACE,KAAK,CAAC,KAAK,GAAG,mBAAmB;YACjC,KAAK,CAAC,MAAM,GAAG,mBAAmB;YAClC,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,MAAM,GAAG,cAAc,EAC3C,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC;QAED,oDAAoD;QACpD,IACE,MAAM,CAAC,CAAC,GAAG,sBAAsB;YACjC,MAAM,CAAC,CAAC,GAAG,sBAAsB;YACjC,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,iBAAiB,EACvC,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,IAAoD,EACpD,IAAoD;IAEpD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC;IACtC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACzD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC;IACrC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAE1D,IAAI,IAAI,IAAI,KAAK,IAAI,GAAG,IAAI,MAAM,EAAE,CAAC;QACnC,OAAO,CAAC,CAAC;IACX,CAAC;IAED,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;AACzC,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,kCAAkC,CACzC,SAAqB,EACrB,SAAgE;IAEhE,IAAI,CAAC,SAAS,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;QAC3C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,EAAE;QACnC,MAAM,MAAM,GAAG;YACb,CAAC,EAAE,QAAQ,CAAC,CAAC;YACb,CAAC,EAAE,QAAQ,CAAC,CAAC;YACb,CAAC,EAAE,QAAQ,CAAC,CAAC;YACb,CAAC,EAAE,QAAQ,CAAC,CAAC;SACd,CAAC;QACF,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;QAExC,IAAI,OAAO,IAAI,CAAC,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,gBAAgB,GAAG,CAAC,CAAC;QAEzB,oCAAoC;QACpC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG;gBACd,CAAC,EAAE,QAAQ,CAAC,CAAC;gBACb,CAAC,EAAE,QAAQ,CAAC,CAAC;gBACb,CAAC,EAAE,QAAQ,CAAC,CAAC;gBACb,CAAC,EAAE,QAAQ,CAAC,CAAC;aACd,CAAC;YACF,MAAM,YAAY,GAAG,QAAQ,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;YAE7C,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YAEpD,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;gBACpB,2CAA2C;gBAC3C,gBAAgB,IAAI,WAAW,CAAC;gBAEhC,gFAAgF;gBAChF,IAAI,YAAY,GAAG,CAAC,IAAI,WAAW,GAAG,YAAY,IAAI,qBAAqB,EAAE,CAAC;oBAC5E,OAAO,KAAK,CAAC;gBACf,CAAC;YACH,CAAC;QACH,CAAC;QAED,6EAA6E;QAC7E,MAAM,iBAAiB,GAAG,gBAAgB,GAAG,OAAO,CAAC;QACrD,IAAI,iBAAiB,IAAI,qBAAqB,EAAE,CAAC;YAC/C,OAAO,KAAK,CAAC;QACf,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,QAAkB,EAAE,MAAuB;IACnE,MAAM,KAAK,GAAwB,EAAE,CAAC;IAEtC,mCAAmC;IACnC,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,SAAS,EAAE,CAAC;QACtC,MAAM,IAAI,GAAsB;YAC9B,CAAC,EAAE,IAAI,CAAC,CAAC;YACT,CAAC,EAAE,IAAI,CAAC,CAAC;YACT,EAAE,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;YAChB,EAAE,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;YAChB,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC;YACnC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC;YACpC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC;YACd,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,SAAS,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,yCAAyC;YAC1E,QAAQ,EAAE;gBACR,CAAC,EAAE,IAAI,CAAC,CAAC;gBACT,CAAC,EAAE,IAAI,CAAC,CAAC;gBACT,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK;gBACvB,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM;aACzB;YACD,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,aAAa,EAAE,IAAI,CAAC,aAAa;SAClC,CAAC;QAEF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC;IAED,yCAAyC;IACzC,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QAChD,iDAAiD;QACjD,MAAM,eAAe,GAAG,kBAAkB,CAAC,QAAQ,CAAC,MAAM,EAAE;YAC1D,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,MAAM,EAAE,QAAQ,CAAC,MAAM;SACxB,CAAC,CAAC;QAEH,wDAAwD;QACxD,MAAM,aAAa,GAAG,QAAQ,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YACtD,CAAC,EAAE,IAAI,CAAC,CAAC;YACT,CAAC,EAAE,IAAI,CAAC,CAAC;YACT,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK;YACvB,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM;SACzB,CAAC,CAAC,CAAC;QAEJ,8EAA8E;QAC9E,MAAM,mBAAmB,GAAG,IAAI,GAAG,CACjC,QAAQ,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAC5F,CAAC;QAEF,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;YACpC,8BAA8B;YAC9B,IAAI,OAAO,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;YAEzC,iCAAiC;YACjC,OAAO,GAAG,OAAO,CAAC,MAAM,CACtB,CAAC,KAAK,EAAE,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,IAAI,wBAAwB,CAC/E,CAAC;YAEF,iEAAiE;YACjE,OAAO,GAAG,kCAAkC,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;YAErE,6EAA6E;YAC7E,+FAA+F;YAC/F,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;gBACjC,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;gBAC7C,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACjE,CAAC,CAAC,CAAC;YAEH,MAAM,SAAS,GAAc,EAAE,CAAC;YAChC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC5B,MAAM,iBAAiB,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;gBAE5F,iDAAiD;gBACjD,MAAM,WAAW,GAAG,sBAAsB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAEpD,uCAAuC;gBACvC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,IAAI,GAAsB;oBAC9B,OAAO,EAAE,IAAI;oBACb,CAAC,EAAE,KAAK,CAAC,CAAC;oBACV,CAAC,EAAE,KAAK,CAAC,CAAC;oBACV,CAAC,EAAE,KAAK,CAAC,CAAC;oBACV,CAAC,EAAE,KAAK,CAAC,CAAC;oBACV,CAAC,EAAE,KAAK,CAAC,wBAAwB,IAAI,CAAC;oBACtC,GAAG,EAAE,WAAW;oBAChB,SAAS,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,MAAM;oBAClC,QAAQ,EAAE;wBACR,CAAC,EAAE,KAAK,CAAC,CAAC;wBACV,CAAC,EAAE,KAAK,CAAC,CAAC;wBACV,CAAC,EAAE,KAAK,CAAC,CAAC;wBACV,CAAC,EAAE,KAAK,CAAC,CAAC;qBACX;iBACF,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAEjB,SAAS,CAAC,IAAI,CAAC;oBACb,CAAC,EAAE,KAAK,CAAC,EAAE;oBACX,CAAC,EAAE,KAAK,CAAC,EAAE;oBACX,CAAC,EAAE,KAAK,CAAC,EAAE;oBACX,CAAC,EAAE,KAAK,CAAC,EAAE;oBACX,UAAU,EAAE,iBAAiB;oBAC7B,IAAI,EAAE,WAAW;iBAClB,CAAC,CAAC;YACL,CAAC;YAED,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;gBACrB,KAAK,CAAC,SAAS,GAAG,SAAS,CAAC;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,SAAqB;IACtD,MAAM,MAAM,GAAkB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YAC3B,SAAS;QACX,CAAC;QAED,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,IAAI,CAAC,CAAC;YACV,EAAE,EAAE,IAAI,CAAC,CAAC;YACV,EAAE,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC;YACnC,EAAE,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC;SACrC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=bbox.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"bbox.test.d.ts","sourceRoot":"","sources":["../../../src/processing/bbox.test.ts"],"names":[],"mappings":""}