@llamaindex/liteparse 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (541) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +339 -0
  3. package/dist/cli/parse.d.ts +4 -0
  4. package/dist/cli/parse.d.ts.map +1 -0
  5. package/dist/cli/parse.js +401 -0
  6. package/dist/cli/parse.js.map +1 -0
  7. package/dist/src/conversion/convertToPdf.d.ts +47 -0
  8. package/dist/src/conversion/convertToPdf.d.ts.map +1 -0
  9. package/dist/src/conversion/convertToPdf.js +337 -0
  10. package/dist/src/conversion/convertToPdf.js.map +1 -0
  11. package/dist/src/conversion/convertToPdf.test.d.ts +2 -0
  12. package/dist/src/conversion/convertToPdf.test.d.ts.map +1 -0
  13. package/dist/src/conversion/convertToPdf.test.js +208 -0
  14. package/dist/src/conversion/convertToPdf.test.js.map +1 -0
  15. package/dist/src/core/config.d.ts +4 -0
  16. package/dist/src/core/config.d.ts.map +1 -0
  17. package/dist/src/core/config.js +25 -0
  18. package/dist/src/core/config.js.map +1 -0
  19. package/dist/src/core/config.test.d.ts +2 -0
  20. package/dist/src/core/config.test.d.ts.map +1 -0
  21. package/dist/src/core/config.test.js +21 -0
  22. package/dist/src/core/config.test.js.map +1 -0
  23. package/dist/src/core/parser.d.ts +83 -0
  24. package/dist/src/core/parser.d.ts.map +1 -0
  25. package/dist/src/core/parser.js +333 -0
  26. package/dist/src/core/parser.js.map +1 -0
  27. package/dist/src/core/parser.test.d.ts +2 -0
  28. package/dist/src/core/parser.test.d.ts.map +1 -0
  29. package/dist/src/core/parser.test.js +537 -0
  30. package/dist/src/core/parser.test.js.map +1 -0
  31. package/dist/src/core/types.d.ts +287 -0
  32. package/dist/src/core/types.d.ts.map +1 -0
  33. package/dist/src/core/types.js +2 -0
  34. package/dist/src/core/types.js.map +1 -0
  35. package/dist/src/engines/ocr/http-simple.d.ts +19 -0
  36. package/dist/src/engines/ocr/http-simple.d.ts.map +1 -0
  37. package/dist/src/engines/ocr/http-simple.js +63 -0
  38. package/dist/src/engines/ocr/http-simple.js.map +1 -0
  39. package/dist/src/engines/ocr/http-simple.test.d.ts +2 -0
  40. package/dist/src/engines/ocr/http-simple.test.d.ts.map +1 -0
  41. package/dist/src/engines/ocr/http-simple.test.js +108 -0
  42. package/dist/src/engines/ocr/http-simple.test.js.map +1 -0
  43. package/dist/src/engines/ocr/interface.d.ts +15 -0
  44. package/dist/src/engines/ocr/interface.d.ts.map +1 -0
  45. package/dist/src/engines/ocr/interface.js +2 -0
  46. package/dist/src/engines/ocr/interface.js.map +1 -0
  47. package/dist/src/engines/ocr/tesseract.d.ts +19 -0
  48. package/dist/src/engines/ocr/tesseract.d.ts.map +1 -0
  49. package/dist/src/engines/ocr/tesseract.js +112 -0
  50. package/dist/src/engines/ocr/tesseract.js.map +1 -0
  51. package/dist/src/engines/ocr/tesseract.test.d.ts +2 -0
  52. package/dist/src/engines/ocr/tesseract.test.d.ts.map +1 -0
  53. package/dist/src/engines/ocr/tesseract.test.js +84 -0
  54. package/dist/src/engines/ocr/tesseract.test.js.map +1 -0
  55. package/dist/src/engines/pdf/interface.d.ts +79 -0
  56. package/dist/src/engines/pdf/interface.d.ts.map +1 -0
  57. package/dist/src/engines/pdf/interface.js +2 -0
  58. package/dist/src/engines/pdf/interface.js.map +1 -0
  59. package/dist/src/engines/pdf/pdfium-renderer.d.ts +11 -0
  60. package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +1 -0
  61. package/dist/src/engines/pdf/pdfium-renderer.js +64 -0
  62. package/dist/src/engines/pdf/pdfium-renderer.js.map +1 -0
  63. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +2 -0
  64. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +1 -0
  65. package/dist/src/engines/pdf/pdfium-renderer.test.js +76 -0
  66. package/dist/src/engines/pdf/pdfium-renderer.test.js.map +1 -0
  67. package/dist/src/engines/pdf/pdfjs.d.ts +13 -0
  68. package/dist/src/engines/pdf/pdfjs.d.ts.map +1 -0
  69. package/dist/src/engines/pdf/pdfjs.js +538 -0
  70. package/dist/src/engines/pdf/pdfjs.js.map +1 -0
  71. package/dist/src/engines/pdf/pdfjs.test.d.ts +2 -0
  72. package/dist/src/engines/pdf/pdfjs.test.d.ts.map +1 -0
  73. package/dist/src/engines/pdf/pdfjs.test.js +220 -0
  74. package/dist/src/engines/pdf/pdfjs.test.js.map +1 -0
  75. package/dist/src/engines/pdf/pdfjsImporter.d.ts +5 -0
  76. package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +1 -0
  77. package/dist/src/engines/pdf/pdfjsImporter.js +9 -0
  78. package/dist/src/engines/pdf/pdfjsImporter.js.map +1 -0
  79. package/dist/src/index.d.ts +3 -0
  80. package/dist/src/index.d.ts.map +1 -0
  81. package/dist/src/index.js +5 -0
  82. package/dist/src/index.js.map +1 -0
  83. package/dist/src/lib.d.ts +17 -0
  84. package/dist/src/lib.d.ts.map +1 -0
  85. package/dist/src/lib.js +16 -0
  86. package/dist/src/lib.js.map +1 -0
  87. package/dist/src/output/json.d.ts +10 -0
  88. package/dist/src/output/json.d.ts.map +1 -0
  89. package/dist/src/output/json.js +31 -0
  90. package/dist/src/output/json.js.map +1 -0
  91. package/dist/src/output/json.test.d.ts +2 -0
  92. package/dist/src/output/json.test.d.ts.map +1 -0
  93. package/dist/src/output/json.test.js +136 -0
  94. package/dist/src/output/json.test.js.map +1 -0
  95. package/dist/src/output/text.d.ts +10 -0
  96. package/dist/src/output/text.d.ts.map +1 -0
  97. package/dist/src/output/text.js +17 -0
  98. package/dist/src/output/text.js.map +1 -0
  99. package/dist/src/output/text.test.d.ts +2 -0
  100. package/dist/src/output/text.test.d.ts.map +1 -0
  101. package/dist/src/output/text.test.js +65 -0
  102. package/dist/src/output/text.test.js.map +1 -0
  103. package/dist/src/processing/bbox.d.ts +20 -0
  104. package/dist/src/processing/bbox.d.ts.map +1 -0
  105. package/dist/src/processing/bbox.js +258 -0
  106. package/dist/src/processing/bbox.js.map +1 -0
  107. package/dist/src/processing/bbox.test.d.ts +2 -0
  108. package/dist/src/processing/bbox.test.d.ts.map +1 -0
  109. package/dist/src/processing/bbox.test.js +334 -0
  110. package/dist/src/processing/bbox.test.js.map +1 -0
  111. package/dist/src/processing/cleanText.d.ts +6 -0
  112. package/dist/src/processing/cleanText.d.ts.map +1 -0
  113. package/dist/src/processing/cleanText.js +73 -0
  114. package/dist/src/processing/cleanText.js.map +1 -0
  115. package/dist/src/processing/cleanText.test.d.ts +2 -0
  116. package/dist/src/processing/cleanText.test.d.ts.map +1 -0
  117. package/dist/src/processing/cleanText.test.js +46 -0
  118. package/dist/src/processing/cleanText.test.js.map +1 -0
  119. package/dist/src/processing/grid.d.ts +7 -0
  120. package/dist/src/processing/grid.d.ts.map +1 -0
  121. package/dist/src/processing/grid.js +13 -0
  122. package/dist/src/processing/grid.js.map +1 -0
  123. package/dist/src/processing/gridProjection.d.ts +18 -0
  124. package/dist/src/processing/gridProjection.d.ts.map +1 -0
  125. package/dist/src/processing/gridProjection.js +1392 -0
  126. package/dist/src/processing/gridProjection.js.map +1 -0
  127. package/dist/src/processing/gridProjection.test.d.ts +2 -0
  128. package/dist/src/processing/gridProjection.test.d.ts.map +1 -0
  129. package/dist/src/processing/gridProjection.test.js +464 -0
  130. package/dist/src/processing/gridProjection.test.js.map +1 -0
  131. package/dist/src/processing/markupUtils.d.ts +7 -0
  132. package/dist/src/processing/markupUtils.d.ts.map +1 -0
  133. package/dist/src/processing/markupUtils.js +25 -0
  134. package/dist/src/processing/markupUtils.js.map +1 -0
  135. package/dist/src/processing/markupUtils.test.d.ts +2 -0
  136. package/dist/src/processing/markupUtils.test.d.ts.map +1 -0
  137. package/dist/src/processing/markupUtils.test.js +26 -0
  138. package/dist/src/processing/markupUtils.test.js.map +1 -0
  139. package/dist/src/processing/ocrUtils.d.ts +24 -0
  140. package/dist/src/processing/ocrUtils.d.ts.map +1 -0
  141. package/dist/src/processing/ocrUtils.js +79 -0
  142. package/dist/src/processing/ocrUtils.js.map +1 -0
  143. package/dist/src/processing/octUtils.test.d.ts +2 -0
  144. package/dist/src/processing/octUtils.test.d.ts.map +1 -0
  145. package/dist/src/processing/octUtils.test.js +72 -0
  146. package/dist/src/processing/octUtils.test.js.map +1 -0
  147. package/dist/src/processing/textUtils.d.ts +20 -0
  148. package/dist/src/processing/textUtils.d.ts.map +1 -0
  149. package/dist/src/processing/textUtils.js +142 -0
  150. package/dist/src/processing/textUtils.js.map +1 -0
  151. package/dist/src/processing/textUtils.test.d.ts +2 -0
  152. package/dist/src/processing/textUtils.test.d.ts.map +1 -0
  153. package/dist/src/processing/textUtils.test.js +45 -0
  154. package/dist/src/processing/textUtils.test.js.map +1 -0
  155. package/dist/src/vendor/pdfjs/LICENSE +177 -0
  156. package/dist/src/vendor/pdfjs/README.md +0 -0
  157. package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  158. package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  159. package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  160. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  161. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  162. package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  163. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  164. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  165. package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  166. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  167. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  168. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  169. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  170. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  171. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  172. package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  173. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  174. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  175. package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  176. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  177. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  178. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  179. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  180. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  181. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  182. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  183. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  184. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  185. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  186. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  187. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  188. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  189. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  190. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  191. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  192. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  193. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  194. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  195. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  196. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  197. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  198. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  199. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  200. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  201. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  202. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  203. package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  204. package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  205. package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  206. package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  207. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  208. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  209. package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  210. package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  211. package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  212. package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
  213. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  214. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  215. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  216. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  217. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
  218. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  219. package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  220. package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  221. package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  222. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  223. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  224. package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  225. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  226. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  227. package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
  228. package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  229. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  230. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  231. package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  232. package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  233. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  234. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  235. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  236. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  237. package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  238. package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  239. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  240. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  241. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  242. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  243. package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  244. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  245. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  246. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  247. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  248. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  249. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  250. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  251. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  252. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  253. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  254. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  255. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  256. package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  257. package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  258. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  259. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  260. package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  261. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  262. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  263. package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  264. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  265. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  266. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  267. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  268. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  269. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  270. package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  271. package/dist/src/vendor/pdfjs/cmaps/LICENSE +36 -0
  272. package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  273. package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  274. package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  275. package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  276. package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  277. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  278. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  279. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  280. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  281. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  282. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  283. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  284. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  285. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  286. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  287. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  288. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  289. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  290. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  291. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  292. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  293. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  294. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  295. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  296. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  297. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  298. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  299. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  300. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  301. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  302. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  303. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  304. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  305. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  306. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  307. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  308. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  309. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  310. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  311. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  312. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  313. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  314. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  315. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  316. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  317. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  318. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  319. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  320. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  321. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  322. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  323. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  324. package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  325. package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  326. package/dist/src/vendor/pdfjs/pdf.mjs +19481 -0
  327. package/dist/src/vendor/pdfjs/pdf.mjs.map +1 -0
  328. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
  329. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
  330. package/dist/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
  331. package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
  332. package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  333. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  334. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  335. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  336. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  337. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  338. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  339. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  340. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  341. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  342. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
  343. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
  344. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  345. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  346. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  347. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
  348. package/package.json +89 -0
  349. package/src/vendor/pdfjs/LICENSE +177 -0
  350. package/src/vendor/pdfjs/README.md +0 -0
  351. package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  352. package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  353. package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  354. package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  355. package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  356. package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  357. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  358. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  359. package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  360. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  361. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  362. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  363. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  364. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  365. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  366. package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  367. package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  368. package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  369. package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  370. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  371. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  372. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  373. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  374. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  375. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  376. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  377. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  378. package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  379. package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  380. package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  381. package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  382. package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  383. package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  384. package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  385. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  386. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  387. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  388. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  389. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  390. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  391. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  392. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  393. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  394. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  395. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  396. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  397. package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  398. package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  399. package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  400. package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  401. package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  402. package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  403. package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  404. package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  405. package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  406. package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
  407. package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  408. package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  409. package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  410. package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  411. package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
  412. package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  413. package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  414. package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  415. package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  416. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  417. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  418. package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  419. package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  420. package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  421. package/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
  422. package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  423. package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  424. package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  425. package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  426. package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  427. package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  428. package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  429. package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  430. package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  431. package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  432. package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  433. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  434. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  435. package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  436. package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  437. package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  438. package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  439. package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  440. package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  441. package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  442. package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  443. package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  444. package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  445. package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  446. package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  447. package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  448. package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  449. package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  450. package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  451. package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  452. package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  453. package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  454. package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  455. package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  456. package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  457. package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  458. package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  459. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  460. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  461. package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  462. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  463. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  464. package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  465. package/src/vendor/pdfjs/cmaps/LICENSE +36 -0
  466. package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  467. package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  468. package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  469. package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  470. package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  471. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  472. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  473. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  474. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  475. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  476. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  477. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  478. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  479. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  480. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  481. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  482. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  483. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  484. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  485. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  486. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  487. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  488. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  489. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  490. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  491. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  492. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  493. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  494. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  495. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  496. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  497. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  498. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  499. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  500. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  501. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  502. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  503. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  504. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  505. package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  506. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  507. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  508. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  509. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  510. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  511. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  512. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  513. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  514. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  515. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  516. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  517. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  518. package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  519. package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  520. package/src/vendor/pdfjs/pdf.mjs +19481 -0
  521. package/src/vendor/pdfjs/pdf.mjs.map +1 -0
  522. package/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
  523. package/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
  524. package/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
  525. package/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
  526. package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  527. package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  528. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  529. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  530. package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  531. package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  532. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  533. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  534. package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  535. package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  536. package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
  537. package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
  538. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  539. package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  540. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  541. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
@@ -0,0 +1,20 @@
1
+ import { TextItem, BoundingBox, ProjectionTextBox, LiteParseConfig } from "../core/types.js";
2
+ import { PageData, Image } from "../engines/pdf/interface.js";
3
+ /**
4
+ * Filters out images that should not be OCR'd based on various criteria (image type, position relative to the page, size).
5
+ * Returns the filtered array of images that should be processed (a subset of `images`).
6
+ */
7
+ export declare function filterImagesForOCR(images: Image[], page: {
8
+ width: number;
9
+ height: number;
10
+ }): Image[];
11
+ /**
12
+ * Build projection text boxes from page data, including OCR results when OCR is enabled in `config`.
13
+ * This is the complete implementation from buildBbox.ts (NOTE(review): the source map lists src/processing/bbox.ts as the source — confirm the file name).
14
+ */
15
+ export declare function buildBbox(pageData: PageData, config: LiteParseConfig): ProjectionTextBox[];
16
+ /**
17
+ * Build bounding boxes ({ x1, y1, x2, y2 }) from text items, skipping items whose text is empty or whitespace-only.
18
+ */
19
+ export declare function buildBoundingBoxes(textItems: TextItem[]): BoundingBox[];
20
+ //# sourceMappingURL=bbox.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bbox.d.ts","sourceRoot":"","sources":["../../../src/processing/bbox.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,EACR,WAAW,EACX,iBAAiB,EAEjB,eAAe,EAChB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,6BAA6B,CAAC;AAgC9D;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,KAAK,EAAE,EACf,IAAI,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACtC,KAAK,EAAE,CA2DT;AAsFD;;;GAGG;AACH,wBAAgB,SAAS,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,eAAe,GAAG,iBAAiB,EAAE,CAmH1F;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,WAAW,EAAE,CAiBvE"}
@@ -0,0 +1,258 @@
1
+ import { parseImageOcrBlocks } from "./ocrUtils.js";
2
+ import { cleanOcrTableArtifacts } from "./textUtils.js";
3
+ const OCR_CONFIDENCE_THRESHOLD = 0.1; // OCR blocks whose confidence is below this value are discarded
4
+ /**
5
+ * Minimum overlap ratio (0-1) required to consider an OCR block as a duplicate of existing text.
6
+ * An OCR block is filtered out if:
7
+ * - Total overlap with all text items covers at least this ratio of the OCR block area (the check uses >=)
8
+ * - OR the overlap with any single text item covers at least this ratio of that text item's area
9
+ */
10
+ const OCR_OVERLAP_THRESHOLD = 0.5;
11
+ /**
12
+ * Maximum number of embedded images to process for OCR per page.
13
+ * Keeps the largest images (by raw width * height) when the limit is exceeded.
14
+ */
15
+ const MAX_IMAGES_PER_PAGE = 10;
16
+ /**
17
+ * Minimum raw image dimensions (image.width / image.height) and raw area for OCR processing
18
+ */
19
+ const MIN_IMAGE_DIMENSION = 12;
20
+ const MIN_IMAGE_AREA = 200;
21
+ /**
22
+ * Minimum rendered image dimensions (coords.w / coords.h) and rendered area for OCR processing
23
+ */
24
+ const MIN_RENDERED_DIMENSION = 6;
25
+ const MIN_RENDERED_AREA = 200;
26
+ /**
27
+ * Filters out images that should not be OCR'd: generated/pattern/layout images, images entirely outside the page, and images that are too small.
28
+ * Returns a new array with the images that should be processed; if the cap was applied, the result is ordered largest-first by raw width * height.
29
+ */
30
+ export function filterImagesForOCR(images, page) {
31
+ // Drop images whose type starts with g_ or pattern_ (generated/pattern images); images without a type are kept
32
+ let filtered = images.filter((image) => !image.type?.startsWith("g_") && !image.type?.startsWith("pattern_"));
33
+ // Limit to max images per page, keeping the largest ones. NOTE(review): this cap runs before the layout/viewport/size checks below, so images rejected by those checks still count toward the limit.
34
+ if (filtered.length > MAX_IMAGES_PER_PAGE) {
35
+ filtered.sort((a, b) => b.width * b.height - a.width * a.height);
36
+ filtered = filtered.slice(0, MAX_IMAGES_PER_PAGE);
37
+ }
38
+ // Apply additional filtering criteria
39
+ filtered = filtered.filter((image) => {
40
+ // Ignore layout extracted images (type contains "layout_")
41
+ if (image.type?.includes("layout_")) {
42
+ return false;
43
+ }
44
+ // Get rendered image coords (fall back to the image's own x/y/width/height if coords is not set)
45
+ const coords = image.coords || {
46
+ x: image.x,
47
+ y: image.y,
48
+ w: image.width,
49
+ h: image.height,
50
+ };
51
+ // Skip images that lie entirely outside the page
52
+ if (coords.x + coords.w < 0 || // left of page
53
+ coords.y + coords.h < 0 || // above page
54
+ coords.x > page.width || // right of page
55
+ coords.y > page.height // below page
56
+ ) {
57
+ return false;
58
+ }
59
+ // Skip small images (raw dimensions: image.width / image.height)
60
+ if (image.width < MIN_IMAGE_DIMENSION ||
61
+ image.height < MIN_IMAGE_DIMENSION ||
62
+ image.width * image.height < MIN_IMAGE_AREA) {
63
+ return false;
64
+ }
65
+ // Skip images that render too small in the viewport (rendered dimensions: coords.w / coords.h)
66
+ if (coords.w < MIN_RENDERED_DIMENSION ||
67
+ coords.h < MIN_RENDERED_DIMENSION ||
68
+ coords.w * coords.h < MIN_RENDERED_AREA) {
69
+ return false;
70
+ }
71
+ return true;
72
+ });
73
+ return filtered;
74
+ }
75
+ /**
76
+ * Returns the area of the intersection of two { x, y, w, h } boxes, or 0 when they do not overlap (boxes that only touch along an edge count as non-overlapping).
77
+ */
78
+ function getOverlapArea(box1, box2) {
79
+ const left = Math.max(box1.x, box2.x);
80
+ const right = Math.min(box1.x + box1.w, box2.x + box2.w);
81
+ const top = Math.max(box1.y, box2.y);
82
+ const bottom = Math.min(box1.y + box1.h, box2.y + box2.h);
83
+ if (left >= right || top >= bottom) {
84
+ return 0;
85
+ }
86
+ return (right - left) * (bottom - top);
87
+ }
88
+ /**
89
+ * Filters out OCR blocks that significantly overlap with already-extracted text items.
90
+ * This prevents duplicate text when both document text extraction and OCR detect the same content.
91
+ * Prefers document-extracted text over OCR text.
92
+ * If either list is empty the OCR blocks are returned unchanged; otherwise OCR blocks with zero or negative area are dropped.
93
+ * An OCR block is rejected if:
94
+ * 1. The total overlap with all text items covers at least 50% (OCR_OVERLAP_THRESHOLD, compared with >=) of the OCR block area
95
+ * 2. OR the overlap with any single text item covers at least 50% of that text item's area
96
+ */
97
+ function filterOcrBlocksOverlappingWithText(ocrBlocks, textItems) {
98
+ if (!textItems.length || !ocrBlocks.length) {
99
+ return ocrBlocks;
100
+ }
101
+ return ocrBlocks.filter((ocrBlock) => {
102
+ const ocrBox = {
103
+ x: ocrBlock.x,
104
+ y: ocrBlock.y,
105
+ w: ocrBlock.w,
106
+ h: ocrBlock.h,
107
+ };
108
+ const ocrArea = ocrBlock.w * ocrBlock.h;
109
+ if (ocrArea <= 0) {
110
+ return false;
111
+ }
112
+ let totalOverlapArea = 0;
113
+ // Check overlap with each text item
114
+ for (const textItem of textItems) {
115
+ const textBox = {
116
+ x: textItem.x,
117
+ y: textItem.y,
118
+ w: textItem.w,
119
+ h: textItem.h,
120
+ };
121
+ const textItemArea = textItem.w * textItem.h;
122
+ const overlapArea = getOverlapArea(ocrBox, textBox);
123
+ if (overlapArea > 0) {
124
+ // Accumulate total overlap for condition 1 (overlaps are summed per text item, so text items that overlap each other are counted more than once)
125
+ totalOverlapArea += overlapArea;
126
+ // Condition 2: Reject if the overlap covers at least 50% (>= OCR_OVERLAP_THRESHOLD) of any single text item
127
+ if (textItemArea > 0 && overlapArea / textItemArea >= OCR_OVERLAP_THRESHOLD) {
128
+ return false;
129
+ }
130
+ }
131
+ }
132
+ // Condition 1: Reject if total overlap covers at least 50% (>= OCR_OVERLAP_THRESHOLD) of the OCR block
133
+ const totalOverlapRatio = totalOverlapArea / ocrArea;
134
+ if (totalOverlapRatio >= OCR_OVERLAP_THRESHOLD) {
135
+ return false;
136
+ }
137
+ return true;
138
+ });
139
+ }
140
+ /**
141
+ * Build projection text boxes from page data, including OCR results when config.ocrEnabled is set and the page has images.
142
+ * This is the complete implementation from buildBbox.ts. Side effect: assigns image.ocrParsed on every processed image that yields at least one OCR line.
143
+ */
144
+ export function buildBbox(pageData, config) {
145
+ const lines = [];
146
+ // Process all extracted text items (w/h fall back to width/height whenever w/h are falsy, which includes 0)
147
+ for (const item of pageData.textItems) {
148
+ const line = {
149
+ x: item.x,
150
+ y: item.y,
151
+ rx: item.rx || 0,
152
+ ry: item.ry || 0,
153
+ w: Math.round(item.w || item.width), // rounded here; pageBbox below keeps the unrounded value
154
+ h: Math.round(item.h || item.height),
155
+ r: item.r || 0,
156
+ str: item.str,
157
+ strLength: [...item.str].length, // Count code points (spread iterates by code point), not UTF-16 code units
158
+ pageBbox: {
159
+ x: item.x,
160
+ y: item.y,
161
+ w: item.w || item.width,
162
+ h: item.h || item.height,
163
+ },
164
+ vgap: item.vgap,
165
+ isPlaceholder: item.isPlaceholder,
166
+ };
167
+ lines.push(line);
168
+ }
169
+ // Process OCR data if images are present and OCR is enabled
170
+ if (pageData.images.length && config.ocrEnabled) {
171
+ // Filter images that should be processed for OCR
172
+ const imagesToProcess = filterImagesForOCR(pageData.images, {
173
+ width: pageData.width,
174
+ height: pageData.height,
175
+ });
176
+ // Collect text item bounding boxes for overlap checking (unrounded w/h)
177
+ const textItemBoxes = pageData.textItems.map((item) => ({
178
+ x: item.x,
179
+ y: item.y,
180
+ w: item.w || item.width,
181
+ h: item.h || item.height,
182
+ }));
183
+ // Collect text content for content-based deduplication (trimmed, lowercased, empty strings removed)
184
+ const existingTextContent = new Set(pageData.textItems.map((item) => item.str.trim().toLowerCase()).filter((s) => s.length > 0));
185
+ for (const image of imagesToProcess) {
186
+ // Parse OCR blocks from image
187
+ let ocrData = parseImageOcrBlocks(image);
188
+ // Keep only blocks whose confidence is at least OCR_CONFIDENCE_THRESHOLD
189
+ ocrData = ocrData.filter((block) => parseFloat(block.confidence.toString()) >= OCR_CONFIDENCE_THRESHOLD);
190
+ // Filter out OCR blocks that overlap with already-extracted text
191
+ ocrData = filterOcrBlocksOverlappingWithText(ocrData, textItemBoxes);
192
+ // Filter out OCR blocks whose text content already exists in native PDF text
193
+ // This catches duplicates that are at different positions (e.g., watermarks, repeated headers). NOTE(review): the comparison uses the raw block text, before cleanOcrTableArtifacts runs below.
194
+ ocrData = ocrData.filter((block) => {
195
+ const ocrText = block.c.trim().toLowerCase();
196
+ return ocrText.length > 0 && !existingTextContent.has(ocrText);
197
+ });
198
+ const ocrParsed = [];
199
+ for (const block of ocrData) {
200
+ const confidenceRounded = Math.round(parseFloat(block.confidence.toString()) * 1000) / 1000; // rounded to 3 decimal places
201
+ // Clean OCR artifacts from table border misreads
202
+ const cleanedText = cleanOcrTableArtifacts(block.c);
203
+ // Skip if cleaning removed all content
204
+ if (cleanedText.length === 0) {
205
+ continue;
206
+ }
207
+ const line = {
208
+ fromOCR: true,
209
+ x: block.x,
210
+ y: block.y,
211
+ w: block.w,
212
+ h: block.h,
213
+ r: image.originalOrientationAngle || 0,
214
+ str: cleanedText,
215
+ strLength: [...cleanedText].length,
216
+ pageBbox: {
217
+ x: block.x,
218
+ y: block.y,
219
+ w: block.w,
220
+ h: block.h,
221
+ },
222
+ };
223
+ lines.push(line);
224
+ ocrParsed.push({ // built from the block's rx/ry/rw/rh fields rather than x/y/w/h
225
+ x: block.rx,
226
+ y: block.ry,
227
+ w: block.rw,
228
+ h: block.rh,
229
+ confidence: confidenceRounded,
230
+ text: cleanedText,
231
+ });
232
+ }
233
+ if (ocrParsed.length) {
234
+ image.ocrParsed = ocrParsed;
235
+ }
236
+ }
237
+ }
238
+ return lines;
239
+ }
240
+ /**
241
+ * Build bounding boxes ({ x1, y1, x2, y2 }) from text items, skipping items whose text is empty or whitespace-only.
242
+ */
243
+ export function buildBoundingBoxes(textItems) {
244
+ const bboxes = [];
245
+ for (const item of textItems) {
246
+ if (item.str.trim() === "") {
247
+ continue;
248
+ }
249
+ bboxes.push({
250
+ x1: item.x,
251
+ y1: item.y,
252
+ x2: item.x + (item.w || item.width), // falls back to width when w is falsy (including 0)
253
+ y2: item.y + (item.h || item.height), // falls back to height when h is falsy (including 0)
254
+ });
255
+ }
256
+ return bboxes;
257
+ }
258
+ //# sourceMappingURL=bbox.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bbox.js","sourceRoot":"","sources":["../../../src/processing/bbox.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,mBAAmB,EAAY,MAAM,eAAe,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AAExD,MAAM,wBAAwB,GAAG,GAAG,CAAC;AAErC;;;;;GAKG;AACH,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC;;;GAGG;AACH,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAE/B;;GAEG;AACH,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAC/B,MAAM,cAAc,GAAG,GAAG,CAAC;AAE3B;;GAEG;AACH,MAAM,sBAAsB,GAAG,CAAC,CAAC;AACjC,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,MAAe,EACf,IAAuC;IAEvC,0EAA0E;IAC1E,IAAI,QAAQ,GAAG,MAAM,CAAC,MAAM,CAC1B,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,UAAU,CAAC,UAAU,CAAC,CAChF,CAAC;IAEF,yDAAyD;IACzD,IAAI,QAAQ,CAAC,MAAM,GAAG,mBAAmB,EAAE,CAAC;QAC1C,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;QACjE,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAC;IACpD,CAAC;IAED,sCAAsC;IACtC,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QACnC,iCAAiC;QACjC,IAAI,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YACpC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,4DAA4D;QAC5D,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI;YAC7B,CAAC,EAAE,KAAK,CAAC,CAAC;YACV,CAAC,EAAE,KAAK,CAAC,CAAC;YACV,CAAC,EAAE,KAAK,CAAC,KAAK;YACd,CAAC,EAAE,KAAK,CAAC,MAAM;SAChB,CAAC;QAEF,uCAAuC;QACvC,IACE,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,IAAI,eAAe;YAC1C,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,IAAI,aAAa;YACxC,MAAM,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,IAAI,gBAAgB;YACzC,MAAM,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa;UACpC,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC;QAED,qCAAqC;QACrC,IACE,KAAK,CAAC,KAAK,GAAG,mBAAmB;YACjC,KAAK,CAAC,MAAM,GAAG,mBAAmB;YAClC,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,MAAM,GAAG,cAAc,EAC3C,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC;QAED,oDAAoD;QACpD,IACE,MAAM,CAAC,CAAC,GAAG,sBAAsB;YACjC,MAAM,CAAC,CAAC,GAAG,sBAAsB;YACjC,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,iBAAiB,EACvC,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC;QA
ED,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,IAAoD,EACpD,IAAoD;IAEpD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC;IACtC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACzD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC;IACrC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAE1D,IAAI,IAAI,IAAI,KAAK,IAAI,GAAG,IAAI,MAAM,EAAE,CAAC;QACnC,OAAO,CAAC,CAAC;IACX,CAAC;IAED,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;AACzC,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,kCAAkC,CACzC,SAAqB,EACrB,SAAgE;IAEhE,IAAI,CAAC,SAAS,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;QAC3C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,EAAE;QACnC,MAAM,MAAM,GAAG;YACb,CAAC,EAAE,QAAQ,CAAC,CAAC;YACb,CAAC,EAAE,QAAQ,CAAC,CAAC;YACb,CAAC,EAAE,QAAQ,CAAC,CAAC;YACb,CAAC,EAAE,QAAQ,CAAC,CAAC;SACd,CAAC;QACF,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;QAExC,IAAI,OAAO,IAAI,CAAC,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,gBAAgB,GAAG,CAAC,CAAC;QAEzB,oCAAoC;QACpC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG;gBACd,CAAC,EAAE,QAAQ,CAAC,CAAC;gBACb,CAAC,EAAE,QAAQ,CAAC,CAAC;gBACb,CAAC,EAAE,QAAQ,CAAC,CAAC;gBACb,CAAC,EAAE,QAAQ,CAAC,CAAC;aACd,CAAC;YACF,MAAM,YAAY,GAAG,QAAQ,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;YAE7C,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YAEpD,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;gBACpB,2CAA2C;gBAC3C,gBAAgB,IAAI,WAAW,CAAC;gBAEhC,gFAAgF;gBAChF,IAAI,YAAY,GAAG,CAAC,IAAI,WAAW,GAAG,YAAY,IAAI,qBAAqB,EAAE,CAAC;oBAC5E,OAAO,KAAK,CAAC;gBACf,CAAC;YACH,CAAC;QACH,CAAC;QAED,6EAA6E;QAC7E,MAAM,iBAAiB,GAAG,gBAAgB,GAAG,OAAO,CAAC;QACrD,IAAI,iBAAiB,IAAI,qBAAqB,EAAE,CAAC;YAC/C,OAAO,KAAK,CAAC;QACf,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,QAAkB,EAAE,MAAuB;I
ACnE,MAAM,KAAK,GAAwB,EAAE,CAAC;IAEtC,mCAAmC;IACnC,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,SAAS,EAAE,CAAC;QACtC,MAAM,IAAI,GAAsB;YAC9B,CAAC,EAAE,IAAI,CAAC,CAAC;YACT,CAAC,EAAE,IAAI,CAAC,CAAC;YACT,EAAE,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;YAChB,EAAE,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;YAChB,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC;YACnC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC;YACpC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC;YACd,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,SAAS,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,yCAAyC;YAC1E,QAAQ,EAAE;gBACR,CAAC,EAAE,IAAI,CAAC,CAAC;gBACT,CAAC,EAAE,IAAI,CAAC,CAAC;gBACT,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK;gBACvB,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM;aACzB;YACD,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,aAAa,EAAE,IAAI,CAAC,aAAa;SAClC,CAAC;QAEF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC;IAED,yCAAyC;IACzC,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QAChD,iDAAiD;QACjD,MAAM,eAAe,GAAG,kBAAkB,CAAC,QAAQ,CAAC,MAAM,EAAE;YAC1D,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,MAAM,EAAE,QAAQ,CAAC,MAAM;SACxB,CAAC,CAAC;QAEH,wDAAwD;QACxD,MAAM,aAAa,GAAG,QAAQ,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YACtD,CAAC,EAAE,IAAI,CAAC,CAAC;YACT,CAAC,EAAE,IAAI,CAAC,CAAC;YACT,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK;YACvB,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM;SACzB,CAAC,CAAC,CAAC;QAEJ,8EAA8E;QAC9E,MAAM,mBAAmB,GAAG,IAAI,GAAG,CACjC,QAAQ,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAC5F,CAAC;QAEF,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;YACpC,8BAA8B;YAC9B,IAAI,OAAO,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;YAEzC,iCAAiC;YACjC,OAAO,GAAG,OAAO,CAAC,MAAM,CACtB,CAAC,KAAK,EAAE,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,IAAI,wBAAwB,CAC/E,CAAC;YAEF,iEAAiE;YACjE,OAAO,GAAG,kCAAkC,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;YAErE,6EAA6E;YAC7E,+FAA+F;YAC/F,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;gBACjC,MAAM,OAAO
,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;gBAC7C,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACjE,CAAC,CAAC,CAAC;YAEH,MAAM,SAAS,GAAc,EAAE,CAAC;YAChC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC5B,MAAM,iBAAiB,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;gBAE5F,iDAAiD;gBACjD,MAAM,WAAW,GAAG,sBAAsB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAEpD,uCAAuC;gBACvC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,IAAI,GAAsB;oBAC9B,OAAO,EAAE,IAAI;oBACb,CAAC,EAAE,KAAK,CAAC,CAAC;oBACV,CAAC,EAAE,KAAK,CAAC,CAAC;oBACV,CAAC,EAAE,KAAK,CAAC,CAAC;oBACV,CAAC,EAAE,KAAK,CAAC,CAAC;oBACV,CAAC,EAAE,KAAK,CAAC,wBAAwB,IAAI,CAAC;oBACtC,GAAG,EAAE,WAAW;oBAChB,SAAS,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,MAAM;oBAClC,QAAQ,EAAE;wBACR,CAAC,EAAE,KAAK,CAAC,CAAC;wBACV,CAAC,EAAE,KAAK,CAAC,CAAC;wBACV,CAAC,EAAE,KAAK,CAAC,CAAC;wBACV,CAAC,EAAE,KAAK,CAAC,CAAC;qBACX;iBACF,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAEjB,SAAS,CAAC,IAAI,CAAC;oBACb,CAAC,EAAE,KAAK,CAAC,EAAE;oBACX,CAAC,EAAE,KAAK,CAAC,EAAE;oBACX,CAAC,EAAE,KAAK,CAAC,EAAE;oBACX,CAAC,EAAE,KAAK,CAAC,EAAE;oBACX,UAAU,EAAE,iBAAiB;oBAC7B,IAAI,EAAE,WAAW;iBAClB,CAAC,CAAC;YACL,CAAC;YAED,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;gBACrB,KAAK,CAAC,SAAS,GAAG,SAAS,CAAC;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,SAAqB;IACtD,MAAM,MAAM,GAAkB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YAC3B,SAAS;QACX,CAAC;QAED,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,IAAI,CAAC,CAAC;YACV,EAAE,EAAE,IAAI,CAAC,CAAC;YACV,EAAE,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC;YACnC,EAAE,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC;SACrC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=bbox.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bbox.test.d.ts","sourceRoot":"","sources":["../../../src/processing/bbox.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,334 @@
1
+ import { expect, describe, it } from "vitest";
2
+ import { buildBbox, buildBoundingBoxes, filterImagesForOCR } from "./bbox";
3
+ import { DEFAULT_CONFIG } from "../core/config";
4
+ describe("test filterImagesForOCR", () => {
5
+ it("test valid image", () => {
6
+ const images = [
7
+ {
8
+ type: "photo",
9
+ width: 200,
10
+ height: 200,
11
+ x: 10,
12
+ y: 10,
13
+ coords: { x: 10, y: 10, w: 200, h: 200 },
14
+ },
15
+ ];
16
+ const page = { width: 1000, height: 1000 };
17
+ const result = filterImagesForOCR(images, page);
18
+ expect(result).toStrictEqual(images); // an in-page, large-enough "photo" passes every filter unchanged
19
+ });
20
+ it("test filter on patterns", () => {
21
+ const images = [
22
+ { type: "g_background", width: 200, height: 200, x: 0, y: 0 },
23
+ { type: "pattern_stripe", width: 200, height: 200, x: 0, y: 0 },
24
+ {
25
+ type: "photo",
26
+ width: 200,
27
+ height: 200,
28
+ x: 0,
29
+ y: 0,
30
+ coords: { x: 0, y: 0, w: 200, h: 200 },
31
+ },
32
+ ];
33
+ const page = { width: 1000, height: 1000 };
34
+ const result = filterImagesForOCR(images, page);
35
+ expect(result).toStrictEqual([images[2]]); // types starting with g_ / pattern_ are dropped; only the photo remains
36
+ });
37
+ it("test filter layout", () => {
38
+ const images = [
39
+ {
40
+ type: "layout_header",
41
+ width: 300,
42
+ height: 300,
43
+ x: 0,
44
+ y: 0,
45
+ coords: { x: 0, y: 0, w: 300, h: 300 },
46
+ },
47
+ ];
48
+ const page = { width: 1000, height: 1000 };
49
+ const result = filterImagesForOCR(images, page);
50
+ expect(result.length).toBe(0); // a type containing "layout_" is dropped even though size and position are valid
51
+ });
52
+ it("test out of viewport", () => {
53
+ const images = [
54
+ {
55
+ type: "photo",
56
+ width: 200,
57
+ height: 200,
58
+ coords: { x: 1100, y: 0, w: 200, h: 200 },
59
+ x: 0,
60
+ y: 0,
61
+ },
62
+ ];
63
+ const page = { width: 1000, height: 1000 };
64
+ const result = filterImagesForOCR(images, page);
65
+ expect(result.length).toBe(0); // coords.x (1100) > page.width (1000); coords takes precedence over the image's own x/y
66
+ });
67
+ it("test min dimensions", () => {
68
+ const images = [
69
+ { type: "photo", width: 5, height: 5, x: 0, y: 0, coords: { x: 0, y: 0, w: 5, h: 5 } },
70
+ ];
71
+ const page = { width: 1000, height: 1000 };
72
+ const result = filterImagesForOCR(images, page);
73
+ expect(result.length).toBe(0); // 5x5 is below the raw minimum dimension (MIN_IMAGE_DIMENSION = 12)
74
+ });
75
+ it("test max images per page", () => { // 12 valid candidates, two more than MAX_IMAGES_PER_PAGE (10)
76
+ const images = [
77
+ {
78
+ type: "photo",
79
+ width: 100,
80
+ height: 100,
81
+ x: 0,
82
+ y: 0,
83
+ coords: { x: 0, y: 0, w: 100, h: 100 },
84
+ },
85
+ {
86
+ type: "photo",
87
+ width: 100,
88
+ height: 100,
89
+ x: 0,
90
+ y: 0,
91
+ coords: { x: 0, y: 0, w: 500, h: 500 },
92
+ },
93
+ {
94
+ type: "photo",
95
+ width: 100,
96
+ height: 100,
97
+ x: 0,
98
+ y: 0,
99
+ coords: { x: 0, y: 0, w: 300, h: 300 },
100
+ },
101
+ {
102
+ type: "photo",
103
+ width: 100,
104
+ height: 100,
105
+ x: 0,
106
+ y: 0,
107
+ coords: { x: 0, y: 0, w: 100, h: 100 },
108
+ },
109
+ {
110
+ type: "photo",
111
+ width: 100,
112
+ height: 100,
113
+ x: 0,
114
+ y: 0,
115
+ coords: { x: 0, y: 0, w: 500, h: 500 },
116
+ },
117
+ {
118
+ type: "photo",
119
+ width: 100,
120
+ height: 100,
121
+ x: 0,
122
+ y: 0,
123
+ coords: { x: 0, y: 0, w: 300, h: 300 },
124
+ },
125
+ {
126
+ type: "photo",
127
+ width: 100,
128
+ height: 100,
129
+ x: 0,
130
+ y: 0,
131
+ coords: { x: 0, y: 0, w: 100, h: 100 },
132
+ },
133
+ {
134
+ type: "photo",
135
+ width: 100,
136
+ height: 100,
137
+ x: 0,
138
+ y: 0,
139
+ coords: { x: 0, y: 0, w: 500, h: 500 },
140
+ },
141
+ {
142
+ type: "photo",
143
+ width: 100,
144
+ height: 100,
145
+ x: 0,
146
+ y: 0,
147
+ coords: { x: 0, y: 0, w: 300, h: 300 },
148
+ },
149
+ {
150
+ type: "photo",
151
+ width: 100,
152
+ height: 100,
153
+ x: 0,
154
+ y: 0,
155
+ coords: { x: 0, y: 0, w: 100, h: 100 },
156
+ },
157
+ {
158
+ type: "photo",
159
+ width: 100,
160
+ height: 100,
161
+ x: 0,
162
+ y: 0,
163
+ coords: { x: 0, y: 0, w: 500, h: 500 },
164
+ },
165
+ {
166
+ type: "photo",
167
+ width: 100,
168
+ height: 100,
169
+ x: 0,
170
+ y: 0,
171
+ coords: { x: 0, y: 0, w: 300, h: 300 },
172
+ },
173
+ ];
174
+ const page = { width: 1000, height: 1000 };
175
+ const result = filterImagesForOCR(images, page);
176
+ expect(result).toStrictEqual(images.slice(0, 10)); // all raw areas are equal (100x100), so this relies on the sort being stable to keep the first 10 in input order; the differing coords sizes do not affect the ranking
177
+ });
178
+ });
179
+ describe("test buildBox", () => {
180
+ it("test with OCR disabled", () => { // no images and ocrEnabled false: only the native text items are projected
181
+ const pageData = {
182
+ pageNum: 1,
183
+ width: 612,
184
+ height: 792,
185
+ textItems: [
186
+ { str: "Hello World", x: 50, y: 100, width: 120, height: 14, w: 120, h: 14 },
187
+ { str: "Some body text", x: 50, y: 130, width: 200, height: 14, w: 200, h: 14 },
188
+ ],
189
+ images: [],
190
+ };
191
+ const config = { ...DEFAULT_CONFIG, ocrEnabled: false };
192
+ const expectedOutput = [
193
+ {
194
+ x: 50,
195
+ y: 100,
196
+ rx: 0,
197
+ ry: 0,
198
+ w: 120,
199
+ h: 14,
200
+ r: 0,
201
+ str: "Hello World",
202
+ strLength: 11,
203
+ pageBbox: { x: 50, y: 100, w: 120, h: 14 },
204
+ vgap: undefined,
205
+ isPlaceholder: undefined,
206
+ },
207
+ {
208
+ x: 50,
209
+ y: 130,
210
+ rx: 0,
211
+ ry: 0,
212
+ w: 200,
213
+ h: 14,
214
+ r: 0,
215
+ str: "Some body text",
216
+ strLength: 14,
217
+ pageBbox: { x: 50, y: 130, w: 200, h: 14 },
218
+ vgap: undefined,
219
+ isPlaceholder: undefined,
220
+ },
221
+ ];
222
+ const result = buildBbox(pageData, config);
223
+ expect(result).toStrictEqual(expectedOutput); // toStrictEqual also requires the explicit undefined vgap / isPlaceholder keys
224
+ });
225
+ it("test with OCR enabled", () => {
226
+ const pageData = {
227
+ pageNum: 1,
228
+ width: 612,
229
+ height: 792,
230
+ textItems: [
231
+ // Native PDF text (top-left)
232
+ { str: "Hello World", x: 50, y: 100, width: 120, height: 14, w: 120, h: 14 },
233
+ ],
234
+ images: [
235
+ {
236
+ x: 0,
237
+ y: 200,
238
+ width: 612,
239
+ height: 400,
240
+ originalOrientationAngle: 0,
241
+ // parseImageOcrBlocks() reads this internally (NOTE(review): parseImageOcrBlocks is defined in ocrUtils.js, not shown here — confirm):
242
+ ocrRaw: [
243
+ // Block A: no spatial overlap with native text, unique content → KEPT
244
+ [
245
+ [
246
+ [50, 50],
247
+ [250, 50],
248
+ [250, 70],
249
+ [50, 70],
250
+ ],
251
+ "Scanned paragraph text",
252
+ 0.95,
253
+ // resolved by parseImageOcrBlocks to the coords asserted in expectedOutput below:
254
+ // x:50, y:50, w:200, h:20, rx/ry/rw/rh for rotated coords
255
+ ],
256
+ // Block B: text already exists in native items ("hello world") → FILTERED (content dedup)
257
+ [
258
+ [
259
+ [50, 0],
260
+ [170, 0],
261
+ [170, 14],
262
+ [50, 14],
263
+ ],
264
+ "Hello World",
265
+ 0.97,
266
+ // NOTE(review): by analogy with Block A (polygon coords passed through unchanged) this resolves to x:50, y:0, w:120, h:14, which does not intersect the native text box at y:100 — removal relies on content dedup; confirm
267
+ ],
268
+ // Block C: low confidence → FILTERED (below threshold)
269
+ [
270
+ [
271
+ [300, 100],
272
+ [500, 100],
273
+ [500, 120],
274
+ [300, 120],
275
+ ],
276
+ "Low confidence text",
277
+ 0.05,
278
+ ],
279
+ ],
280
+ },
281
+ ],
282
+ };
283
+ const config = { ...DEFAULT_CONFIG, ocrEnabled: true };
284
+ const expectedOutput = [
285
+ // ── Native text item ──────────────────────────────────────────────
286
+ {
287
+ x: 50,
288
+ y: 100,
289
+ rx: 0,
290
+ ry: 0,
291
+ w: 120,
292
+ h: 14,
293
+ r: 0,
294
+ str: "Hello World",
295
+ strLength: 11,
296
+ pageBbox: { x: 50, y: 100, w: 120, h: 14 },
297
+ vgap: undefined,
298
+ isPlaceholder: undefined,
299
+ },
300
+ // ── OCR block A (passed all filters) ─────────────────────────────
301
+ {
302
+ fromOCR: true,
303
+ x: 50,
304
+ y: 50,
305
+ w: 200,
306
+ h: 20,
307
+ r: 0,
308
+ str: "Scanned paragraph text",
309
+ strLength: 22,
310
+ pageBbox: { x: 50, y: 50, w: 200, h: 20 },
311
+ },
312
+ // Block B removed: content dedup match ("hello world" already exists as native text); NOTE(review): a spatial overlap with the native item looks unlikely given the fixture coordinates — confirm
313
+ // Block C removed: confidence 0.05 < OCR_CONFIDENCE_THRESHOLD (0.1)
314
+ ];
315
+ const result = buildBbox(pageData, config);
316
+ expect(result).toStrictEqual(expectedOutput);
317
+ });
318
+ });
319
+ describe("test buildBoundingBoxes", () => {
320
+ it("test buildBoundingBoxes success", () => {
321
+ const textItems = [
322
+ { str: "Hello", x: 50, y: 100, width: 60, height: 14, w: 60, h: 14 },
323
+ { str: " ", x: 50, y: 120, width: 30, height: 14, w: 30, h: 14 },
324
+ { str: "World", x: 50, y: 140, width: 80, height: 14, w: 80, h: 14 },
325
+ ];
326
+ const expectedOutput = [
327
+ { x1: 50, y1: 100, x2: 110, y2: 114 },
328
+ { x1: 50, y1: 140, x2: 130, y2: 154 },
329
+ ];
330
+ const result = buildBoundingBoxes(textItems);
331
+ expect(result).toStrictEqual(expectedOutput); // the whitespace-only item is skipped; x2/y2 are x + w and y + h
332
+ });
333
+ });
334
+ //# sourceMappingURL=bbox.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bbox.test.js","sourceRoot":"","sources":["../../../src/processing/bbox.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,QAAQ,CAAC;AAE3E,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAGhD,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAC1B,MAAM,MAAM,GAAG;YACb;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,EAAE;gBACL,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACzC;SACF,CAAC;QACF,MAAM,IAAI,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,kBAAkB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,MAAM,GAAG;YACb,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE;YAC7D,EAAE,IAAI,EAAE,gBAAgB,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE;YAC/D;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;SACF,CAAC;QACF,MAAM,IAAI,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,kBAAkB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,MAAM,GAAG;YACb;gBACE,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;SACF,CAAC;QACF,MAAM,IAAI,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,kBAAkB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CA
AC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAC9B,MAAM,MAAM,GAAG;YACb;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,MAAM,EAAE,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;gBACzC,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;aACL;SACF,CAAC;QACF,MAAM,IAAI,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,kBAAkB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,MAAM,GAAG;YACb,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE;SACvF,CAAC;QACF,MAAM,IAAI,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,kBAAkB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,MAAM,GAAG;YACb;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,
EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;YACD;gBACE,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,GAAG;gBACX,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;aACvC;SACF,CAAC;QACF,MAAM,IAAI,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,kBAAkB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,
CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,EAAE,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAChC,MAAM,QAAQ,GAAG;YACf,OAAO,EAAE,CAAC;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,GAAG;YACX,SAAS,EAAE;gBACT,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE;gBAC5E,EAAE,GAAG,EAAE,gBAAgB,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE;aAChF;YACD,MAAM,EAAE,EAAE;SACX,CAAC;QACF,MAAM,MAAM,GAAoB,EAAE,GAAG,cAAc,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;QAEzE,MAAM,cAAc,GAAG;YACrB;gBACE,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,GAAG;gBACN,EAAE,EAAE,CAAC;gBACL,EAAE,EAAE,CAAC;gBACL,CAAC,EAAE,GAAG;gBACN,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,CAAC;gBACJ,GAAG,EAAE,aAAa;gBAClB,SAAS,EAAE,EAAE;gBACb,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE;gBAC1C,IAAI,EAAE,SAAS;gBACf,aAAa,EAAE,SAAS;aACzB;YACD;gBACE,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,GAAG;gBACN,EAAE,EAAE,CAAC;gBACL,EAAE,EAAE,CAAC;gBACL,CAAC,EAAE,GAAG;gBACN,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,CAAC;gBACJ,GAAG,EAAE,gBAAgB;gBACrB,SAAS,EAAE,EAAE;gBACb,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE;gBAC1C,IAAI,EAAE,SAAS;gBACf,aAAa,EAAE,SAAS;aACzB;SACF,CAAC;QAEF,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;QAC/B,MAAM,QAAQ,GAAG;YACf,OAAO,EAAE,CAAC;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,GAAG;YACX,SAAS,EAAE;gBACT,6BAA6B;gBAC7B,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE;aAC7E;YACD,MAAM,EAAE;gBACN;oBACE,CAAC,EAAE,CAAC;oBACJ,CAAC,EAAE,GAAG;oBACN,KAAK,EAAE,GAAG;oBACV,MAAM,EAAE,GAAG;oBACX,wBAAwB,EAAE,CAAC;oBAC3B,+CAA+C;oBAC/C,MAAM,EAAE;wBACN,sEAAsE;wBACtE;4BACE;gCACE,CAAC,EAAE,EAAE,EAAE,CAAC;gCACR,CAAC,GAAG,EAAE,EAAE,CAAC;gCACT,CAAC,GAAG,EAAE,EAAE,C
AAC;gCACT,CAAC,EAAE,EAAE,EAAE,CAAC;6BACT;4BACD,wBAAwB;4BACxB,IAAI;4BACJ,2DAA2D;4BAC3D,0DAA0D;yBACtC;wBACtB,0FAA0F;wBAC1F;4BACE;gCACE,CAAC,EAAE,EAAE,CAAC,CAAC;gCACP,CAAC,GAAG,EAAE,CAAC,CAAC;gCACR,CAAC,GAAG,EAAE,EAAE,CAAC;gCACT,CAAC,EAAE,EAAE,EAAE,CAAC;6BACT;4BACD,aAAa;4BACb,IAAI;4BACJ,2DAA2D;yBACvC;wBACtB,uDAAuD;wBACvD;4BACE;gCACE,CAAC,GAAG,EAAE,GAAG,CAAC;gCACV,CAAC,GAAG,EAAE,GAAG,CAAC;gCACV,CAAC,GAAG,EAAE,GAAG,CAAC;gCACV,CAAC,GAAG,EAAE,GAAG,CAAC;6BACX;4BACD,qBAAqB;4BACrB,IAAI;yBACgB;qBACvB;iBACF;aACF;SACF,CAAC;QACF,MAAM,MAAM,GAAoB,EAAE,GAAG,cAAc,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;QAExE,MAAM,cAAc,GAAG;YACrB,qEAAqE;YACrE;gBACE,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,GAAG;gBACN,EAAE,EAAE,CAAC;gBACL,EAAE,EAAE,CAAC;gBACL,CAAC,EAAE,GAAG;gBACN,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,CAAC;gBACJ,GAAG,EAAE,aAAa;gBAClB,SAAS,EAAE,EAAE;gBACb,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE;gBAC1C,IAAI,EAAE,SAAS;gBACf,aAAa,EAAE,SAAS;aACzB;YAED,oEAAoE;YACpE;gBACE,OAAO,EAAE,IAAI;gBACb,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,GAAG;gBACN,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,CAAC;gBACJ,GAAG,EAAE,wBAAwB;gBAC7B,SAAS,EAAE,EAAE;gBACb,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE;aAC1C;YAED,oFAAoF;YACpF,oEAAoE;SACrE,CAAC;QAEF,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,SAAS,GAAG;YAChB,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;YACpE,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;YAClE,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;SACrE
,CAAC;QAEF,MAAM,cAAc,GAAG;YACrB,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE;YACrC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE;SACtC,CAAC;QAEF,MAAM,MAAM,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,6 @@
1
+ import { ParsedPage, LiteParseConfig } from "../core/types.js";
2
+ /**
3
+ * Clean raw text output - removes margins, null characters
4
+ */
5
+ export declare function cleanRawText(pages: ParsedPage[], _config: LiteParseConfig): void;
6
+ //# sourceMappingURL=cleanText.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cleanText.d.ts","sourceRoot":"","sources":["../../../src/processing/cleanText.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AA2E/D;;GAEG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,OAAO,EAAE,eAAe,GAAG,IAAI,CAShF"}