@llamaindex/liteparse 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (541) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +339 -0
  3. package/dist/cli/parse.d.ts +4 -0
  4. package/dist/cli/parse.d.ts.map +1 -0
  5. package/dist/cli/parse.js +401 -0
  6. package/dist/cli/parse.js.map +1 -0
  7. package/dist/src/conversion/convertToPdf.d.ts +47 -0
  8. package/dist/src/conversion/convertToPdf.d.ts.map +1 -0
  9. package/dist/src/conversion/convertToPdf.js +337 -0
  10. package/dist/src/conversion/convertToPdf.js.map +1 -0
  11. package/dist/src/conversion/convertToPdf.test.d.ts +2 -0
  12. package/dist/src/conversion/convertToPdf.test.d.ts.map +1 -0
  13. package/dist/src/conversion/convertToPdf.test.js +208 -0
  14. package/dist/src/conversion/convertToPdf.test.js.map +1 -0
  15. package/dist/src/core/config.d.ts +4 -0
  16. package/dist/src/core/config.d.ts.map +1 -0
  17. package/dist/src/core/config.js +25 -0
  18. package/dist/src/core/config.js.map +1 -0
  19. package/dist/src/core/config.test.d.ts +2 -0
  20. package/dist/src/core/config.test.d.ts.map +1 -0
  21. package/dist/src/core/config.test.js +21 -0
  22. package/dist/src/core/config.test.js.map +1 -0
  23. package/dist/src/core/parser.d.ts +83 -0
  24. package/dist/src/core/parser.d.ts.map +1 -0
  25. package/dist/src/core/parser.js +333 -0
  26. package/dist/src/core/parser.js.map +1 -0
  27. package/dist/src/core/parser.test.d.ts +2 -0
  28. package/dist/src/core/parser.test.d.ts.map +1 -0
  29. package/dist/src/core/parser.test.js +537 -0
  30. package/dist/src/core/parser.test.js.map +1 -0
  31. package/dist/src/core/types.d.ts +287 -0
  32. package/dist/src/core/types.d.ts.map +1 -0
  33. package/dist/src/core/types.js +2 -0
  34. package/dist/src/core/types.js.map +1 -0
  35. package/dist/src/engines/ocr/http-simple.d.ts +19 -0
  36. package/dist/src/engines/ocr/http-simple.d.ts.map +1 -0
  37. package/dist/src/engines/ocr/http-simple.js +63 -0
  38. package/dist/src/engines/ocr/http-simple.js.map +1 -0
  39. package/dist/src/engines/ocr/http-simple.test.d.ts +2 -0
  40. package/dist/src/engines/ocr/http-simple.test.d.ts.map +1 -0
  41. package/dist/src/engines/ocr/http-simple.test.js +108 -0
  42. package/dist/src/engines/ocr/http-simple.test.js.map +1 -0
  43. package/dist/src/engines/ocr/interface.d.ts +15 -0
  44. package/dist/src/engines/ocr/interface.d.ts.map +1 -0
  45. package/dist/src/engines/ocr/interface.js +2 -0
  46. package/dist/src/engines/ocr/interface.js.map +1 -0
  47. package/dist/src/engines/ocr/tesseract.d.ts +19 -0
  48. package/dist/src/engines/ocr/tesseract.d.ts.map +1 -0
  49. package/dist/src/engines/ocr/tesseract.js +112 -0
  50. package/dist/src/engines/ocr/tesseract.js.map +1 -0
  51. package/dist/src/engines/ocr/tesseract.test.d.ts +2 -0
  52. package/dist/src/engines/ocr/tesseract.test.d.ts.map +1 -0
  53. package/dist/src/engines/ocr/tesseract.test.js +84 -0
  54. package/dist/src/engines/ocr/tesseract.test.js.map +1 -0
  55. package/dist/src/engines/pdf/interface.d.ts +79 -0
  56. package/dist/src/engines/pdf/interface.d.ts.map +1 -0
  57. package/dist/src/engines/pdf/interface.js +2 -0
  58. package/dist/src/engines/pdf/interface.js.map +1 -0
  59. package/dist/src/engines/pdf/pdfium-renderer.d.ts +11 -0
  60. package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +1 -0
  61. package/dist/src/engines/pdf/pdfium-renderer.js +64 -0
  62. package/dist/src/engines/pdf/pdfium-renderer.js.map +1 -0
  63. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +2 -0
  64. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +1 -0
  65. package/dist/src/engines/pdf/pdfium-renderer.test.js +76 -0
  66. package/dist/src/engines/pdf/pdfium-renderer.test.js.map +1 -0
  67. package/dist/src/engines/pdf/pdfjs.d.ts +13 -0
  68. package/dist/src/engines/pdf/pdfjs.d.ts.map +1 -0
  69. package/dist/src/engines/pdf/pdfjs.js +538 -0
  70. package/dist/src/engines/pdf/pdfjs.js.map +1 -0
  71. package/dist/src/engines/pdf/pdfjs.test.d.ts +2 -0
  72. package/dist/src/engines/pdf/pdfjs.test.d.ts.map +1 -0
  73. package/dist/src/engines/pdf/pdfjs.test.js +220 -0
  74. package/dist/src/engines/pdf/pdfjs.test.js.map +1 -0
  75. package/dist/src/engines/pdf/pdfjsImporter.d.ts +5 -0
  76. package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +1 -0
  77. package/dist/src/engines/pdf/pdfjsImporter.js +9 -0
  78. package/dist/src/engines/pdf/pdfjsImporter.js.map +1 -0
  79. package/dist/src/index.d.ts +3 -0
  80. package/dist/src/index.d.ts.map +1 -0
  81. package/dist/src/index.js +5 -0
  82. package/dist/src/index.js.map +1 -0
  83. package/dist/src/lib.d.ts +17 -0
  84. package/dist/src/lib.d.ts.map +1 -0
  85. package/dist/src/lib.js +16 -0
  86. package/dist/src/lib.js.map +1 -0
  87. package/dist/src/output/json.d.ts +10 -0
  88. package/dist/src/output/json.d.ts.map +1 -0
  89. package/dist/src/output/json.js +31 -0
  90. package/dist/src/output/json.js.map +1 -0
  91. package/dist/src/output/json.test.d.ts +2 -0
  92. package/dist/src/output/json.test.d.ts.map +1 -0
  93. package/dist/src/output/json.test.js +136 -0
  94. package/dist/src/output/json.test.js.map +1 -0
  95. package/dist/src/output/text.d.ts +10 -0
  96. package/dist/src/output/text.d.ts.map +1 -0
  97. package/dist/src/output/text.js +17 -0
  98. package/dist/src/output/text.js.map +1 -0
  99. package/dist/src/output/text.test.d.ts +2 -0
  100. package/dist/src/output/text.test.d.ts.map +1 -0
  101. package/dist/src/output/text.test.js +65 -0
  102. package/dist/src/output/text.test.js.map +1 -0
  103. package/dist/src/processing/bbox.d.ts +20 -0
  104. package/dist/src/processing/bbox.d.ts.map +1 -0
  105. package/dist/src/processing/bbox.js +258 -0
  106. package/dist/src/processing/bbox.js.map +1 -0
  107. package/dist/src/processing/bbox.test.d.ts +2 -0
  108. package/dist/src/processing/bbox.test.d.ts.map +1 -0
  109. package/dist/src/processing/bbox.test.js +334 -0
  110. package/dist/src/processing/bbox.test.js.map +1 -0
  111. package/dist/src/processing/cleanText.d.ts +6 -0
  112. package/dist/src/processing/cleanText.d.ts.map +1 -0
  113. package/dist/src/processing/cleanText.js +73 -0
  114. package/dist/src/processing/cleanText.js.map +1 -0
  115. package/dist/src/processing/cleanText.test.d.ts +2 -0
  116. package/dist/src/processing/cleanText.test.d.ts.map +1 -0
  117. package/dist/src/processing/cleanText.test.js +46 -0
  118. package/dist/src/processing/cleanText.test.js.map +1 -0
  119. package/dist/src/processing/grid.d.ts +7 -0
  120. package/dist/src/processing/grid.d.ts.map +1 -0
  121. package/dist/src/processing/grid.js +13 -0
  122. package/dist/src/processing/grid.js.map +1 -0
  123. package/dist/src/processing/gridProjection.d.ts +18 -0
  124. package/dist/src/processing/gridProjection.d.ts.map +1 -0
  125. package/dist/src/processing/gridProjection.js +1392 -0
  126. package/dist/src/processing/gridProjection.js.map +1 -0
  127. package/dist/src/processing/gridProjection.test.d.ts +2 -0
  128. package/dist/src/processing/gridProjection.test.d.ts.map +1 -0
  129. package/dist/src/processing/gridProjection.test.js +464 -0
  130. package/dist/src/processing/gridProjection.test.js.map +1 -0
  131. package/dist/src/processing/markupUtils.d.ts +7 -0
  132. package/dist/src/processing/markupUtils.d.ts.map +1 -0
  133. package/dist/src/processing/markupUtils.js +25 -0
  134. package/dist/src/processing/markupUtils.js.map +1 -0
  135. package/dist/src/processing/markupUtils.test.d.ts +2 -0
  136. package/dist/src/processing/markupUtils.test.d.ts.map +1 -0
  137. package/dist/src/processing/markupUtils.test.js +26 -0
  138. package/dist/src/processing/markupUtils.test.js.map +1 -0
  139. package/dist/src/processing/ocrUtils.d.ts +24 -0
  140. package/dist/src/processing/ocrUtils.d.ts.map +1 -0
  141. package/dist/src/processing/ocrUtils.js +79 -0
  142. package/dist/src/processing/ocrUtils.js.map +1 -0
  143. package/dist/src/processing/octUtils.test.d.ts +2 -0
  144. package/dist/src/processing/octUtils.test.d.ts.map +1 -0
  145. package/dist/src/processing/octUtils.test.js +72 -0
  146. package/dist/src/processing/octUtils.test.js.map +1 -0
  147. package/dist/src/processing/textUtils.d.ts +20 -0
  148. package/dist/src/processing/textUtils.d.ts.map +1 -0
  149. package/dist/src/processing/textUtils.js +142 -0
  150. package/dist/src/processing/textUtils.js.map +1 -0
  151. package/dist/src/processing/textUtils.test.d.ts +2 -0
  152. package/dist/src/processing/textUtils.test.d.ts.map +1 -0
  153. package/dist/src/processing/textUtils.test.js +45 -0
  154. package/dist/src/processing/textUtils.test.js.map +1 -0
  155. package/dist/src/vendor/pdfjs/LICENSE +177 -0
  156. package/dist/src/vendor/pdfjs/README.md +0 -0
  157. package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  158. package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  159. package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  160. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  161. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  162. package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  163. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  164. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  165. package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  166. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  167. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  168. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  169. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  170. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  171. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  172. package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  173. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  174. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  175. package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  176. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  177. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  178. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  179. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  180. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  181. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  182. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  183. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  184. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  185. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  186. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  187. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  188. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  189. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  190. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  191. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  192. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  193. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  194. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  195. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  196. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  197. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  198. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  199. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  200. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  201. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  202. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  203. package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  204. package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  205. package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  206. package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  207. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  208. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  209. package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  210. package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  211. package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  212. package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
  213. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  214. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  215. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  216. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  217. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
  218. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  219. package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  220. package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  221. package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  222. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  223. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  224. package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  225. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  226. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  227. package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
  228. package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  229. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  230. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  231. package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  232. package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  233. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  234. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  235. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  236. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  237. package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  238. package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  239. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  240. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  241. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  242. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  243. package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  244. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  245. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  246. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  247. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  248. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  249. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  250. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  251. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  252. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  253. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  254. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  255. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  256. package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  257. package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  258. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  259. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  260. package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  261. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  262. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  263. package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  264. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  265. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  266. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  267. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  268. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  269. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  270. package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  271. package/dist/src/vendor/pdfjs/cmaps/LICENSE +36 -0
  272. package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  273. package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  274. package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  275. package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  276. package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  277. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  278. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  279. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  280. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  281. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  282. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  283. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  284. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  285. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  286. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  287. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  288. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  289. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  290. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  291. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  292. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  293. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  294. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  295. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  296. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  297. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  298. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  299. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  300. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  301. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  302. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  303. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  304. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  305. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  306. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  307. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  308. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  309. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  310. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  311. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  312. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  313. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  314. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  315. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  316. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  317. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  318. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  319. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  320. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  321. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  322. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  323. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  324. package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  325. package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  326. package/dist/src/vendor/pdfjs/pdf.mjs +19481 -0
  327. package/dist/src/vendor/pdfjs/pdf.mjs.map +1 -0
  328. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
  329. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
  330. package/dist/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
  331. package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
  332. package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  333. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  334. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  335. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  336. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  337. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  338. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  339. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  340. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  341. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  342. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
  343. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
  344. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  345. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  346. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  347. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
  348. package/package.json +89 -0
  349. package/src/vendor/pdfjs/LICENSE +177 -0
  350. package/src/vendor/pdfjs/README.md +0 -0
  351. package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  352. package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  353. package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  354. package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  355. package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  356. package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  357. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  358. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  359. package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  360. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  361. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  362. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  363. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  364. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  365. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  366. package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  367. package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  368. package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  369. package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  370. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  371. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  372. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  373. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  374. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  375. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  376. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  377. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  378. package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  379. package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  380. package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  381. package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  382. package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  383. package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  384. package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  385. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  386. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  387. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  388. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  389. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  390. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  391. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  392. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  393. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  394. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  395. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  396. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  397. package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  398. package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  399. package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  400. package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  401. package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  402. package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  403. package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  404. package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  405. package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  406. package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +3 -0
  407. package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  408. package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  409. package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  410. package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  411. package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +3 -0
  412. package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  413. package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  414. package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  415. package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  416. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  417. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  418. package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  419. package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  420. package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  421. package/src/vendor/pdfjs/cmaps/GB-H.bcmap +4 -0
  422. package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  423. package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  424. package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  425. package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  426. package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  427. package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  428. package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  429. package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  430. package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  431. package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  432. package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  433. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  434. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  435. package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  436. package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  437. package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  438. package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  439. package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  440. package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  441. package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  442. package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  443. package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  444. package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  445. package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  446. package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  447. package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  448. package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  449. package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  450. package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  451. package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  452. package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  453. package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  454. package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  455. package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  456. package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  457. package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  458. package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  459. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  460. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  461. package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  462. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  463. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  464. package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  465. package/src/vendor/pdfjs/cmaps/LICENSE +36 -0
  466. package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  467. package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  468. package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  469. package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  470. package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  471. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  472. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  473. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  474. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  475. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  476. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  477. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  478. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  479. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  480. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  481. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  482. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  483. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  484. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  485. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  486. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  487. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  488. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  489. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  490. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  491. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  492. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  493. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  494. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  495. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  496. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  497. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  498. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  499. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  500. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  501. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  502. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  503. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  504. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  505. package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  506. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  507. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  508. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  509. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  510. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  511. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  512. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  513. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  514. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  515. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  516. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  517. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  518. package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  519. package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  520. package/src/vendor/pdfjs/pdf.mjs +19481 -0
  521. package/src/vendor/pdfjs/pdf.mjs.map +1 -0
  522. package/src/vendor/pdfjs/pdf.sandbox.mjs +210 -0
  523. package/src/vendor/pdfjs/pdf.sandbox.mjs.map +1 -0
  524. package/src/vendor/pdfjs/pdf.worker.mjs +56001 -0
  525. package/src/vendor/pdfjs/pdf.worker.mjs.map +1 -0
  526. package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  527. package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  528. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  529. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  530. package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  531. package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  532. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  533. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  534. package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  535. package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  536. package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +27 -0
  537. package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +102 -0
  538. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  539. package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  540. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  541. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
@@ -0,0 +1,1392 @@
1
+ import { strToSubscriptString, strToPostScript } from "./textUtils.js";
2
+ import { buildBbox } from "./bbox.js";
3
+ import { cleanRawText } from "./cleanText.js";
4
+ import { applyMarkupTags } from "./markupUtils.js";
5
+ // Minimum spaces between unsnapped bboxes (likely justified text)
6
+ const FLOATING_SPACES = 2;
7
+ // Minimum spaces between snapped columns
8
+ const COLUMN_SPACES = 4;
9
/**
 * Snap an anchor x-coordinate to the nearest quarter unit so that almost
 * identical positions fall into the same bucket.
 *
 * @param {number} anchor - raw x-coordinate
 * @returns {number} the coordinate rounded to a multiple of 0.25
 */
function roundAnchor(anchor) {
    const QUARTERS_PER_UNIT = 4;
    const quarters = Math.round(anchor * QUARTERS_PER_UNIT);
    return quarters / QUARTERS_PER_UNIT;
}
13
// Items whose height is below this value count as "very small" text.
// 2pt @ PDF 72 DPI -> 8px @ 300DPI
const SMALL_FONT_SIZE_THRESHOLD = 2;
/**
 * Report whether strictly more than half of the items on a line have a
 * height below SMALL_FONT_SIZE_THRESHOLD.
 *
 * @param {Array<{h: number}>} line - items of a single line
 * @returns {boolean} true when the line is dominated by very small text;
 *   false otherwise (including for an empty line)
 */
function isSmallTextLine(line) {
    const tinyCount = line.reduce(
        (count, item) => (item.h < SMALL_FONT_SIZE_THRESHOLD ? count + 1 : count),
        0,
    );
    return tinyCount / line.length > 0.5;
}
23
/**
 * Remove text items that would break projection (currently: very small text).
 *
 * When config.preserveVerySmallText is falsy and the line is dominated by
 * very small text, every item below SMALL_FONT_SIZE_THRESHOLD is dropped.
 * In every other case the input array itself is returned untouched.
 *
 * @param {{preserveVerySmallText?: boolean}} config - projection options
 * @param {Array<{h: number}>} line - items of a single line
 * @returns {Array<{h: number}>} the same array, or a filtered copy
 */
function filterUnprojectableText(config, line) {
    const dropSmallText = line.length > 0 && !config.preserveVerySmallText && isSmallTextLine(line);
    return dropSmallText
        ? line.filter((item) => item.h >= SMALL_FONT_SIZE_THRESHOLD)
        : line;
}
35
/**
 * Decide whether a line may be snapped to anchors. Lines that are likely to
 * break projection are forced to stay unsnapped floating text; at present
 * that means lines dominated by very small text (unless
 * config.preserveVerySmallText is set).
 *
 * NOTE: this assumes undesirable text has already been removed before
 * projection (see filterUnprojectableText()).
 *
 * @param {{preserveVerySmallText?: boolean}} config - projection options
 * @param {Array<{h: number}>} line - items of a single line
 * @returns {boolean} true when the line may be snapped
 */
function canSnapLine(config, line) {
    if (line.length === 0 || config.preserveVerySmallText) {
        return true;
    }
    return !isSmallTextLine(line);
}
50
/**
 * Compress whitespace inside blocks made of very sparse lines.
 *
 * Every non-empty line in [block.start, block.end) is right-trimmed in place.
 * If the block then holds at least 500 characters of which more than 80% are
 * whitespace, each run of COLUMN_SPACES or more whitespace characters in its
 * lines is replaced by FLOATING_SPACES spaces.
 *
 * @param {Array<{start: number, end: number}>} blocks - half-open index ranges into rawLines
 * @param {Array<string>} rawLines - rendered lines; mutated in place
 */
function fixSparseBlocks(blocks, rawLines) {
    const wideGap = new RegExp(`\\s{${COLUMN_SPACES},}`, "g");
    for (const block of blocks) {
        let charCount = 0;
        let blankCount = 0;
        for (let idx = block.start; idx < block.end; idx += 1) {
            if (!rawLines[idx]) {
                continue;
            }
            const trimmed = rawLines[idx].trimEnd();
            rawLines[idx] = trimmed;
            charCount += trimmed.length;
            const blanks = trimmed.match(/\s/g);
            blankCount += blanks ? blanks.length : 0;
        }
        const isSparse = charCount >= 500 && blankCount / charCount > 0.8;
        if (!isSparse) {
            continue;
        }
        for (let idx = block.start; idx < block.end; idx += 1) {
            const text = rawLines[idx];
            if (text && text.length > 0) {
                rawLines[idx] = text.replace(wideGap, " ".repeat(FLOATING_SPACES));
            }
        }
    }
}
79
/**
 * Find the x positions ("anchors") that several bboxes share as left edge,
 * right edge or centre, and tag each bbox with the alignment it snaps to.
 *
 * Steps, in order:
 *  1. bucket every bbox by rounded left edge, rounded right edge and rounded centre;
 *  2. merge buckets whose keys lie within 2 units of each other;
 *  3. drop bucket members that have no neighbour within a fraction of the page height;
 *  4. drop anchors that are crossed by other text between every pair of consecutive members;
 *  5. try to attach still-floating bboxes to a left anchor used on an adjacent line;
 *  6. resolve bboxes that ended up in several collections (left, then right, then centre);
 *  7. delete anchors left with fewer than two members.
 *
 * Side effects: bboxes are mutated - `snap`, `leftAnchor`, `rightAnchor`,
 * `centerAnchor` and `isDup` may be set, and `snap` is deleted again on the
 * sole member of a discarded singleton anchor.
 *
 * @param {Array<Array<{x: number, y: number, w: number, h: number}>>} lines - bboxes grouped by line
 * @param {{height: number}} page - page descriptor; only `height` is read
 * @returns {{anchorLeft: Object, anchorRight: Object, anchorCenter: Object}}
 *   maps from anchor position (as property key) to the bboxes snapped to it
 */
function extractAnchorsPointsFromLines(lines, page) {
    const pageHeight = page.height;
    const anchorLeft = {};
    const anchorRight = {};
    const anchorCenter = {};
    function addToBucket(collection, key, bbox) {
        if (!collection[key]) {
            collection[key] = [];
        }
        collection[key].push(bbox);
    }
    for (const line of lines) {
        for (const bbox of line) {
            addToBucket(anchorLeft, roundAnchor(bbox.x), bbox);
            addToBucket(anchorRight, roundAnchor(bbox.x + bbox.w), bbox);
            addToBucket(anchorCenter, Math.round(bbox.x + bbox.w / 2), bbox);
        }
    }
    // Merge buckets whose anchor positions are within MERGE_TOLERANCE of each
    // other; the bucket with fewer members is folded into the other one.
    function group(collection) {
        const sortedAnchors = Object.keys(collection)
            .map((a) => parseFloat(a))
            .sort((a, b) => a - b);
        // Use 2 units as tolerance - this catches columns that are close but not exactly aligned
        const MERGE_TOLERANCE = 2;
        for (let i = 0; i < sortedAnchors.length; i++) {
            const anchor = sortedAnchors[i];
            if (!(anchor in collection)) {
                continue; // Already merged
            }
            for (let j = i + 1; j < sortedAnchors.length; j++) {
                const nextAnchor = sortedAnchors[j];
                if (!(nextAnchor in collection)) {
                    continue;
                }
                // Stop if we're beyond the tolerance
                if (nextAnchor - anchor > MERGE_TOLERANCE) {
                    break;
                }
                if (collection[nextAnchor].length > collection[anchor].length) {
                    collection[nextAnchor].push(...collection[anchor]);
                    delete collection[anchor];
                    break; // This anchor is gone, move to next
                }
                collection[anchor].push(...collection[nextAnchor]);
                delete collection[nextAnchor];
            }
        }
    }
    // Remove bucket members whose previous and next member (in bucket order)
    // are both at least `delta * pageHeight` further down. Buckets are
    // emptied but their keys are kept.
    function deltaMin(collection, delta) {
        const maxDelta = pageHeight * delta;
        for (const anchor in collection) {
            const bucket = collection[anchor];
            for (let i = 0; i < bucket.length; i++) {
                const nearPrevious = i > 0 && bucket[i].y - bucket[i - 1].y < maxDelta;
                const nearNext = i < bucket.length - 1 && bucket[i + 1].y - bucket[i].y < maxDelta;
                if (!nearPrevious && !nearNext) {
                    bucket.splice(i--, 1);
                }
            }
        }
    }
    // Delete every anchor for which no pair of consecutive members is free of
    // crossing text, i.e. some item on a line strictly between the two
    // members spans the anchor's x position.
    function dropInterceptedAnchors(collection) {
        for (const anchor in collection) {
            // Anchor keys are quarter-unit values ("10.75"); parseFloat keeps
            // the fraction, whereas parseInt would test the wrong x position.
            const anchorX = parseFloat(anchor);
            const bucket = collection[anchor];
            let shouldKeep = false;
            for (let i = 1; i < bucket.length; i++) {
                const a1 = bucket[i - 1];
                const a2 = bucket[i];
                let crossed = false;
                for (const line of lines) {
                    if (line.length > 0 && line[0].y > a1.y && line[0].y < a2.y) {
                        if (line.some((item) => item.x < anchorX && item.x + item.w > anchorX)) {
                            crossed = true;
                            break;
                        }
                    }
                }
                if (!crossed) {
                    shouldKeep = true;
                    break;
                }
            }
            if (!shouldKeep) {
                delete collection[anchor];
            }
        }
    }
    // Group nearby anchors FIRST to merge items at similar positions.
    // This ensures deltaMin operates on combined groups, not isolated items.
    group(anchorLeft);
    group(anchorRight);
    group(anchorCenter);
    deltaMin(anchorRight, 0.17);
    deltaMin(anchorLeft, 0.2);
    deltaMin(anchorCenter, 0.05);
    dropInterceptedAnchors(anchorRight);
    dropInterceptedAnchors(anchorLeft);
    dropInterceptedAnchors(anchorCenter);
    function anyAnchorSurvived(bbox) {
        return (roundAnchor(bbox.x) in anchorLeft ||
            roundAnchor(bbox.x + bbox.w) in anchorRight ||
            Math.round(bbox.x + bbox.w / 2) in anchorCenter);
    }
    // Try seeing if a floating bbox would align well with a surviving anchor
    // on the line immediately above or below it.
    function tryAlignFloating(collection, ANCHOR_MARGIN, refXFromBbox, anchorValFromBbox) {
        for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) {
            for (const bbox of lines[lineIndex]) {
                // Only consider floating bboxes
                if (anyAnchorSurvived(bbox)) {
                    continue;
                }
                const candidateLines = [];
                if (lineIndex > 0) {
                    candidateLines.push(lines[lineIndex - 1]);
                }
                if (lineIndex < lines.length - 1) {
                    candidateLines.push(lines[lineIndex + 1]);
                }
                // Pick the closest anchor within the margin among the candidates.
                let bestAnchor = null;
                let bestDiff = ANCHOR_MARGIN + 1;
                for (const candLine of candidateLines) {
                    for (const candBBox of candLine) {
                        const candAnchorVal = anchorValFromBbox(candBBox);
                        if (!(candAnchorVal in collection)) {
                            continue;
                        }
                        const xDiff = Math.abs(candAnchorVal - refXFromBbox(bbox));
                        if (xDiff <= ANCHOR_MARGIN && xDiff < bestDiff) {
                            bestAnchor = candAnchorVal;
                            bestDiff = xDiff;
                        }
                    }
                }
                if (bestAnchor === null) {
                    continue;
                }
                // The bbox may already be in this bucket (could happen after merging).
                const targetBucket = collection[bestAnchor];
                if (!targetBucket.includes(bbox)) {
                    targetBucket.push(bbox);
                }
            }
        }
    }
    // Try to left-align floating bboxes
    tryAlignFloating(anchorLeft, 2, (bbox) => bbox.x, (bbox) => roundAnchor(bbox.x));
    // Sort the anchors' lists of bboxes by y-value
    for (const collection of [anchorLeft, anchorRight, anchorCenter]) {
        for (const anchor in collection) {
            collection[anchor].sort((a, b) => a.y - b.y);
        }
    }
    // Tag every bbox with the anchors it belongs to and collect the bboxes
    // that appear in more than one collection.
    const duplicates = [];
    for (const anchor in anchorLeft) {
        for (const item of anchorLeft[anchor]) {
            item.snap = "left";
            item.leftAnchor = anchor;
        }
    }
    for (const anchor in anchorRight) {
        for (const item of anchorRight[anchor]) {
            if (item.snap) {
                item.isDup = true;
                duplicates.push(item);
            }
            item.snap = "right";
            item.rightAnchor = anchor;
        }
    }
    for (const anchor in anchorCenter) {
        for (const item of anchorCenter[anchor]) {
            if (item.snap && !item.isDup) {
                item.isDup = true;
                duplicates.push(item);
            }
            item.snap = "center";
            item.centerAnchor = anchor;
        }
    }
    function bucketSize(collection, anchorKey) {
        if (!anchorKey) {
            return 0;
        }
        const bucket = collection[parseFloat(anchorKey)];
        return bucket ? bucket.length : 0;
    }
    function anchorCounts(item) {
        return [
            bucketSize(anchorLeft, item.leftAnchor),
            bucketSize(anchorRight, item.rightAnchor),
            bucketSize(anchorCenter, item.centerAnchor),
        ];
    }
    // Remove `item` from the bucket named by `anchorKey`; returns whether
    // anything was removed. The index is checked because splice(-1, 1) would
    // silently remove the last, unrelated member.
    function removeFromBucket(collection, anchorKey, item) {
        if (!anchorKey) {
            return false;
        }
        const bucket = collection[parseFloat(anchorKey)];
        if (!bucket) {
            return false;
        }
        const idx = bucket.indexOf(item);
        if (idx < 0) {
            return false;
        }
        bucket.splice(idx, 1);
        return true;
    }
    // Find all left aligned blocks, all right aligned blocks, all centered
    // blocks, in that order. We cannot check all 3 at once since we may end up
    // double counting potential anchor matches (i.e. we need to exclude blocks
    // that we know are left/right aligned before counting possible matching
    // centered blocks).
    // find all lefts
    let hasChanged = true;
    while (hasChanged && duplicates.length > 0) {
        hasChanged = false;
        for (let i = duplicates.length - 1; i >= 0; --i) {
            const item = duplicates[i];
            const [leftCount, rightCount, centerCount] = anchorCounts(item);
            if (leftCount >= rightCount && leftCount >= centerCount) {
                item.snap = "left";
                if (removeFromBucket(anchorRight, item.rightAnchor, item)) {
                    hasChanged = true;
                }
                if (removeFromBucket(anchorCenter, item.centerAnchor, item)) {
                    hasChanged = true;
                }
                duplicates.splice(i, 1);
            }
        }
    }
    // find all rights
    hasChanged = true;
    while (hasChanged && duplicates.length > 0) {
        hasChanged = false;
        for (let i = duplicates.length - 1; i >= 0; --i) {
            const item = duplicates[i];
            const [leftCount, rightCount, centerCount] = anchorCounts(item);
            if (rightCount >= leftCount && rightCount >= centerCount) {
                item.snap = "right";
                if (removeFromBucket(anchorLeft, item.leftAnchor, item)) {
                    hasChanged = true;
                }
                if (removeFromBucket(anchorCenter, item.centerAnchor, item)) {
                    hasChanged = true;
                }
                duplicates.splice(i, 1);
            }
        }
    }
    // remaining duplicates are centered
    for (const item of duplicates) {
        item.snap = "center";
        removeFromBucket(anchorLeft, item.leftAnchor, item);
        removeFromBucket(anchorRight, item.rightAnchor, item);
    }
    // An anchor needs at least two members to be meaningful; un-snap the sole
    // member of a singleton before discarding the anchor.
    for (const collection of [anchorLeft, anchorRight, anchorCenter]) {
        for (const anchor in collection) {
            const bucket = collection[anchor];
            if (bucket.length < 2) {
                if (bucket.length) {
                    delete bucket[0].snap;
                }
                delete collection[anchor];
            }
        }
    }
    return {
        anchorLeft,
        anchorRight,
        anchorCenter,
    };
}
421
/**
 * Rewrite the coordinates of rotated bboxes in place so that they can be laid
 * out like horizontal text, then sort `textBbox` by y.
 *
 * Bboxes are grouped by their `r` value. For each 90/270 group:
 *  - if a bbox with a different rotation overlaps the group in both x and y,
 *    the group is only flagged as horizontal (`r = 0`, `rotated = true`, any
 *    pending `d` offset is folded into `y`);
 *  - otherwise x/y and w/h are swapped (y is mirrored for 270), the group is
 *    placed below the previous group, and all later groups are pushed further
 *    down (rotated ones via a deferred `d` offset).
 * 180 groups are handled after the sort.
 *
 * Returns early, without sorting, when no bbox has `r != 0`.
 *
 * @param {Array<Object>} textBbox - bboxes with x, y, w, h and optional r, d, rx, ry; mutated and re-sorted in place
 * @param {number} pageHeight - used as spacing between re-positioned groups
 * @returns {void}
 */
function handleRotationReadingOrder(textBbox, pageHeight) {
    // if no bbox is rotated (.r is set), return
    if (!textBbox.find((b) => b.r != 0)) {
        return;
    }
    // Group ALL items by rotation value (not by consecutive items).
    // This ensures rotated text blocks stay together even when their X coordinates
    // overlap with non-rotated content (e.g., rotated table + footer at same X positions)
    const groupsByRotation = {};
    for (const bbox of textBbox) {
        const r = bbox.r || 0;
        if (!groupsByRotation[r]) {
            groupsByRotation[r] = [];
        }
        groupsByRotation[r].push(bbox);
    }
    const bboxGroup = [];
    for (const rotation in groupsByRotation) {
        const group = groupsByRotation[rotation];
        // Sort each group by Y for proper reading order
        group.sort((a, b) => a.y - b.y);
        bboxGroup.push(group);
    }
    // Sort groups by their minimum X position to maintain left-to-right order
    bboxGroup.sort((a, b) => {
        const minXA = Math.min(...a.map((item) => item.x));
        const minXB = Math.min(...b.map((item) => item.x));
        return minXA - minXB;
    });
    // NOTE/WARNING: height and width of bbox are NOT rotated beforehand!
    for (const [index, group] of bboxGroup.entries()) {
        const rotation = group[0].r;
        if (rotation != 90 && rotation != 270) {
            continue;
        }
        // Check if there are items with another rotation that actually overlap
        // visually (both X and Y) with the rotated group. X-only overlap is not
        // sufficient because items could be in completely different parts of
        // the page (e.g., rotated table + footer).
        const globalOverlap = textBbox.some((bbox) => bbox.r != rotation &&
            group.some((b) => b.x >= bbox.x &&
                b.x <= bbox.x + bbox.w &&
                b.y < bbox.y + bbox.h &&
                b.y + b.h > bbox.y &&
                bbox.r != b.r));
        if (globalOverlap) {
            // Treat the group as horizontal without moving it.
            for (const bbox of group) {
                if (bbox.d) {
                    bbox.y += bbox.d;
                    bbox.d = 0;
                }
                bbox.r = 0;
                bbox.rotated = true;
            }
            continue;
        }
        // Insert the group in the Y axis after the previous group and before
        // the next one; later groups are moved by the current group's extent
        // (measured on x, as the group is not rotated yet).
        const groupMaxX = Math.max(...group.map((v) => v.x + v.w));
        let deltaY = 0;
        if (index != 0) {
            const previousGroup = bboxGroup[index - 1];
            const previousGroupMaxY = Math.max(...previousGroup.map((v) => v.y + v.h));
            // Use pageHeight offset to guarantee no alignment issues with other groups
            deltaY = previousGroupMaxY + pageHeight;
        }
        if (rotation == 90) {
            // Clockwise: page-space Y becomes X, page-space X becomes the row (Y).
            for (const bbox of group) {
                const newX = Math.round(bbox.y);
                const newY = bbox.x + deltaY;
                const newW = bbox.h;
                const newH = bbox.w;
                bbox.x = newX;
                bbox.y = newY;
                bbox.w = newW;
                bbox.h = newH;
                bbox.r = 0;
                bbox.rotated = true;
            }
        }
        else {
            // Counter-clockwise (270): same as above, but Y is mirrored against
            // the bottom of the group so higher page-space Y ends up at lower X.
            const maxY = Math.max(...group.map((b) => b.y + b.h));
            for (const bbox of group) {
                const newX = Math.round(maxY - bbox.y - bbox.h);
                // Exact X is used for Y; bboxToLine groups rows with its own tolerance.
                const newY = bbox.x + deltaY;
                const newW = bbox.h;
                const newH = bbox.w;
                bbox.x = newX;
                bbox.y = newY;
                bbox.w = newW;
                bbox.h = newH;
                bbox.r = 0;
                bbox.rotated = true;
            }
        }
        // Use pageHeight offset to guarantee no alignment issues
        const globalDelta = deltaY + groupMaxX + pageHeight;
        for (const [otherGroupIndex, other] of bboxGroup.entries()) {
            if (otherGroupIndex <= index) {
                continue;
            }
            for (const bbox of other) {
                if (bbox.r == 90 || bbox.r == 270) {
                    // Still rotated: defer the shift until the group is processed.
                    bbox.d = (bbox.d ? bbox.d : 0) + globalDelta;
                    continue;
                }
                bbox.y += globalDelta;
            }
        }
    }
    textBbox.sort((a, b) => a.y - b.y);
    // Handle 180-degree rotated text (upside down).
    // Since we already grouped by rotation, we can iterate the existing groups.
    for (const group of bboxGroup) {
        if (group[0].r == 180) {
            // Sort by X for proper reading order
            group.sort((a, b) => a.x - b.x);
            for (const bbox of group) {
                // Compute both values before assigning: the fallback for the new
                // y must read the ORIGINAL x, not the x that was just overwritten.
                // NOTE(review): rx/ry are presumably precomputed rotated
                // coordinates supplied by the extractor - confirm.
                const newX = Math.round(bbox.ry ?? bbox.y);
                const newY = bbox.rx ?? bbox.x;
                bbox.x = newX;
                bbox.y = newY;
                bbox.r = 0;
                bbox.rotated = true;
            }
        }
    }
}
580
/**
 * Group text bboxes into visual lines.
 *
 * Steps, in order:
 *  1. optionally flag narrow 1-2 digit items near the page midpoint as margin line numbers;
 *  2. sort bboxes top-to-bottom, left-to-right (with a y tolerance);
 *  3. merge directly adjacent bboxes that share y, height and markup;
 *  4. sweep the sorted bboxes into lines;
 *  5. merge neighbouring items inside each line into words/phrases;
 *  6. merge vertically overlapping consecutive lines whose items do not overlap horizontally;
 *  7. insert empty lines where the vertical gap exceeds `medianHeight`.
 *
 * Side effects: `textBbox` is sorted and spliced in place, and its bboxes are
 * mutated (`w`, `str`, `strLength`, `pageBbox`, `isMarginLineNumber`).
 *
 * @param {Array<Object>} textBbox - bboxes with x, y, w, h, str, strLength and optional markup/rotated
 * @param {number} medianWidth - median character width, used for the collision threshold
 * @param {number} medianHeight - median item height, used for tolerances and blank-line spacing
 * @param {number} [pageWidth] - when truthy, enables margin line-number detection
 * @returns {Array<Array<Object>>} lines of bboxes; blank lines are distinct empty arrays
 */
export function bboxToLine(textBbox, medianWidth, medianHeight, pageWidth) {
    // Y-tolerance for sorting: items within this threshold are considered same line.
    // This handles:
    // 1. Floating point precision issues between columns (e.g., 334.7400 vs 334.7399)
    // 2. Subscripts/superscripts which are typically offset by 3-5 units from their base characters
    // Using a fraction of medianHeight to scale with document font size.
    const Y_SORT_TOLERANCE = Math.max(medianHeight * 0.5, 5.0);
    // Note: We keep whitespace items as they may be needed for proper word separation.
    // The spacing calculation handles gaps between items.
    // For two-column documents, detect and mark margin line numbers.
    // These are short numeric items positioned between columns (near the page midpoint).
    // They should not be merged with column content.
    if (pageWidth) {
        const midpoint = pageWidth * 0.5;
        const marginZoneLeft = midpoint - 5;
        const marginZoneRight = midpoint + 20;
        for (const bbox of textBbox) {
            const bboxCenter = bbox.x + bbox.w / 2;
            // Check if item is in the margin zone and looks like a line number
            if (bboxCenter > marginZoneLeft &&
                bboxCenter < marginZoneRight &&
                bbox.str.trim().match(/^\d{1,2}[O]?$/) && // 1-2 digits, possibly with O (OCR error for 0)
                bbox.w < 15 // Line numbers are narrow
            ) {
                // Mark as margin item - will be placed on its own line
                bbox.isMarginLineNumber = true;
            }
        }
    }
    // Sort on y first, then x (top-left first). The Y tolerance makes items on
    // the same visual line sort by x regardless of tiny y differences.
    textBbox.sort((a, b) => {
        if (Math.abs(a.y - b.y) < Y_SORT_TOLERANCE) {
            return a.x - b.x;
        }
        return a.y - b.y;
    });
    // Two bboxes may be merged only if neither has markup, or both have the
    // same highlight/underline/squiggly/strikeout values.
    function canMergeMarkup(previousBbox, bbox) {
        if (!previousBbox.markup && !bbox.markup) {
            return true;
        }
        if (previousBbox.markup &&
            bbox.markup &&
            previousBbox.markup.highlight === bbox.markup.highlight &&
            previousBbox.markup.underline === bbox.markup.underline &&
            previousBbox.markup.squiggly === bbox.markup.squiggly &&
            previousBbox.markup.strikeout === bbox.markup.strikeout) {
            return true;
        }
        return false;
    }
    // Same y and height, compatible markup, and a horizontal gap in (-0.5, 0.1).
    function canMerge(previousBbox, bbox) {
        if (bbox.y == previousBbox.y && bbox.h == previousBbox.h) {
            const xDelta = bbox.x - previousBbox.x - previousBbox.w;
            if (((xDelta < 0 && xDelta > -0.5) || (xDelta >= 0 && xDelta < 0.1)) &&
                canMergeMarkup(previousBbox, bbox)) {
                return true;
            }
        }
        return false;
    }
    // Union of the two items' page-space boxes (falling back to x/y/w/h).
    function mergePageBbox(a, b) {
        const aBbox = a.pageBbox || { x: a.x, y: a.y, w: a.w, h: a.h };
        const bBbox = b.pageBbox || { x: b.x, y: b.y, w: b.w, h: b.h };
        const left = Math.min(aBbox.x, bBbox.x);
        const top = Math.min(aBbox.y, bBbox.y);
        const right = Math.max(aBbox.x + aBbox.w, bBbox.x + bBbox.w);
        const bottom = Math.max(aBbox.y + aBbox.h, bBbox.y + bBbox.h);
        return { x: left, y: top, w: right - left, h: bottom - top };
    }
    // merge continuous bboxes
    for (let i = 1; i < textBbox.length; i++) {
        const bbox = textBbox[i];
        const previousBbox = textBbox[i - 1];
        if (canMerge(previousBbox, bbox)) {
            previousBbox.w = bbox.x + bbox.w - previousBbox.x;
            previousBbox.str += bbox.str;
            previousBbox.strLength += bbox.strLength;
            previousBbox.pageBbox = mergePageBbox(previousBbox, bbox);
            textBbox.splice(i, 1);
            i--;
        }
    }
    // Find the bounding boxes that align as a line and group them by line.
    const lines = [];
    let currentLine = [];
    let previousBbox = null;
    for (const bbox of textBbox) {
        if (!previousBbox) {
            currentLine.push(bbox);
        }
        // This is where we define how lines are built. To be improved.
        else {
            const lineMinY = Math.min(...currentLine.map((v) => v.y));
            const lineMaxY = Math.max(...currentLine.map((v) => v.y + v.h));
            let lineCollide = false;
            for (const currentLineItemBbox of currentLine) {
                const overlapLength = Math.min(currentLineItemBbox.x + currentLineItemBbox.w, bbox.x + bbox.w) -
                    Math.max(currentLineItemBbox.x, bbox.x);
                // Use a minimum threshold to tolerate small overlaps common in PDFs due to:
                // - character spacing/kerning
                // - floating-point precision issues
                // - adjacent items with slightly overlapping bounding boxes
                // We want to detect true collisions (same text rendered twice) not adjacent text
                if (overlapLength > Math.max(medianWidth / 3, 5)) {
                    lineCollide = true;
                    break;
                }
            }
            // Don't merge margin line numbers with regular content
            const currentLineHasMargin = currentLine.some((b) => b.isMarginLineNumber === true);
            const bboxIsMargin = bbox.isMarginLineNumber === true;
            const marginMismatch = currentLineHasMargin !== bboxIsMargin;
            // For rotated text, use Y-tolerance based merging since heights may be inconsistent
            const yTolerance = bbox.rotated ? Math.max(medianHeight * 2, 20) : 0;
            const yWithinTolerance = bbox.rotated && Math.abs(bbox.y - lineMinY) < yTolerance;
            if (!lineCollide &&
                !marginMismatch &&
                (yWithinTolerance ||
                    (bbox.y + bbox.h * 0.5 >= lineMinY && bbox.y + bbox.h * 0.5 <= lineMaxY) ||
                    (bbox.y >= lineMinY && bbox.y <= lineMaxY))) {
                currentLine.push(bbox);
            }
            else {
                if (currentLine.length) {
                    lines.push(currentLine);
                }
                currentLine = [bbox];
            }
        }
        previousBbox = bbox;
    }
    if (currentLine.length) {
        lines.push(currentLine);
    }
    // sort each line by x
    for (const line of lines) {
        line.sort((a, b) => a.x - b.x);
    }
    // sort lines by y
    lines.sort((a, b) => a[0].y - b[0].y);
    // merge 'words'
    const mergeThreshold = 1;
    // Pattern to detect standalone numeric values (financial table numbers).
    // Matches: numbers with optional commas, decimal points, dollar signs, percentages, negatives
    const numericPattern = /^[$]?-?[\d,]+\.?\d*%?$/;
    function looksLikeTableNumber(str) {
        const trimmed = str.trim();
        // Must be at least 2 chars to be a table number (avoid merging single digits)
        return trimmed.length >= 2 && numericPattern.test(trimmed);
    }
    for (const line of lines) {
        for (let i = 1; i < line.length; ++i) {
            const current = line[i];
            const previous = line[i - 1];
            if (!canMergeMarkup(previous, current)) {
                continue;
            }
            // Don't merge adjacent numbers in tables - they're separate columns
            const bothAreNumbers = looksLikeTableNumber(previous.str) && looksLikeTableNumber(current.str);
            if (bothAreNumbers) {
                continue;
            }
            const gap = current.x - previous.x - previous.w;
            if (gap <= mergeThreshold) {
                // Same word; an item noticeably shorter than its predecessor is
                // rendered as subscript or superscript depending on its y offset.
                if (current.h != 0 && current.h < previous.h * 0.7) {
                    // and not starting with space
                    // NOTE(review): this `break` stops merging for the REST of the
                    // line, not just for this item - confirm that is intended.
                    if (current.str[0] == " ") {
                        break;
                    }
                    if (current.y > previous.y + previous.h * 0.2) {
                        current.str = strToSubscriptString(current.str);
                    }
                    else {
                        current.str = strToPostScript(current.str);
                    }
                }
                previous.w = current.x + current.w - previous.x;
                previous.str += current.str;
                previous.strLength += current.strLength;
                previous.pageBbox = mergePageBbox(previous, current);
                line.splice(i, 1);
                i--;
            }
            else if (gap < previous.w / previous.strLength) {
                // Merge if the space between this word and the previous one is
                // less than the average character width of the previous word.
                previous.w = current.x + current.w - previous.x;
                // Add space between merged items unless the previous already ends with space
                if (!previous.str.endsWith(" ")) {
                    previous.str += " ";
                    previous.strLength += 1;
                }
                previous.str += current.str;
                previous.strLength += current.strLength;
                previous.pageBbox = mergePageBbox(previous, current);
                line.splice(i, 1);
                i--;
            }
        }
    }
    // Check if we can merge the lines together.
    // NOTE(review): the bound `lines.length - 1` means the last line is never
    // merged into its predecessor - confirm whether that is intended.
    for (let i = 1; i < lines.length - 1; i++) {
        const currentLine = lines[i];
        const previousLine = lines[i - 1];
        const previousLineMinY = Math.min(...previousLine.map((v) => v.y));
        const previousLineMaxY = Math.max(...previousLine.map((v) => v.y + v.h));
        const currentLineMinY = Math.min(...currentLine.map((v) => v.y));
        const currentLineMaxY = Math.max(...currentLine.map((v) => v.y + v.h));
        // do the 2 lines overlap vertically?
        if (previousLineMaxY > currentLineMinY && previousLineMinY < currentLineMaxY) {
            // check the bboxes of current line and previous line do not overlap
            let bboxOverlap = false;
            for (const bbox of currentLine) {
                for (const prevBbox of previousLine) {
                    if (bbox.x >= prevBbox.x && bbox.x <= prevBbox.x + prevBbox.w) {
                        bboxOverlap = true;
                        break;
                    }
                    if (prevBbox.x >= bbox.x && prevBbox.x <= bbox.x + bbox.w) {
                        bboxOverlap = true;
                        break;
                    }
                }
            }
            // merge if no overlap
            if (!bboxOverlap) {
                previousLine.push(...currentLine);
                previousLine.sort((a, b) => a.x - b.x);
                lines.splice(i--, 1);
            }
        }
    }
    for (let i = 1; i < lines.length; i++) {
        const yDelta = lines[i][0].y - lines[i - 1][0].y - lines[i - 1][0].h;
        // Calculate the number of blank lines to insert based on vertical spacing.
        // Use medianHeight as a reference for one line spacing.
        if (yDelta > medianHeight) {
            // Round to nearest integer to get approximate number of lines
            const numBlankLines = Math.round(yDelta / medianHeight) - 1;
            // At least one, capped at 10 blank lines to avoid extreme cases
            const linesToInsert = Math.min(Math.max(numBlankLines, 1), 10);
            // Each blank line gets its OWN array; Array(n).fill([]) would insert
            // one shared instance, so mutating a blank line would change them all.
            const blankLines = Array.from({ length: linesToInsert }, () => []);
            lines.splice(i, 0, ...blankLines);
            // Skip past the inserted blanks so the next comparison is between real lines.
            i += linesToInsert;
        }
    }
    return lines;
}
835
/**
 * Tell whether `bbox` is the next item of `line` that may be rendered: every
 * item located before it must already carry a truthy `rendered` flag.
 *
 * @param {Array<Object>} line - items of the line, in order
 * @param {Object} bbox - the item to check
 * @returns {boolean} true when `bbox` is reached before any unrendered item;
 *   false otherwise, including when `bbox` is not part of `line`
 */
function canRenderBbox(line, bbox) {
    for (let idx = 0; idx < line.length; idx += 1) {
        const candidate = line[idx];
        if (candidate == bbox) {
            return true;
        }
        if (!candidate.rendered) {
            return false;
        }
    }
    return false;
}
846
/**
 * Raise the minimum line length recorded for every snap position located at
 * or to the right of `rightBound`.
 *
 * `snapMap` is walked from its last entry backwards and the walk stops at the
 * first entry smaller than `rightBound`. For each visited entry, the entries
 * immediately before it that lie within 2 units are raised as well, to absorb
 * slight position variations between rows. A recorded value is only ever
 * replaced when it is falsy or smaller than `anchorTarget`.
 *
 * @param {Array<number>} snapMap - snap positions (walked from the end; presumably sorted ascending - confirm)
 * @param {Object<number, number>} forwardAnchor - position -> minimum line length; mutated
 * @param {number} rightBound - right edge of the text that was just placed
 * @param {number} anchorTarget - line length to enforce at minimum
 * @returns {void}
 */
function updateForwardAnchorRightBound(snapMap, forwardAnchor, rightBound, anchorTarget) {
    const POSITION_TOLERANCE = 2;
    const raise = (position) => {
        const recorded = forwardAnchor[position];
        if (!recorded || anchorTarget > recorded) {
            forwardAnchor[position] = anchorTarget;
        }
    };
    let i = snapMap.length - 1;
    while (i >= 0 && rightBound <= snapMap[i]) {
        const anchor = snapMap[i];
        raise(anchor);
        // Also update nearby positions within tolerance
        let j = i - 1;
        while (j >= 0 && !(anchor - snapMap[j] > POSITION_TOLERANCE)) {
            raise(snapMap[j]);
            j -= 1;
        }
        i -= 1;
    }
}
871
/**
 * Propagate the column reached after writing `bbox` to the left, right and
 * floating forward-anchor maps.
 *
 * The enforced column is `lineLength`, plus the spacing requested by the next
 * box on the line when that spacing is positive. Center anchors are left
 * untouched on purpose: centered text may span between snapped columns.
 *
 * @param {object} bbox - Box that was just written; `x` and `w` give its right edge.
 * @param {object|null} nextBbox - Following box on the same line, if any.
 * @param {{left: number[], right: number[], floating: number[]}} snapMaps - Snap positions per kind.
 * @param {{left: object, right: object, floating: object}} forwardAnchors - Anchor maps, mutated in place.
 * @param {number} lineLength - Current length of the rendered line.
 * @returns {void}
 */
function updateForwardAnchors(bbox, nextBbox, snapMaps, forwardAnchors, lineLength) {
    const rightEdge = bbox.x + bbox.w;
    const followingGap = nextBbox ? (nextBbox.shouldSpace ?? 0) : 0;
    const target = followingGap > 0 ? lineLength + followingGap : lineLength;
    for (const kind of ["left", "right", "floating"]) {
        updateForwardAnchorRightBound(snapMaps[kind], forwardAnchors[kind], rightEdge, target);
    }
}
882
/**
 * Compute the median per-character width and the median height of a set of
 * text boxes.
 *
 * The "median" is the element at index `floor(n / 2)` of the ascending list,
 * i.e. the upper middle value when the count is even.
 *
 * @param {Array<{w: number, h: number, strLength: number}>} lines - Flat list
 *   of text boxes (despite the parameter name, not a list of lines).
 * @returns {{width: (number|undefined), height: (number|undefined)}} Median
 *   character width and median box height; a field is `undefined` when no box
 *   contributes a usable value for it.
 */
function getMedianTextBoxSize(lines) {
    const upperMedian = (values) => values.sort((a, b) => a - b)[Math.floor(values.length / 2)];
    const widthList = [];
    const heightList = [];
    for (const bbox of lines) {
        // Boxes without a positive character count would contribute Infinity or
        // NaN, which skews the median and breaks the numeric sort comparator.
        if (bbox.w > 0 && bbox.strLength > 0) {
            widthList.push(bbox.w / bbox.strLength);
        }
        if (bbox.h > 0) {
            heightList.push(bbox.h);
        }
    }
    return { width: upperMedian(widthList), height: upperMedian(heightList) };
}
901
/**
 * Project the text boxes of one page onto a monospace character grid and
 * return the resulting text.
 *
 * Steps, in order:
 *  1. Drop dot-leader garbage when the page is flooded with it.
 *  2. Group boxes into lines (`bboxToLine`), filter unprojectable text and
 *     apply markup tags to the box strings.
 *  3. Split the lines into blocks (one block for the whole page when
 *     `config.preserveLayoutAlignmentAcrossPages` is set and there is more than
 *     one page; otherwise blocks are separated by two or more consecutive
 *     empty lines).
 *  4. For every block, compute spacing between neighbouring boxes, then render
 *     boxes column by column: unsnapped ("floating") boxes are written as soon
 *     as no pending snap position lies to their left, and snapped boxes are
 *     written one snap position at a time, smallest position first.
 *
 * Boxes are mutated in place (`str`, `forceUnsnapped`, `shouldSpace`,
 * `rendered`). The `snap`, `leftAnchor`, `rightAnchor` and `centerAnchor`
 * fields read here are presumably assigned by `extractAnchorsPointsFromLines`
 * — confirm against that helper.
 *
 * @param {object} config - Parser configuration; `preserveLayoutAlignmentAcrossPages`
 *   is read here and the object is forwarded to `filterUnprojectableText` and `canSnapLine`.
 * @param {{width: number, height: number}} page - Page whose dimensions drive
 *   line building and the same-column gap threshold (10% of the width).
 * @param {Array<object>} projectionBoxes - Text boxes to project.
 * @param {{forwardAnchorLeft: object, forwardAnchorRight: object, forwardAnchorCenter: object}} prevAnchors -
 *   Anchor columns carried over from earlier pages; used as lower bounds.
 * @param {number} totalPages - Number of pages in the document.
 * @returns {{text: string, prevAnchors: {forwardAnchorLeft: object, forwardAnchorRight: object, forwardAnchorCenter: object}}}
 *   The projected text (lines joined with "\n") and this page's left/right/center anchor maps.
 */
export function projectToGrid(config, page, projectionBoxes, prevAnchors, totalPages) {
    // Detect '.' garbage: count the boxes whose text consists only of dots.
    let dotCount = 0;
    for (const bbox of projectionBoxes) {
        if (bbox.str.match(/^\.+$/)) {
            dotCount++;
        }
    }
    if (dotCount > 100 && dotCount > projectionBoxes.length * 0.05) {
        // Too many dot-only boxes: drop them, together with runs of middle dots and double quotes.
        const keptBoxes = [];
        for (const bbox of projectionBoxes) {
            if (bbox.str.match(/^\.+$/) || bbox.str.match(/^·+$/) || bbox.str.match(/^"+$/)) {
                continue;
            }
            keptBoxes.push(bbox);
        }
        projectionBoxes = keptBoxes;
    }
    // Page-wide median character width / box height.
    const pageMedianSizes = getMedianTextBoxSize(projectionBoxes);
    let medianWidth = pageMedianSizes.width;
    const medianHeight = pageMedianSizes.height;
    handleRotationReadingOrder(projectionBoxes, page.height);
    const lines = bboxToLine(projectionBoxes, medianWidth, medianHeight, page.width);
    // Remove unprojectable text and apply markup to the final lines.
    for (let i = 0; i < lines.length; ++i) {
        const line = filterUnprojectableText(config, lines[i]);
        for (const bbox of line) {
            // Tags have to be written before the raw projection so that their
            // length is accounted for in the alignment; output formatting turns
            // them into markdown later on.
            if (bbox.str.trim().length !== 0 && bbox.markup) {
                bbox.str = applyMarkupTags(bbox.markup, bbox.str);
            }
        }
        lines[i] = line;
    }
    const forwardAnchors = {
        left: {},
        right: {},
        center: {},
        floating: {},
    };
    const rawLines = [];

    // Largest column already claimed by a left anchor positioned at or before `x`.
    // Anchor keys may carry decimals, so they are parsed with parseFloat everywhere.
    const leftAnchorColumnAtOrBefore = (x) => {
        let column = 0;
        for (const key in forwardAnchors.left) {
            if (parseFloat(key) <= x) {
                column = Math.max(column, forwardAnchors.left[key]);
            }
        }
        return column;
    };
    // Number of spaces still owed in front of a snapped box, given the spaces
    // that already terminate its rendered line.
    const pendingSpaces = ({ bbox, lineIndex }) => {
        const wanted = bbox.shouldSpace ?? 0;
        const current = rawLines[lineIndex];
        let spaceEnd = current.endsWith(" ") ? 0 : wanted;
        if (wanted > 1) {
            const trailingSpaces = current.length - current.trimEnd().length;
            if (trailingSpaces < wanted) {
                spaceEnd = wanted - trailingSpaces;
            }
        }
        return spaceEnd;
    };
    // Right-pad every rendered line of `block` with spaces up to `column`.
    const padBlockTo = (block, column) => {
        for (let index = block.start; index < block.end; ++index) {
            const rendered = rawLines[index];
            if (rendered.length < column) {
                rawLines[index] += " ".repeat(column - rendered.length);
            }
        }
    };
    // Box following `boxIndex` on line `lineIndex`, or null when it is the last one.
    const nextBoxAfter = (lineIndex, boxIndex) => {
        const line = lines[lineIndex];
        return line.length > boxIndex + 1 ? line[boxIndex + 1] : null;
    };

    const blocks = [];
    if (config.preserveLayoutAlignmentAcrossPages && totalPages > 1) {
        blocks.push({ start: 0, end: lines.length });
    }
    else {
        let emptyCount = 0;
        let start = -1;
        for (const [lineIndex, line] of lines.entries()) {
            if (line.length === 0) {
                emptyCount++;
                if (emptyCount > 1) {
                    if (start >= 0) {
                        // Ignore completely empty blocks; include the double blank
                        // line at the end of valid blocks.
                        blocks.push({ start: start, end: lineIndex + 1 });
                    }
                    start = -1;
                }
            }
            else {
                emptyCount = 0;
                if (start < 0) {
                    start = lineIndex;
                }
            }
        }
        if (start > -1) {
            blocks.push({ start: start, end: lines.length });
        }
    }

    // Minimum horizontal gap (in page units) that yields a space between two boxes.
    const spaceThreshold = 2;
    for (const block of blocks) {
        const { anchorLeft, anchorRight, anchorCenter } = extractAnchorsPointsFromLines(lines.slice(block.start, block.end), page);
        const snapMaps = {
            left: [],
            right: [],
            center: [],
            floating: [],
        };
        const uniqueSnaps = new Set();
        for (const snap in anchorLeft) {
            uniqueSnaps.add(parseFloat(snap));
        }
        snapMaps.left.push(...uniqueSnaps);
        uniqueSnaps.clear();
        for (const snap in anchorRight) {
            uniqueSnaps.add(parseFloat(snap));
        }
        snapMaps.right.push(...uniqueSnaps);
        uniqueSnaps.clear();
        for (const snap in anchorCenter) {
            uniqueSnaps.add(parseFloat(snap));
        }
        snapMaps.center.push(...uniqueSnaps);
        uniqueSnaps.clear();
        let hasChanged = true;
        const leftSnap = [];
        const rightSnap = [];
        const centerSnap = [];
        if (!config.preserveLayoutAlignmentAcrossPages) {
            // Only the character width is refreshed per block; the height is not used below.
            const sizes = getMedianTextBoxSize(lines.slice(block.start, block.end).flat());
            medianWidth = sizes.width;
        }
        // Compute spacing and collect snap candidates.
        for (let lineIndex = block.start; lineIndex < block.end; ++lineIndex) {
            const line = lines[lineIndex];
            const forceUnsnapped = !canSnapLine(config, line);
            let prevBbox = null;
            for (let boxIndex = 0; boxIndex < line.length; ++boxIndex) {
                const bbox = line[boxIndex];
                bbox.forceUnsnapped = forceUnsnapped;
                // Should a space be added between the two boxes?
                // TODO RTL
                if (prevBbox && bbox.x - (prevBbox.x + prevBbox.w) > spaceThreshold) {
                    const xDelta = bbox.x - (prevBbox.x + prevBbox.w);
                    const prevCharWidth = prevBbox.w / prevBbox.strLength;
                    bbox.shouldSpace = 1;
                    if (xDelta > prevCharWidth * 2) {
                        // A gap below 10% of the page width is treated as being inside
                        // one column, whatever the number of columns.
                        const columnGapThreshold = page.width * 0.1;
                        const bothInSameColumn = xDelta < columnGapThreshold;
                        // Column spacing applies if any of:
                        // - the gap exceeds an approximate tab (8x the previous char width)
                        // - this box is a left snap
                        // - the previous box is a right snap
                        // - both boxes are snaps
                        // otherwise floating spacing applies.
                        if ((!bbox.forceUnsnapped && xDelta > prevCharWidth * 8) ||
                            bbox.snap === "left" ||
                            prevBbox.snap === "right" ||
                            (bbox.snap && prevBbox.snap)) {
                            // Inside one column the spacing is limited so that justified-text
                            // gaps coming from the PDF are not preserved.
                            bbox.shouldSpace = bothInSameColumn ? FLOATING_SPACES : COLUMN_SPACES;
                        }
                        else {
                            bbox.shouldSpace = bothInSameColumn ? 1 : FLOATING_SPACES;
                        }
                    }
                }
                else {
                    bbox.shouldSpace = 0;
                }
                prevBbox = bbox;
                if (!bbox.snap) {
                    uniqueSnaps.add(Math.round(bbox.x));
                }
                else if (bbox.snap === "left") {
                    leftSnap.push({ bbox, lineIndex, boxIndex });
                }
                else if (bbox.snap === "right") {
                    rightSnap.push({ bbox, lineIndex, boxIndex });
                }
                else if (bbox.snap === "center") {
                    centerSnap.push({ bbox, lineIndex, boxIndex });
                }
            }
        }
        snapMaps.floating.push(...uniqueSnaps);
        uniqueSnaps.clear();
        snapMaps.floating.sort((a, b) => a - b);
        snapMaps.center.sort((a, b) => a - b);
        snapMaps.right.sort((a, b) => a - b);
        snapMaps.left.sort((a, b) => a - b);
        // Alternate between writing every floating box that is ready and consuming
        // the smallest pending snap position, until nothing is left to do.
        while (hasChanged || snapMaps.right.length || snapMaps.left.length || snapMaps.center.length) {
            hasChanged = false;
            for (let lineIndex = block.start; lineIndex < block.end; ++lineIndex) {
                const line = lines[lineIndex];
                if (!rawLines[lineIndex]) {
                    rawLines[lineIndex] = "";
                }
                for (let boxIndex = 0; boxIndex < line.length; ++boxIndex) {
                    const bbox = line[boxIndex];
                    if (bbox.rendered) {
                        continue;
                    }
                    if (!bbox.forceUnsnapped) {
                        // Snapped boxes are handled by the snap branches below.
                        if (bbox.snap) {
                            continue;
                        }
                        // Wait while a pending snap position lies to the left of this box.
                        if ((snapMaps.left.length && snapMaps.left[0] < bbox.x) ||
                            (snapMaps.right.length && snapMaps.right[0] < bbox.x) ||
                            (snapMaps.center.length && snapMaps.center[0] < Math.round(bbox.x + bbox.w / 2))) {
                            continue;
                        }
                    }
                    if (!canRenderBbox(line, bbox)) {
                        break;
                    }
                    let targetX = Math.min(Math.round(bbox.x / medianWidth), COLUMN_SPACES);
                    const lineMax = Math.max(leftAnchorColumnAtOrBefore(bbox.x), rawLines[lineIndex].trimEnd().length + (bbox.shouldSpace ?? 0));
                    if (targetX < lineMax) {
                        targetX = lineMax;
                    }
                    if (!bbox.forceUnsnapped) {
                        const floatingAnchor = forwardAnchors.floating[Math.round(bbox.x)];
                        if (floatingAnchor && targetX < floatingAnchor) {
                            // Cap the floating-anchor adjustment to avoid excessive gaps
                            // in justified text.
                            const maxFloatingGap = 4;
                            const adjustedAnchor = Math.min(floatingAnchor, targetX + maxFloatingGap);
                            if (adjustedAnchor > targetX) {
                                targetX = adjustedAnchor;
                            }
                        }
                    }
                    rawLines[lineIndex] = rawLines[lineIndex].trimEnd();
                    if (targetX > rawLines[lineIndex].length) {
                        rawLines[lineIndex] += " ".repeat(targetX - rawLines[lineIndex].length);
                    }
                    rawLines[lineIndex] += bbox.str;
                    bbox.rendered = true;
                    hasChanged = true;
                    if (!bbox.forceUnsnapped) {
                        updateForwardAnchors(bbox, nextBoxAfter(lineIndex, boxIndex), snapMaps, forwardAnchors, rawLines[lineIndex].length);
                    }
                }
            }
            if (snapMaps.left.length &&
                (!snapMaps.right.length || snapMaps.left[0] <= snapMaps.right[0]) &&
                (!snapMaps.center.length || snapMaps.left[0] <= snapMaps.center[0])) {
                // Left snap: every box anchored here starts at the same column.
                hasChanged = true;
                const thisTurnSnap = leftSnap.filter((item) => item.bbox.leftAnchor && parseFloat(item.bbox.leftAnchor) === snapMaps.left[0]);
                if (!thisTurnSnap.length) {
                    snapMaps.left.shift();
                    continue;
                }
                let targetX = Math.min(Math.round(snapMaps.left[0] / medianWidth), COLUMN_SPACES);
                const lineMax = Math.max(...thisTurnSnap.map((v) => rawLines[v.lineIndex].length + pendingSpaces(v) + 1));
                if (targetX < lineMax) {
                    targetX = lineMax;
                }
                if (forwardAnchors.left[snapMaps.left[0]] &&
                    targetX < forwardAnchors.left[snapMaps.left[0]]) {
                    targetX = forwardAnchors.left[snapMaps.left[0]];
                }
                if (prevAnchors.forwardAnchorLeft[snapMaps.left[0]] &&
                    targetX < prevAnchors.forwardAnchorLeft[snapMaps.left[0]]) {
                    targetX = prevAnchors.forwardAnchorLeft[snapMaps.left[0]];
                }
                forwardAnchors.left[snapMaps.left[0]] = targetX;
                for (const currentLeftSnapBox of thisTurnSnap) {
                    const lineIndex = currentLeftSnapBox.lineIndex;
                    if (targetX > rawLines[lineIndex].length) {
                        rawLines[lineIndex] += " ".repeat(targetX - rawLines[lineIndex].length);
                    }
                    rawLines[lineIndex] += currentLeftSnapBox.bbox.str;
                    currentLeftSnapBox.bbox.rendered = true;
                    updateForwardAnchors(currentLeftSnapBox.bbox, nextBoxAfter(lineIndex, currentLeftSnapBox.boxIndex), snapMaps, forwardAnchors, rawLines[lineIndex].length);
                }
                padBlockTo(block, targetX);
                snapMaps.left.shift();
            }
            else if (snapMaps.right.length &&
                (!snapMaps.left.length || snapMaps.right[0] <= snapMaps.left[0]) &&
                (!snapMaps.center.length || snapMaps.right[0] <= snapMaps.center[0])) {
                // Right snap: every box anchored here ends at the same column.
                hasChanged = true;
                const thisTurnSnap = rightSnap.filter((item) => item.bbox.rightAnchor && parseFloat(item.bbox.rightAnchor) === snapMaps.right[0]);
                if (!thisTurnSnap.length) {
                    snapMaps.right.shift();
                    continue;
                }
                let targetX = Math.min(Math.round(snapMaps.right[0] / medianWidth), COLUMN_SPACES);
                const lineMax = Math.max(...thisTurnSnap.map((v) => Math.max(leftAnchorColumnAtOrBefore(v.bbox.x), rawLines[v.lineIndex].trimEnd().length + (v.bbox.shouldSpace ?? 0)) + v.bbox.strLength));
                if (targetX < lineMax) {
                    targetX = lineMax;
                }
                if (forwardAnchors.right[snapMaps.right[0]] &&
                    targetX < forwardAnchors.right[snapMaps.right[0]]) {
                    targetX = forwardAnchors.right[snapMaps.right[0]];
                }
                if (prevAnchors.forwardAnchorRight[snapMaps.right[0]] &&
                    targetX < prevAnchors.forwardAnchorRight[snapMaps.right[0]]) {
                    targetX = prevAnchors.forwardAnchorRight[snapMaps.right[0]];
                }
                forwardAnchors.right[snapMaps.right[0]] = targetX;
                for (const currentRightSnapBox of thisTurnSnap) {
                    const lineIndex = currentRightSnapBox.lineIndex;
                    const boxLength = currentRightSnapBox.bbox.strLength;
                    rawLines[lineIndex] = rawLines[lineIndex].trimEnd();
                    // Pad so that the box text ends exactly at targetX.
                    if (targetX > rawLines[lineIndex].length + boxLength) {
                        rawLines[lineIndex] += " ".repeat(targetX - rawLines[lineIndex].length - boxLength);
                    }
                    rawLines[lineIndex] += currentRightSnapBox.bbox.str;
                    currentRightSnapBox.bbox.rendered = true;
                    updateForwardAnchors(currentRightSnapBox.bbox, nextBoxAfter(lineIndex, currentRightSnapBox.boxIndex), snapMaps, forwardAnchors, rawLines[lineIndex].length);
                }
                padBlockTo(block, targetX);
                snapMaps.right.shift();
            }
            else if (snapMaps.center.length &&
                (!snapMaps.left.length || snapMaps.center[0] <= snapMaps.left[0]) &&
                (!snapMaps.right.length || snapMaps.center[0] <= snapMaps.right[0])) {
                // Center snap: every box anchored here is centered on the same column.
                // Forward anchors are not propagated and the block is not padded,
                // since centered text may span between snapped columns.
                hasChanged = true;
                const thisTurnSnap = centerSnap.filter((item) => item.bbox.centerAnchor && parseFloat(item.bbox.centerAnchor) === snapMaps.center[0]);
                if (!thisTurnSnap.length) {
                    snapMaps.center.shift();
                    continue;
                }
                let targetX = Math.min(Math.round(snapMaps.center[0] / medianWidth), COLUMN_SPACES);
                const lineMax = Math.max(...thisTurnSnap.map((v) => rawLines[v.lineIndex].length + Math.round(v.bbox.strLength / 2) + pendingSpaces(v)));
                if (targetX < lineMax) {
                    targetX = lineMax;
                }
                if (forwardAnchors.center[snapMaps.center[0]] &&
                    targetX < forwardAnchors.center[snapMaps.center[0]]) {
                    targetX = forwardAnchors.center[snapMaps.center[0]];
                }
                if (prevAnchors.forwardAnchorCenter[snapMaps.center[0]] &&
                    targetX < prevAnchors.forwardAnchorCenter[snapMaps.center[0]]) {
                    targetX = prevAnchors.forwardAnchorCenter[snapMaps.center[0]];
                }
                forwardAnchors.center[snapMaps.center[0]] = targetX;
                for (const currentCenterSnapBox of thisTurnSnap) {
                    const lineIndex = currentCenterSnapBox.lineIndex;
                    const halfLength = Math.round(currentCenterSnapBox.bbox.strLength / 2);
                    if (targetX > rawLines[lineIndex].length + halfLength) {
                        rawLines[lineIndex] += " ".repeat(targetX - rawLines[lineIndex].length - halfLength);
                    }
                    rawLines[lineIndex] += currentCenterSnapBox.bbox.str;
                    currentCenterSnapBox.bbox.rendered = true;
                }
                snapMaps.center.shift();
            }
        }
    }
    fixSparseBlocks(blocks, rawLines);
    const text = rawLines.join("\n");
    // OSS: return the text instead of mutating the page object.
    return {
        text,
        prevAnchors: {
            forwardAnchorLeft: forwardAnchors.left,
            forwardAnchorRight: forwardAnchors.right,
            forwardAnchorCenter: forwardAnchors.center,
        },
    };
}
1354
/**
 * Project every page of a document onto the character grid.
 *
 * Each page is turned into projection boxes (`buildBbox`) and rendered with
 * `projectToGrid`. When `config.preserveLayoutAlignmentAcrossPages` is set,
 * the anchors produced by a page are merged into the anchors handed to the
 * following pages; otherwise every page starts from empty anchors. The
 * collected results are finally passed through `cleanRawText`.
 *
 * @param {Array<object>} pages - Pages to project; `pageNum`, `width`, `height`
 *   and `textItems` are copied into the result.
 * @param {object} config - Parser configuration.
 * @returns {Array<{pageNum: *, width: *, height: *, text: string, textItems: *, boundingBoxes: Array}>}
 *   One result per page, in input order.
 */
export function projectPagesToGrid(pages, config) {
    const anchorKinds = ["forwardAnchorLeft", "forwardAnchorRight", "forwardAnchorCenter"];
    const carriedAnchors = {
        forwardAnchorLeft: {},
        forwardAnchorRight: {},
        forwardAnchorCenter: {},
    };
    const results = [];
    for (const page of pages) {
        // Text items -> projection boxes -> grid text.
        const boxes = buildBbox(page, config);
        const { text, prevAnchors: freshAnchors } = projectToGrid(config, page, boxes, carriedAnchors, pages.length);
        if (config.preserveLayoutAlignmentAcrossPages) {
            // Later pages inherit (and may override) the columns found so far.
            for (const kind of anchorKinds) {
                Object.assign(carriedAnchors[kind], freshAnchors[kind]);
            }
        }
        const { pageNum, width, height, textItems } = page;
        results.push({ pageNum, width, height, text, textItems, boundingBoxes: [] });
    }
    // Clean raw text (margin detection, etc).
    cleanRawText(results, config);
    return results;
}
1392
+ //# sourceMappingURL=gridProjection.js.map