@llamaindex/liteparse 1.5.3 → 2.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (578) hide show
  1. package/README.md +49 -448
  2. package/dist/cli.d.ts +3 -0
  3. package/dist/cli.d.ts.map +1 -0
  4. package/dist/cli.js +87 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/lib.d.ts +58 -0
  7. package/dist/lib.d.ts.map +1 -0
  8. package/dist/lib.js +88 -0
  9. package/dist/lib.js.map +1 -0
  10. package/dist/native.d.ts +54 -0
  11. package/dist/native.d.ts.map +1 -0
  12. package/dist/native.js +70 -0
  13. package/dist/native.js.map +1 -0
  14. package/libpdfium.so +0 -0
  15. package/liteparse.linux-x64-gnu.node +0 -0
  16. package/package.json +36 -50
  17. package/LICENSE +0 -201
  18. package/dist/cli/parse.d.ts +0 -4
  19. package/dist/cli/parse.d.ts.map +0 -1
  20. package/dist/cli/parse.js +0 -450
  21. package/dist/cli/parse.js.map +0 -1
  22. package/dist/package.json +0 -90
  23. package/dist/src/conversion/convertToPdf.d.ts +0 -65
  24. package/dist/src/conversion/convertToPdf.d.ts.map +0 -1
  25. package/dist/src/conversion/convertToPdf.js +0 -405
  26. package/dist/src/conversion/convertToPdf.js.map +0 -1
  27. package/dist/src/conversion/convertToPdf.test.d.ts +0 -2
  28. package/dist/src/conversion/convertToPdf.test.d.ts.map +0 -1
  29. package/dist/src/conversion/convertToPdf.test.js +0 -327
  30. package/dist/src/conversion/convertToPdf.test.js.map +0 -1
  31. package/dist/src/core/config.d.ts +0 -4
  32. package/dist/src/core/config.d.ts.map +0 -1
  33. package/dist/src/core/config.js +0 -26
  34. package/dist/src/core/config.js.map +0 -1
  35. package/dist/src/core/config.test.d.ts +0 -2
  36. package/dist/src/core/config.test.d.ts.map +0 -1
  37. package/dist/src/core/config.test.js +0 -21
  38. package/dist/src/core/config.test.js.map +0 -1
  39. package/dist/src/core/parser.d.ts +0 -92
  40. package/dist/src/core/parser.d.ts.map +0 -1
  41. package/dist/src/core/parser.js +0 -401
  42. package/dist/src/core/parser.js.map +0 -1
  43. package/dist/src/core/parser.test.d.ts +0 -2
  44. package/dist/src/core/parser.test.d.ts.map +0 -1
  45. package/dist/src/core/parser.test.js +0 -541
  46. package/dist/src/core/parser.test.js.map +0 -1
  47. package/dist/src/core/types.d.ts +0 -370
  48. package/dist/src/core/types.d.ts.map +0 -1
  49. package/dist/src/core/types.js +0 -2
  50. package/dist/src/core/types.js.map +0 -1
  51. package/dist/src/engines/ocr/http-simple.d.ts +0 -19
  52. package/dist/src/engines/ocr/http-simple.d.ts.map +0 -1
  53. package/dist/src/engines/ocr/http-simple.js +0 -69
  54. package/dist/src/engines/ocr/http-simple.js.map +0 -1
  55. package/dist/src/engines/ocr/http-simple.test.d.ts +0 -2
  56. package/dist/src/engines/ocr/http-simple.test.d.ts.map +0 -1
  57. package/dist/src/engines/ocr/http-simple.test.js +0 -108
  58. package/dist/src/engines/ocr/http-simple.test.js.map +0 -1
  59. package/dist/src/engines/ocr/interface.d.ts +0 -15
  60. package/dist/src/engines/ocr/interface.d.ts.map +0 -1
  61. package/dist/src/engines/ocr/interface.js +0 -2
  62. package/dist/src/engines/ocr/interface.js.map +0 -1
  63. package/dist/src/engines/ocr/tesseract.d.ts +0 -20
  64. package/dist/src/engines/ocr/tesseract.d.ts.map +0 -1
  65. package/dist/src/engines/ocr/tesseract.js +0 -162
  66. package/dist/src/engines/ocr/tesseract.js.map +0 -1
  67. package/dist/src/engines/ocr/tesseract.test.d.ts +0 -2
  68. package/dist/src/engines/ocr/tesseract.test.d.ts.map +0 -1
  69. package/dist/src/engines/ocr/tesseract.test.js +0 -94
  70. package/dist/src/engines/ocr/tesseract.test.js.map +0 -1
  71. package/dist/src/engines/pdf/interface.d.ts +0 -84
  72. package/dist/src/engines/pdf/interface.d.ts.map +0 -1
  73. package/dist/src/engines/pdf/interface.js +0 -2
  74. package/dist/src/engines/pdf/interface.js.map +0 -1
  75. package/dist/src/engines/pdf/pdfium-renderer.d.ts +0 -31
  76. package/dist/src/engines/pdf/pdfium-renderer.d.ts.map +0 -1
  77. package/dist/src/engines/pdf/pdfium-renderer.js +0 -145
  78. package/dist/src/engines/pdf/pdfium-renderer.js.map +0 -1
  79. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts +0 -2
  80. package/dist/src/engines/pdf/pdfium-renderer.test.d.ts.map +0 -1
  81. package/dist/src/engines/pdf/pdfium-renderer.test.js +0 -109
  82. package/dist/src/engines/pdf/pdfium-renderer.test.js.map +0 -1
  83. package/dist/src/engines/pdf/pdfjs.d.ts +0 -14
  84. package/dist/src/engines/pdf/pdfjs.d.ts.map +0 -1
  85. package/dist/src/engines/pdf/pdfjs.js +0 -804
  86. package/dist/src/engines/pdf/pdfjs.js.map +0 -1
  87. package/dist/src/engines/pdf/pdfjs.test.d.ts +0 -2
  88. package/dist/src/engines/pdf/pdfjs.test.d.ts.map +0 -1
  89. package/dist/src/engines/pdf/pdfjs.test.js +0 -225
  90. package/dist/src/engines/pdf/pdfjs.test.js.map +0 -1
  91. package/dist/src/engines/pdf/pdfjsImporter.d.ts +0 -5
  92. package/dist/src/engines/pdf/pdfjsImporter.d.ts.map +0 -1
  93. package/dist/src/engines/pdf/pdfjsImporter.js +0 -45
  94. package/dist/src/engines/pdf/pdfjsImporter.js.map +0 -1
  95. package/dist/src/index.d.ts +0 -3
  96. package/dist/src/index.d.ts.map +0 -1
  97. package/dist/src/index.js +0 -5
  98. package/dist/src/index.js.map +0 -1
  99. package/dist/src/lib.d.ts +0 -19
  100. package/dist/src/lib.d.ts.map +0 -1
  101. package/dist/src/lib.js +0 -17
  102. package/dist/src/lib.js.map +0 -1
  103. package/dist/src/output/json.d.ts +0 -10
  104. package/dist/src/output/json.d.ts.map +0 -1
  105. package/dist/src/output/json.js +0 -32
  106. package/dist/src/output/json.js.map +0 -1
  107. package/dist/src/output/json.test.d.ts +0 -2
  108. package/dist/src/output/json.test.d.ts.map +0 -1
  109. package/dist/src/output/json.test.js +0 -199
  110. package/dist/src/output/json.test.js.map +0 -1
  111. package/dist/src/output/text.d.ts +0 -10
  112. package/dist/src/output/text.d.ts.map +0 -1
  113. package/dist/src/output/text.js +0 -17
  114. package/dist/src/output/text.js.map +0 -1
  115. package/dist/src/output/text.test.d.ts +0 -2
  116. package/dist/src/output/text.test.d.ts.map +0 -1
  117. package/dist/src/output/text.test.js +0 -65
  118. package/dist/src/output/text.test.js.map +0 -1
  119. package/dist/src/processing/bbox.d.ts +0 -20
  120. package/dist/src/processing/bbox.d.ts.map +0 -1
  121. package/dist/src/processing/bbox.js +0 -258
  122. package/dist/src/processing/bbox.js.map +0 -1
  123. package/dist/src/processing/bbox.test.d.ts +0 -2
  124. package/dist/src/processing/bbox.test.d.ts.map +0 -1
  125. package/dist/src/processing/bbox.test.js +0 -334
  126. package/dist/src/processing/bbox.test.js.map +0 -1
  127. package/dist/src/processing/cleanText.d.ts +0 -6
  128. package/dist/src/processing/cleanText.d.ts.map +0 -1
  129. package/dist/src/processing/cleanText.js +0 -73
  130. package/dist/src/processing/cleanText.js.map +0 -1
  131. package/dist/src/processing/cleanText.test.d.ts +0 -2
  132. package/dist/src/processing/cleanText.test.d.ts.map +0 -1
  133. package/dist/src/processing/cleanText.test.js +0 -46
  134. package/dist/src/processing/cleanText.test.js.map +0 -1
  135. package/dist/src/processing/grid.d.ts +0 -7
  136. package/dist/src/processing/grid.d.ts.map +0 -1
  137. package/dist/src/processing/grid.js +0 -13
  138. package/dist/src/processing/grid.js.map +0 -1
  139. package/dist/src/processing/gridDebugLogger.d.ts +0 -206
  140. package/dist/src/processing/gridDebugLogger.d.ts.map +0 -1
  141. package/dist/src/processing/gridDebugLogger.js +0 -446
  142. package/dist/src/processing/gridDebugLogger.js.map +0 -1
  143. package/dist/src/processing/gridProjection.d.ts +0 -19
  144. package/dist/src/processing/gridProjection.d.ts.map +0 -1
  145. package/dist/src/processing/gridProjection.js +0 -1813
  146. package/dist/src/processing/gridProjection.js.map +0 -1
  147. package/dist/src/processing/gridProjection.test.d.ts +0 -2
  148. package/dist/src/processing/gridProjection.test.d.ts.map +0 -1
  149. package/dist/src/processing/gridProjection.test.js +0 -495
  150. package/dist/src/processing/gridProjection.test.js.map +0 -1
  151. package/dist/src/processing/gridVisualizer.d.ts +0 -14
  152. package/dist/src/processing/gridVisualizer.d.ts.map +0 -1
  153. package/dist/src/processing/gridVisualizer.js +0 -166
  154. package/dist/src/processing/gridVisualizer.js.map +0 -1
  155. package/dist/src/processing/markupUtils.d.ts +0 -7
  156. package/dist/src/processing/markupUtils.d.ts.map +0 -1
  157. package/dist/src/processing/markupUtils.js +0 -25
  158. package/dist/src/processing/markupUtils.js.map +0 -1
  159. package/dist/src/processing/markupUtils.test.d.ts +0 -2
  160. package/dist/src/processing/markupUtils.test.d.ts.map +0 -1
  161. package/dist/src/processing/markupUtils.test.js +0 -26
  162. package/dist/src/processing/markupUtils.test.js.map +0 -1
  163. package/dist/src/processing/ocrUtils.d.ts +0 -24
  164. package/dist/src/processing/ocrUtils.d.ts.map +0 -1
  165. package/dist/src/processing/ocrUtils.js +0 -79
  166. package/dist/src/processing/ocrUtils.js.map +0 -1
  167. package/dist/src/processing/octUtils.test.d.ts +0 -2
  168. package/dist/src/processing/octUtils.test.d.ts.map +0 -1
  169. package/dist/src/processing/octUtils.test.js +0 -72
  170. package/dist/src/processing/octUtils.test.js.map +0 -1
  171. package/dist/src/processing/searchItems.d.ts +0 -26
  172. package/dist/src/processing/searchItems.d.ts.map +0 -1
  173. package/dist/src/processing/searchItems.js +0 -93
  174. package/dist/src/processing/searchItems.js.map +0 -1
  175. package/dist/src/processing/searchItems.test.d.ts +0 -2
  176. package/dist/src/processing/searchItems.test.d.ts.map +0 -1
  177. package/dist/src/processing/searchItems.test.js +0 -84
  178. package/dist/src/processing/searchItems.test.js.map +0 -1
  179. package/dist/src/processing/textUtils.d.ts +0 -20
  180. package/dist/src/processing/textUtils.d.ts.map +0 -1
  181. package/dist/src/processing/textUtils.js +0 -142
  182. package/dist/src/processing/textUtils.js.map +0 -1
  183. package/dist/src/processing/textUtils.test.d.ts +0 -2
  184. package/dist/src/processing/textUtils.test.d.ts.map +0 -1
  185. package/dist/src/processing/textUtils.test.js +0 -45
  186. package/dist/src/processing/textUtils.test.js.map +0 -1
  187. package/dist/src/vendor/pdfjs/LICENSE +0 -177
  188. package/dist/src/vendor/pdfjs/README.md +0 -0
  189. package/dist/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  190. package/dist/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  191. package/dist/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  192. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  193. package/dist/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  194. package/dist/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  195. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  196. package/dist/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  197. package/dist/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  198. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  199. package/dist/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  200. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  201. package/dist/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  202. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  203. package/dist/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  204. package/dist/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  205. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  206. package/dist/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  207. package/dist/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  208. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  209. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  210. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  211. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  212. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  213. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  214. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  215. package/dist/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  216. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  217. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  218. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  219. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  220. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  221. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  222. package/dist/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  223. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  224. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  225. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  226. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  227. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  228. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  229. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  230. package/dist/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  231. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  232. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  233. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  234. package/dist/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  235. package/dist/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  236. package/dist/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  237. package/dist/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  238. package/dist/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  239. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  240. package/dist/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  241. package/dist/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  242. package/dist/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  243. package/dist/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  244. package/dist/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  245. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  246. package/dist/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  247. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  248. package/dist/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  249. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  250. package/dist/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  251. package/dist/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  252. package/dist/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  253. package/dist/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  254. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  255. package/dist/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  256. package/dist/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  257. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  258. package/dist/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  259. package/dist/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  260. package/dist/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  261. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  262. package/dist/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  263. package/dist/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  264. package/dist/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  265. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  266. package/dist/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  267. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  268. package/dist/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  269. package/dist/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  270. package/dist/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  271. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  272. package/dist/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  273. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  274. package/dist/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  275. package/dist/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  276. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  277. package/dist/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  278. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  279. package/dist/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  280. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  281. package/dist/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  282. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  283. package/dist/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  284. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  285. package/dist/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  286. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  287. package/dist/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  288. package/dist/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  289. package/dist/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  290. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  291. package/dist/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  292. package/dist/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  293. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  294. package/dist/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  295. package/dist/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  296. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  297. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  298. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  299. package/dist/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  300. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  301. package/dist/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  302. package/dist/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  303. package/dist/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  304. package/dist/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  305. package/dist/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  306. package/dist/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  307. package/dist/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  308. package/dist/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  309. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  310. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  311. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  312. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  313. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  314. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  315. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  316. package/dist/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  317. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  318. package/dist/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  319. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  320. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  321. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  322. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  323. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  324. package/dist/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  325. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  326. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  327. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  328. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  329. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  330. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  331. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  332. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  333. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  334. package/dist/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  335. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  336. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  337. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  338. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  339. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  340. package/dist/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  341. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  342. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  343. package/dist/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  344. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  345. package/dist/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  346. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  347. package/dist/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  348. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  349. package/dist/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  350. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  351. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  352. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  353. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  354. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  355. package/dist/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  356. package/dist/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  357. package/dist/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  358. package/dist/src/vendor/pdfjs/jbig2.wasm +0 -0
  359. package/dist/src/vendor/pdfjs/openjpeg.wasm +0 -0
  360. package/dist/src/vendor/pdfjs/pdf.mjs +0 -33603
  361. package/dist/src/vendor/pdfjs/pdf.mjs.map +0 -1
  362. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  363. package/dist/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  364. package/dist/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  365. package/dist/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  366. package/dist/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  367. package/dist/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  368. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  369. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  370. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  371. package/dist/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  372. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  373. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  374. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  375. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  376. package/dist/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  377. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  378. package/dist/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  379. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  380. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  381. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  382. package/dist/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
  383. package/src/vendor/pdfjs/LICENSE +0 -177
  384. package/src/vendor/pdfjs/README.md +0 -0
  385. package/src/vendor/pdfjs/cmaps/78-EUC-H.bcmap +0 -0
  386. package/src/vendor/pdfjs/cmaps/78-EUC-V.bcmap +0 -0
  387. package/src/vendor/pdfjs/cmaps/78-H.bcmap +0 -0
  388. package/src/vendor/pdfjs/cmaps/78-RKSJ-H.bcmap +0 -0
  389. package/src/vendor/pdfjs/cmaps/78-RKSJ-V.bcmap +0 -0
  390. package/src/vendor/pdfjs/cmaps/78-V.bcmap +0 -0
  391. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-H.bcmap +0 -0
  392. package/src/vendor/pdfjs/cmaps/78ms-RKSJ-V.bcmap +0 -0
  393. package/src/vendor/pdfjs/cmaps/83pv-RKSJ-H.bcmap +0 -0
  394. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-H.bcmap +0 -0
  395. package/src/vendor/pdfjs/cmaps/90ms-RKSJ-V.bcmap +0 -0
  396. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-H.bcmap +0 -0
  397. package/src/vendor/pdfjs/cmaps/90msp-RKSJ-V.bcmap +0 -0
  398. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-H.bcmap +0 -0
  399. package/src/vendor/pdfjs/cmaps/90pv-RKSJ-V.bcmap +0 -0
  400. package/src/vendor/pdfjs/cmaps/Add-H.bcmap +0 -0
  401. package/src/vendor/pdfjs/cmaps/Add-RKSJ-H.bcmap +0 -0
  402. package/src/vendor/pdfjs/cmaps/Add-RKSJ-V.bcmap +0 -0
  403. package/src/vendor/pdfjs/cmaps/Add-V.bcmap +0 -0
  404. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-0.bcmap +0 -0
  405. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-1.bcmap +0 -0
  406. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-2.bcmap +0 -0
  407. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-3.bcmap +0 -0
  408. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-4.bcmap +0 -0
  409. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-5.bcmap +0 -0
  410. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-6.bcmap +0 -0
  411. package/src/vendor/pdfjs/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  412. package/src/vendor/pdfjs/cmaps/Adobe-GB1-0.bcmap +0 -0
  413. package/src/vendor/pdfjs/cmaps/Adobe-GB1-1.bcmap +0 -0
  414. package/src/vendor/pdfjs/cmaps/Adobe-GB1-2.bcmap +0 -0
  415. package/src/vendor/pdfjs/cmaps/Adobe-GB1-3.bcmap +0 -0
  416. package/src/vendor/pdfjs/cmaps/Adobe-GB1-4.bcmap +0 -0
  417. package/src/vendor/pdfjs/cmaps/Adobe-GB1-5.bcmap +0 -0
  418. package/src/vendor/pdfjs/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  419. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-0.bcmap +0 -0
  420. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-1.bcmap +0 -0
  421. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-2.bcmap +0 -0
  422. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-3.bcmap +0 -0
  423. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-4.bcmap +0 -0
  424. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-5.bcmap +0 -0
  425. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-6.bcmap +0 -0
  426. package/src/vendor/pdfjs/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  427. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-0.bcmap +0 -0
  428. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-1.bcmap +0 -0
  429. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-2.bcmap +0 -0
  430. package/src/vendor/pdfjs/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  431. package/src/vendor/pdfjs/cmaps/B5-H.bcmap +0 -0
  432. package/src/vendor/pdfjs/cmaps/B5-V.bcmap +0 -0
  433. package/src/vendor/pdfjs/cmaps/B5pc-H.bcmap +0 -0
  434. package/src/vendor/pdfjs/cmaps/B5pc-V.bcmap +0 -0
  435. package/src/vendor/pdfjs/cmaps/CNS-EUC-H.bcmap +0 -0
  436. package/src/vendor/pdfjs/cmaps/CNS-EUC-V.bcmap +0 -0
  437. package/src/vendor/pdfjs/cmaps/CNS1-H.bcmap +0 -0
  438. package/src/vendor/pdfjs/cmaps/CNS1-V.bcmap +0 -0
  439. package/src/vendor/pdfjs/cmaps/CNS2-H.bcmap +0 -0
  440. package/src/vendor/pdfjs/cmaps/CNS2-V.bcmap +0 -3
  441. package/src/vendor/pdfjs/cmaps/ETHK-B5-H.bcmap +0 -0
  442. package/src/vendor/pdfjs/cmaps/ETHK-B5-V.bcmap +0 -0
  443. package/src/vendor/pdfjs/cmaps/ETen-B5-H.bcmap +0 -0
  444. package/src/vendor/pdfjs/cmaps/ETen-B5-V.bcmap +0 -0
  445. package/src/vendor/pdfjs/cmaps/ETenms-B5-H.bcmap +0 -3
  446. package/src/vendor/pdfjs/cmaps/ETenms-B5-V.bcmap +0 -0
  447. package/src/vendor/pdfjs/cmaps/EUC-H.bcmap +0 -0
  448. package/src/vendor/pdfjs/cmaps/EUC-V.bcmap +0 -0
  449. package/src/vendor/pdfjs/cmaps/Ext-H.bcmap +0 -0
  450. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-H.bcmap +0 -0
  451. package/src/vendor/pdfjs/cmaps/Ext-RKSJ-V.bcmap +0 -0
  452. package/src/vendor/pdfjs/cmaps/Ext-V.bcmap +0 -0
  453. package/src/vendor/pdfjs/cmaps/GB-EUC-H.bcmap +0 -0
  454. package/src/vendor/pdfjs/cmaps/GB-EUC-V.bcmap +0 -0
  455. package/src/vendor/pdfjs/cmaps/GB-H.bcmap +0 -4
  456. package/src/vendor/pdfjs/cmaps/GB-V.bcmap +0 -0
  457. package/src/vendor/pdfjs/cmaps/GBK-EUC-H.bcmap +0 -0
  458. package/src/vendor/pdfjs/cmaps/GBK-EUC-V.bcmap +0 -0
  459. package/src/vendor/pdfjs/cmaps/GBK2K-H.bcmap +0 -0
  460. package/src/vendor/pdfjs/cmaps/GBK2K-V.bcmap +0 -0
  461. package/src/vendor/pdfjs/cmaps/GBKp-EUC-H.bcmap +0 -0
  462. package/src/vendor/pdfjs/cmaps/GBKp-EUC-V.bcmap +0 -0
  463. package/src/vendor/pdfjs/cmaps/GBT-EUC-H.bcmap +0 -0
  464. package/src/vendor/pdfjs/cmaps/GBT-EUC-V.bcmap +0 -0
  465. package/src/vendor/pdfjs/cmaps/GBT-H.bcmap +0 -0
  466. package/src/vendor/pdfjs/cmaps/GBT-V.bcmap +0 -0
  467. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-H.bcmap +0 -0
  468. package/src/vendor/pdfjs/cmaps/GBTpc-EUC-V.bcmap +0 -0
  469. package/src/vendor/pdfjs/cmaps/GBpc-EUC-H.bcmap +0 -0
  470. package/src/vendor/pdfjs/cmaps/GBpc-EUC-V.bcmap +0 -0
  471. package/src/vendor/pdfjs/cmaps/H.bcmap +0 -0
  472. package/src/vendor/pdfjs/cmaps/HKdla-B5-H.bcmap +0 -0
  473. package/src/vendor/pdfjs/cmaps/HKdla-B5-V.bcmap +0 -0
  474. package/src/vendor/pdfjs/cmaps/HKdlb-B5-H.bcmap +0 -0
  475. package/src/vendor/pdfjs/cmaps/HKdlb-B5-V.bcmap +0 -0
  476. package/src/vendor/pdfjs/cmaps/HKgccs-B5-H.bcmap +0 -0
  477. package/src/vendor/pdfjs/cmaps/HKgccs-B5-V.bcmap +0 -0
  478. package/src/vendor/pdfjs/cmaps/HKm314-B5-H.bcmap +0 -0
  479. package/src/vendor/pdfjs/cmaps/HKm314-B5-V.bcmap +0 -0
  480. package/src/vendor/pdfjs/cmaps/HKm471-B5-H.bcmap +0 -0
  481. package/src/vendor/pdfjs/cmaps/HKm471-B5-V.bcmap +0 -0
  482. package/src/vendor/pdfjs/cmaps/HKscs-B5-H.bcmap +0 -0
  483. package/src/vendor/pdfjs/cmaps/HKscs-B5-V.bcmap +0 -0
  484. package/src/vendor/pdfjs/cmaps/Hankaku.bcmap +0 -0
  485. package/src/vendor/pdfjs/cmaps/Hiragana.bcmap +0 -0
  486. package/src/vendor/pdfjs/cmaps/KSC-EUC-H.bcmap +0 -0
  487. package/src/vendor/pdfjs/cmaps/KSC-EUC-V.bcmap +0 -0
  488. package/src/vendor/pdfjs/cmaps/KSC-H.bcmap +0 -0
  489. package/src/vendor/pdfjs/cmaps/KSC-Johab-H.bcmap +0 -0
  490. package/src/vendor/pdfjs/cmaps/KSC-Johab-V.bcmap +0 -0
  491. package/src/vendor/pdfjs/cmaps/KSC-V.bcmap +0 -0
  492. package/src/vendor/pdfjs/cmaps/KSCms-UHC-H.bcmap +0 -0
  493. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  494. package/src/vendor/pdfjs/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  495. package/src/vendor/pdfjs/cmaps/KSCms-UHC-V.bcmap +0 -0
  496. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-H.bcmap +0 -0
  497. package/src/vendor/pdfjs/cmaps/KSCpc-EUC-V.bcmap +0 -0
  498. package/src/vendor/pdfjs/cmaps/Katakana.bcmap +0 -0
  499. package/src/vendor/pdfjs/cmaps/LICENSE +0 -36
  500. package/src/vendor/pdfjs/cmaps/NWP-H.bcmap +0 -0
  501. package/src/vendor/pdfjs/cmaps/NWP-V.bcmap +0 -0
  502. package/src/vendor/pdfjs/cmaps/RKSJ-H.bcmap +0 -0
  503. package/src/vendor/pdfjs/cmaps/RKSJ-V.bcmap +0 -0
  504. package/src/vendor/pdfjs/cmaps/Roman.bcmap +0 -0
  505. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  506. package/src/vendor/pdfjs/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  507. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  508. package/src/vendor/pdfjs/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  509. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  510. package/src/vendor/pdfjs/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  511. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  512. package/src/vendor/pdfjs/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  513. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-H.bcmap +0 -0
  514. package/src/vendor/pdfjs/cmaps/UniGB-UCS2-V.bcmap +0 -0
  515. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-H.bcmap +0 -0
  516. package/src/vendor/pdfjs/cmaps/UniGB-UTF16-V.bcmap +0 -0
  517. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-H.bcmap +0 -0
  518. package/src/vendor/pdfjs/cmaps/UniGB-UTF32-V.bcmap +0 -0
  519. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-H.bcmap +0 -0
  520. package/src/vendor/pdfjs/cmaps/UniGB-UTF8-V.bcmap +0 -0
  521. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  522. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  523. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  524. package/src/vendor/pdfjs/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  525. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  526. package/src/vendor/pdfjs/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  527. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  528. package/src/vendor/pdfjs/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  529. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  530. package/src/vendor/pdfjs/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  531. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  532. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  533. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  534. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  535. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  536. package/src/vendor/pdfjs/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  537. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  538. package/src/vendor/pdfjs/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  539. package/src/vendor/pdfjs/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  540. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  541. package/src/vendor/pdfjs/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  542. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  543. package/src/vendor/pdfjs/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  544. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-H.bcmap +0 -0
  545. package/src/vendor/pdfjs/cmaps/UniKS-UCS2-V.bcmap +0 -0
  546. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-H.bcmap +0 -0
  547. package/src/vendor/pdfjs/cmaps/UniKS-UTF16-V.bcmap +0 -0
  548. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-H.bcmap +0 -0
  549. package/src/vendor/pdfjs/cmaps/UniKS-UTF32-V.bcmap +0 -0
  550. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-H.bcmap +0 -0
  551. package/src/vendor/pdfjs/cmaps/UniKS-UTF8-V.bcmap +0 -0
  552. package/src/vendor/pdfjs/cmaps/V.bcmap +0 -0
  553. package/src/vendor/pdfjs/cmaps/WP-Symbol.bcmap +0 -0
  554. package/src/vendor/pdfjs/jbig2.wasm +0 -0
  555. package/src/vendor/pdfjs/openjpeg.wasm +0 -0
  556. package/src/vendor/pdfjs/pdf.mjs +0 -33603
  557. package/src/vendor/pdfjs/pdf.mjs.map +0 -1
  558. package/src/vendor/pdfjs/pdf.sandbox.mjs +0 -4936
  559. package/src/vendor/pdfjs/pdf.sandbox.mjs.map +0 -1
  560. package/src/vendor/pdfjs/pdf.worker.mjs +0 -70100
  561. package/src/vendor/pdfjs/pdf.worker.mjs.map +0 -1
  562. package/src/vendor/pdfjs/qcms_bg.wasm +0 -0
  563. package/src/vendor/pdfjs/standard_fonts/FoxitDingbats.pfb +0 -0
  564. package/src/vendor/pdfjs/standard_fonts/FoxitFixed.pfb +0 -0
  565. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBold.pfb +0 -0
  566. package/src/vendor/pdfjs/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  567. package/src/vendor/pdfjs/standard_fonts/FoxitFixedItalic.pfb +0 -0
  568. package/src/vendor/pdfjs/standard_fonts/FoxitSerif.pfb +0 -0
  569. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBold.pfb +0 -0
  570. package/src/vendor/pdfjs/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  571. package/src/vendor/pdfjs/standard_fonts/FoxitSerifItalic.pfb +0 -0
  572. package/src/vendor/pdfjs/standard_fonts/FoxitSymbol.pfb +0 -0
  573. package/src/vendor/pdfjs/standard_fonts/LICENSE_FOXIT +0 -27
  574. package/src/vendor/pdfjs/standard_fonts/LICENSE_LIBERATION +0 -102
  575. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Bold.ttf +0 -0
  576. package/src/vendor/pdfjs/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  577. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Italic.ttf +0 -0
  578. package/src/vendor/pdfjs/standard_fonts/LiberationSans-Regular.ttf +0 -0
@@ -1,804 +0,0 @@
1
- import fs from "node:fs/promises";
2
- import { PdfiumRenderer } from "./pdfium-renderer.js";
3
- import { importPdfJs } from "./pdfjsImporter.js";
4
// PDF.js is located and loaded at module-evaluation time; importPdfJs() hands
// back the document-opening function together with the directory that holds
// the bundled PDF.js assets.
const pdfJsModule = await importPdfJs();
const getDocument = pdfJsModule.fn;
const PDFJS_DIR = pdfJsModule.dir;

// Asset locations passed to getDocument(), all derived from PDFJS_DIR.
const CMAP_URL = `${PDFJS_DIR}/cmaps/`;
const STANDARD_FONT_DATA_URL = `${PDFJS_DIR}/standard_fonts/`;
const WASM_URL = `${PDFJS_DIR}/`;

// The cMap files shipped in the cmaps/ directory are in the packed .bcmap format.
const CMAP_PACKED = true;
10
/**
 * Compute the rotation, in degrees, encoded by a PDF transformation matrix.
 *
 * @param {number[]} transform - Matrix laid out as [a, b, c, d, e, f]; only
 *   the first two entries are read.
 * @returns {number} atan2(b, a) converted to degrees, in the range [-180, 180].
 */
function getRotation(transform) {
  const radians = Math.atan2(transform[1], transform[0]);
  const degreesPerRadian = 180 / Math.PI;
  return radians * degreesPerRadian;
}
17
/**
 * Compose two 2D affine matrices given in the six-element [a, b, c, d, e, f]
 * layout, returning m1 x m2 in the same layout.
 *
 * @param {number[]} m1 - Left-hand matrix.
 * @param {number[]} m2 - Right-hand matrix.
 * @returns {number[]} A new six-element matrix; neither input is modified.
 */
function multiplyMatrices(m1, m2) {
  const product = [];
  // m2 is three column pairs: (a, b), (c, d) and (e, f). Each pair is pushed
  // through the 2x2 linear part of m1.
  for (let column = 0; column < 3; column++) {
    const u = m2[2 * column];
    const v = m2[2 * column + 1];
    product.push(m1[0] * u + m1[2] * v, m1[1] * u + m1[3] * v);
  }
  // Only the translation column additionally picks up m1's own translation.
  product[4] += m1[4];
  product[5] += m1[5];
  return product;
}
30
/**
 * Map a point through a 2D affine matrix in [a, b, c, d, e, f] layout.
 *
 * @param {{x: number, y: number}} point - Point to transform.
 * @param {number[]} transform - Six-element transformation matrix.
 * @returns {{x: number, y: number}} A new object with the transformed coordinates.
 */
function applyTransformation(point, transform) {
  const px = point.x;
  const py = point.y;
  const x = px * transform[0] + py * transform[2] + transform[4];
  const y = px * transform[1] + py * transform[3] + transform[5];
  return { x, y };
}
39
// Plain substring (not a regex) that opens every buggy-font marker; used as a
// cheap includes() pre-check before the marker regex is run.
const BUGGY_FONT_MARKER_CHECK = ":->|>";

// Matches one character wrapped in pipes together with any surrounding
// whitespace, e.g. " |a| "; the wrapped character is capture group 1.
const PIPE_PATTERN_REGEX = /\s*\|([^|])\|\s*/g;
42
/**
 * Adobe Glyph List subset: maps standard PostScript glyph names to Unicode characters.
 *
 * When PDF.js detects a "buggy" font (one whose ToUnicode/encoding maps glyphs to
 * control characters or PUA code points), it emits markers containing the glyph's
 * original char code AND the glyph name from the font's /Differences or /Encoding
 * dictionary. This map resolves those glyph names to correct Unicode characters.
 *
 * This is a small subset of the full Adobe Glyph List (~4,300 entries).
 * The full canonical source is: https://github.com/adobe-type-tools/agl-aglfn
 * (see glyphlist.txt). The subset covers basic Latin, digits, ligatures, punctuation,
 * typographic characters, Greek, math symbols, and common accented Latin. Glyph names
 * not in this subset fall through to the uniXXXX and underscore-composite handling
 * in resolveGlyphName(). Add entries here if a PDF's buggy font uses a standard glyph
 * name that isn't covered and doesn't match those fallbacks.
 *
 * Look entries up with Object.hasOwn(), never with `in` or a bare truthiness
 * test: this is a plain object, so inherited names such as "constructor" would
 * otherwise be treated as glyph names.
 */
const ADOBE_GLYPH_MAP = {
  // Basic Latin letters
  A: "A",
  B: "B",
  C: "C",
  D: "D",
  E: "E",
  F: "F",
  G: "G",
  H: "H",
  I: "I",
  J: "J",
  K: "K",
  L: "L",
  M: "M",
  N: "N",
  O: "O",
  P: "P",
  Q: "Q",
  R: "R",
  S: "S",
  T: "T",
  U: "U",
  V: "V",
  W: "W",
  X: "X",
  Y: "Y",
  Z: "Z",
  a: "a",
  b: "b",
  c: "c",
  d: "d",
  e: "e",
  f: "f",
  g: "g",
  h: "h",
  i: "i",
  j: "j",
  k: "k",
  l: "l",
  m: "m",
  n: "n",
  o: "o",
  p: "p",
  q: "q",
  r: "r",
  s: "s",
  t: "t",
  u: "u",
  v: "v",
  w: "w",
  x: "x",
  y: "y",
  z: "z",
  // Digits
  zero: "0",
  one: "1",
  two: "2",
  three: "3",
  four: "4",
  five: "5",
  six: "6",
  seven: "7",
  eight: "8",
  nine: "9",
  // Ligatures (Unicode presentation forms — decomposed later by stripControlChars)
  fi: "\uFB01",
  fl: "\uFB02",
  ff: "\uFB00",
  ffi: "\uFB03",
  ffl: "\uFB04",
  // Punctuation and symbols
  space: " ",
  period: ".",
  comma: ",",
  colon: ":",
  semicolon: ";",
  hyphen: "-",
  minus: "\u2212",
  slash: "/",
  question: "?",
  dollar: "$",
  parenleft: "(",
  parenright: ")",
  asterisk: "*",
  plus: "+",
  equal: "=",
  numbersign: "#",
  percent: "%",
  ampersand: "&",
  at: "@",
  exclam: "!",
  bracketleft: "[",
  bracketright: "]",
  braceleft: "{",
  braceright: "}",
  underscore: "_",
  quotedbl: '"',
  quotesingle: "'",
  backslash: "\\",
  bar: "|",
  asciitilde: "~",
  asciicircum: "^",
  grave: "`",
  less: "<",
  greater: ">",
  // Typographic
  quoteright: "\u2019",
  quoteleft: "\u2018",
  quotedblleft: "\u201C",
  quotedblright: "\u201D",
  quotesinglbase: "\u201A",
  quotedblbase: "\u201E",
  endash: "\u2013",
  emdash: "\u2014",
  bullet: "\u2022",
  ellipsis: "\u2026",
  dagger: "\u2020",
  daggerdbl: "\u2021",
  guilsinglleft: "\u2039",
  guilsinglright: "\u203A",
  guillemotleft: "\u00AB",
  guillemotright: "\u00BB",
  trademark: "\u2122",
  registered: "\u00AE",
  copyright: "\u00A9",
  // Greek (Delta, Omega and mu deliberately use the AGL code points
  // U+2206, U+2126 and U+00B5 rather than the Greek-block letters)
  Alpha: "\u0391",
  Beta: "\u0392",
  Gamma: "\u0393",
  Delta: "\u2206",
  Epsilon: "\u0395",
  Zeta: "\u0396",
  Eta: "\u0397",
  Theta: "\u0398",
  Iota: "\u0399",
  Kappa: "\u039A",
  Lambda: "\u039B",
  Mu: "\u039C",
  Nu: "\u039D",
  Xi: "\u039E",
  Omicron: "\u039F",
  Pi: "\u03A0",
  Rho: "\u03A1",
  Sigma: "\u03A3",
  Tau: "\u03A4",
  Upsilon: "\u03A5",
  Phi: "\u03A6",
  Chi: "\u03A7",
  Psi: "\u03A8",
  Omega: "\u2126",
  alpha: "\u03B1",
  beta: "\u03B2",
  gamma: "\u03B3",
  delta: "\u03B4",
  epsilon: "\u03B5",
  zeta: "\u03B6",
  eta: "\u03B7",
  theta: "\u03B8",
  iota: "\u03B9",
  kappa: "\u03BA",
  lambda: "\u03BB",
  mu: "\u00B5",
  nu: "\u03BD",
  xi: "\u03BE",
  omicron: "\u03BF",
  pi: "\u03C0",
  rho: "\u03C1",
  sigma: "\u03C3",
  tau: "\u03C4",
  upsilon: "\u03C5",
  phi: "\u03C6",
  chi: "\u03C7",
  psi: "\u03C8",
  omega: "\u03C9",
  // Math symbols
  greaterequal: "\u2265",
  lessequal: "\u2264",
  notequal: "\u2260",
  plusminus: "\u00B1",
  multiply: "\u00D7",
  divide: "\u00F7",
  infinity: "\u221E",
  summation: "\u2211",
  integral: "\u222B",
  partialdiff: "\u2202",
  radical: "\u221A",
  approxequal: "\u2248",
  degree: "\u00B0",
  // Accented Latin (common)
  Aacute: "\u00C1",
  Agrave: "\u00C0",
  Acircumflex: "\u00C2",
  Atilde: "\u00C3",
  Adieresis: "\u00C4",
  Aring: "\u00C5",
  Eacute: "\u00C9",
  Egrave: "\u00C8",
  Ecircumflex: "\u00CA",
  Edieresis: "\u00CB",
  Iacute: "\u00CD",
  Igrave: "\u00CC",
  Icircumflex: "\u00CE",
  Idieresis: "\u00CF",
  Oacute: "\u00D3",
  Ograve: "\u00D2",
  Ocircumflex: "\u00D4",
  Otilde: "\u00D5",
  Odieresis: "\u00D6",
  Uacute: "\u00DA",
  Ugrave: "\u00D9",
  Ucircumflex: "\u00DB",
  Udieresis: "\u00DC",
  Ntilde: "\u00D1",
  Ccedilla: "\u00C7",
  Scaron: "\u0160",
  Zcaron: "\u017D",
  aacute: "\u00E1",
  agrave: "\u00E0",
  acircumflex: "\u00E2",
  atilde: "\u00E3",
  adieresis: "\u00E4",
  aring: "\u00E5",
  eacute: "\u00E9",
  egrave: "\u00E8",
  ecircumflex: "\u00EA",
  edieresis: "\u00EB",
  iacute: "\u00ED",
  igrave: "\u00EC",
  icircumflex: "\u00EE",
  idieresis: "\u00EF",
  oacute: "\u00F3",
  ograve: "\u00F2",
  ocircumflex: "\u00F4",
  otilde: "\u00F5",
  odieresis: "\u00F6",
  uacute: "\u00FA",
  ugrave: "\u00F9",
  ucircumflex: "\u00FB",
  udieresis: "\u00FC",
  ntilde: "\u00F1",
  ccedilla: "\u00E7",
  scaron: "\u0161",
  zcaron: "\u017E",
  ydieresis: "\u00FF",
  // Miscellaneous
  AE: "\u00C6",
  ae: "\u00E6",
  OE: "\u0152",
  oe: "\u0153",
  Eth: "\u00D0",
  eth: "\u00F0",
  Thorn: "\u00DE",
  thorn: "\u00FE",
  germandbls: "\u00DF",
  dotlessi: "\u0131",
  section: "\u00A7",
  paragraph: "\u00B6",
  currency: "\u00A4",
  cent: "\u00A2",
  sterling: "\u00A3",
  yen: "\u00A5",
  Euro: "\u20AC",
  logicalnot: "\u00AC",
  nbspace: "\u00A0",
};

// "uni" followed by exactly four hexadecimal digits (e.g. "uni00A0"). The whole
// name must match, so ordinary names that merely start with "uni" ("unicode",
// "uniform") are not mistaken for code points.
const UNI_GLYPH_NAME_RE = /^uni([0-9A-Fa-f]{4})$/;

/**
 * Resolve a glyph name to its Unicode character using the Adobe Glyph List.
 * Handles standard names, the "uniXXXX" convention, and underscore-separated
 * composite names (e.g., "f_i" → resolve "f" + "i" = "fi").
 *
 * @param {string} glyphName - Glyph name taken from the font's encoding data.
 * @returns {string|null} The resolved text, or null when the name is unknown.
 */
function resolveGlyphName(glyphName) {
  // Own-property check only: `in` would also accept names inherited from
  // Object.prototype ("constructor", "toString", ...) and return a function.
  if (Object.hasOwn(ADOBE_GLYPH_MAP, glyphName)) {
    return ADOBE_GLYPH_MAP[glyphName];
  }

  // Handle "uniXXXX" convention (e.g., "uni00A0" → U+00A0)
  const uniMatch = UNI_GLYPH_NAME_RE.exec(glyphName);
  if (uniMatch) {
    const code = Number.parseInt(uniMatch[1], 16);
    // U+0000 is never text, and a lone surrogate half is not a valid character.
    const isSurrogate = code >= 0xd800 && code <= 0xdfff;
    if (code > 0 && !isSurrogate) {
      return String.fromCharCode(code);
    }
  }

  // Handle underscore-separated composite names (e.g., "f_i" → "fi", "f_f_i" → "ffi")
  // Some fonts use this convention instead of standard ligature names.
  // The result is kept only if every component resolves.
  if (glyphName.includes("_")) {
    const resolved = glyphName.split("_").map((part) => resolveGlyphName(part));
    if (resolved.every((text) => text !== null)) {
      return resolved.join("");
    }
  }

  return null;
}
349
/**
 * Pattern for the buggy-font markers emitted by the patched PDF.js.
 *
 * Marker format: :->|>_<glyphId>_<fontCharCode>@<glyphName>@<|<-:
 * Group 1 captures the glyph id and group 2 the glyph name. The name sits
 * between @ signs rather than underscores because some fonts use non-standard
 * glyph names that themselves contain underscores (e.g., "f_i" for "fi").
 */
const BUGGY_FONT_MARKER_RE = /:->\|>_(\d+)_\d+@([^@]*)@<\|<-:/g;

/**
 * Replace every buggy-font marker in a string with the text it stands for.
 *
 * Each marker is resolved in this order:
 *   1. the glyph name, looked up through resolveGlyphName();
 *   2. the glyph id itself, when it is a printable ASCII code (32-126);
 *   3. otherwise the marker is removed (better than guessing).
 *
 * @param {string} str - Text that may contain markers.
 * @returns {string} The text with all markers substituted or removed.
 */
function decodeBuggyFontMarkers(str) {
  const decodeMarker = (_whole, rawGlyphId, name) => {
    const fromName = name ? resolveGlyphName(name) : null;
    if (fromName) {
      return fromName;
    }
    const id = Number.parseInt(rawGlyphId, 10);
    const isPrintableAscii = id >= 32 && id <= 126;
    return isPrintableAscii ? String.fromCharCode(id) : "";
  };
  return str.replace(BUGGY_FONT_MARKER_RE, decodeMarker);
}
379
/**
 * Windows-1252 to Unicode mapping for the C1 control range (0x80-0x9F).
 *
 * PDFs frequently store smart quotes, dashes and similar typographic characters
 * as Windows-1252 bytes. Decoded without a proper ToUnicode map, those bytes
 * surface as code points 0x80-0x9F, which Unicode reserves for C1 controls.
 * Translating them keeps the apostrophes, quotes and dashes that stripping
 * would throw away. Positions 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no entry.
 */
const WINDOWS_1252_TO_UNICODE = {
  0x80: "\u20AC", // € euro sign
  0x82: "\u201A", // ‚ single low-9 quotation mark
  0x83: "\u0192", // ƒ latin small f with hook
  0x84: "\u201E", // „ double low-9 quotation mark
  0x85: "\u2026", // … horizontal ellipsis
  0x86: "\u2020", // † dagger
  0x87: "\u2021", // ‡ double dagger
  0x88: "\u02C6", // ˆ modifier circumflex
  0x89: "\u2030", // ‰ per mille
  0x8a: "\u0160", // Š
  0x8b: "\u2039", // ‹ single left angle quote
  0x8c: "\u0152", // Œ
  0x8e: "\u017D", // Ž
  0x91: "\u2018", // ‘ left single quote
  0x92: "\u2019", // ’ right single quote / apostrophe
  0x93: "\u201C", // “ left double quote
  0x94: "\u201D", // ” right double quote
  0x95: "\u2022", // • bullet
  0x96: "\u2013", // – en dash
  0x97: "\u2014", // — em dash
  0x98: "\u02DC", // ˜ small tilde
  0x99: "\u2122", // ™ trade mark
  0x9a: "\u0161", // š
  0x9b: "\u203A", // › single right angle quote
  0x9c: "\u0153", // œ
  0x9e: "\u017E", // ž
  0x9f: "\u0178", // Ÿ
};

/**
 * Unicode ligature decomposition map.
 * Ligature glyphs are common in PDF fonts; spelling them out as plain letters
 * keeps the extracted text searchable and NLP-friendly.
 */
const LIGATURE_MAP = {
  "\uFB00": "ff",
  "\uFB01": "fi",
  "\uFB02": "fl",
  "\uFB03": "ffi",
  "\uFB04": "ffl",
  "\uFB05": "st",
  "\uFB06": "st",
};

/**
 * Decide what a single character (one code point) contributes to cleaned text.
 *
 * @param {string} ch - One character as produced by string iteration.
 * @returns {string} The replacement text; "" when the character is dropped.
 */
function cleanCharacter(ch) {
  // Ligatures (ff, fi, fl, ffi, ffl, st) are spelled out.
  const spelledOut = LIGATURE_MAP[ch];
  if (spelledOut) {
    return spelledOut;
  }

  const code = ch.charCodeAt(0);

  // C1 range: translate through Windows-1252; positions without a mapping
  // are dropped.
  const isC1 = code >= 0x80 && code <= 0x9f;
  if (isC1) {
    return WINDOWS_1252_TO_UNICODE[code] || "";
  }

  // C0 controls are dropped, except tab, newline and carriage return.
  const isKeptWhitespace = code === 0x09 || code === 0x0a || code === 0x0d;
  if (code <= 0x1f && !isKeptWhitespace) {
    return "";
  }

  return ch;
}

/**
 * Clean extracted text: remove C0 control characters (keeping tab, newline and
 * carriage return), translate the C1 range (0x80-0x9F) through Windows-1252,
 * and expand Unicode ligatures into plain letters.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The cleaned text.
 */
function stripControlChars(str) {
  // Array.from walks the string by code point, like for...of.
  return Array.from(str, (ch) => cleanCharacter(ch)).join("");
}
463
/**
 * Heuristically decide whether a text run is garbage produced by a font with
 * a broken ToUnicode mapping.
 *
 * Fonts without a usable ToUnicode map can make PDF.js emit characters at
 * unexpected code points. The signals counted here are:
 *
 *   1. Private Use Area characters (fonts often park glyphs there)
 *   2. Unrelated scripts mixed together (Arabic with Latin Extended)
 *   3. Rare blocks showing up in running text
 *   4. Control characters outnumbering normal characters
 *
 * Strings shorter than 3 UTF-16 units are never flagged. Each character is
 * classified by its first UTF-16 code unit and falls into at most one bucket,
 * tested in the order written below (so the Arabic Extended range wins over
 * the wider "suspicious" range that overlaps it).
 *
 * @param {string} str - Text run to inspect.
 * @returns {boolean} True if the string looks like garbled font output.
 */
function isGarbledFontOutput(str) {
  if (str.length < 3) {
    return false;
  }

  const inRange = (value, low, high) => value >= low && value <= high;
  const counts = {
    control: 0, // C0 controls and unmapped C1 positions
    privateUse: 0,
    arabic: 0,
    latinExtended: 0,
    basicLatinLetters: 0,
    suspicious: 0, // Other blocks that rarely appear in normal text
    normal: 0, // Ordinary printable characters
  };

  for (const ch of str) {
    const code = ch.charCodeAt(0);
    const isCommonWhitespace = code === 0x09 || code === 0x0a || code === 0x0d;

    // C0 controls, apart from tab / newline / carriage return
    if (inRange(code, 0x00, 0x1f) && !isCommonWhitespace) {
      counts.control += 1;
      continue;
    }
    // C1 range: a valid Windows-1252 character (smart quote, dash, ...) is
    // normal text; an unmapped position is treated as a control character.
    if (inRange(code, 0x80, 0x9f)) {
      if (WINDOWS_1252_TO_UNICODE[code]) {
        counts.normal += 1;
      } else {
        counts.control += 1;
      }
      continue;
    }
    // Private Use Area (U+E000-U+F8FF)
    if (inRange(code, 0xe000, 0xf8ff)) {
      counts.privateUse += 1;
      continue;
    }
    // Arabic (0x600-0x6FF) and Arabic Extended (0x750-0x77F, 0x8A0-0x8FF)
    if (inRange(code, 0x600, 0x6ff) || inRange(code, 0x750, 0x77f) || inRange(code, 0x8a0, 0x8ff)) {
      counts.arabic += 1;
      continue;
    }
    // Latin Extended-A/B (0x100-0x24F) and Latin Extended Additional (0x1E00-0x1EFF)
    if (inRange(code, 0x100, 0x24f) || inRange(code, 0x1e00, 0x1eff)) {
      counts.latinExtended += 1;
      continue;
    }
    // Basic Latin letters (A-Z, a-z) also count as normal characters
    if (inRange(code, 0x41, 0x5a) || inRange(code, 0x61, 0x7a)) {
      counts.basicLatinLetters += 1;
      counts.normal += 1;
      continue;
    }
    // Blocks that rarely appear in running text
    if (
      inRange(code, 0x700, 0x7ff) || // Syriac, Thaana, NKo
      inRange(code, 0x800, 0x83f) || // Samaritan
      inRange(code, 0xfff0, 0xffff) || // Specials
      inRange(code, 0x2500, 0x25ff) || // Box drawing, geometric shapes
      inRange(code, 0x0300, 0x036f) // Combining diacritical marks
    ) {
      counts.suspicious += 1;
      continue;
    }
    // Remaining printable ASCII (digits, punctuation, space) and common whitespace
    if (inRange(code, 0x20, 0x7e) || isCommonWhitespace) {
      counts.normal += 1;
    }
  }

  // Ratios are taken against the UTF-16 length of the input.
  const totalChars = str.length;

  // Control characters outnumber normal ones
  const mostlyControl = counts.control > 0 && counts.control > counts.normal;
  // Private Use Area characters are almost always garbled fonts
  const hasPrivateUse = counts.privateUse >= 2;
  // Arabic together with Latin Extended is extremely rare in legitimate text
  const arabicWithLatinExtended = counts.arabic >= 2 && counts.latinExtended >= 2;
  // High concentration of suspicious characters
  const manySuspicious = counts.suspicious >= 3 || counts.suspicious > totalChars * 0.2;
  // Mostly Latin Extended with hardly any basic Latin letters
  const latinExtendedDominates =
    counts.latinExtended > totalChars * 0.3 && counts.basicLatinLetters < totalChars * 0.2;
  // Arabic or suspicious characters mixed into Latin Extended text
  const mixedScripts = (counts.arabic >= 1 || counts.suspicious >= 1) && counts.latinExtended >= 3;

  return (
    mostlyControl ||
    hasPrivateUse ||
    arabicWithLatinExtended ||
    manySuspicious ||
    latinExtendedDominates ||
    mixedScripts
  );
}
573
/**
 * PDF engine that reads text with PDF.js and hands image-bounds extraction and
 * page rasterization to a PdfiumRenderer.
 *
 * An instance tracks one document at a time: loadDocument() records the input,
 * and close() releases it.
 */
export class PdfJsEngine {
  /** Engine identifier. */
  name = "pdfjs";
  /** PdfiumRenderer created on first use by extractPage()/renderPageImage(); null until then. */
  pdfiumRenderer = null;
  /** File path of the loaded PDF, or null when it was loaded from bytes. */
  currentPdfPath = null;
  /** Private copy of the loaded PDF bytes, kept for the PDFium renderer. */
  currentPdfData = null;

  /**
   * Open a PDF with PDF.js.
   *
   * @param {string|Uint8Array} input - File path, or the PDF bytes.
   * @param {string} [password] - Password for encrypted documents.
   * @returns {Promise<{numPages: number, data: Uint8Array, metadata: object, _pdfDocument: object}>}
   *   Handle consumed by the other methods of this class.
   * @throws {Error} With a user-facing message when the PDF needs a password
   *   or the supplied one is wrong; any other loading error is rethrown as is.
   */
  async loadDocument(input, password) {
    let data;
    if (typeof input === "string") {
      data = new Uint8Array(await fs.readFile(input));
      this.currentPdfPath = input;
    } else {
      // pdf.js requires a plain Uint8Array, not a Buffer subclass. Copy
      // rather than view-on-input, because pdf.js transfers the underlying
      // ArrayBuffer to its worker — which would detach the caller's view
      // and any downstream consumer (e.g. the OCR renderer) would see zero
      // bytes. A fresh Uint8Array owns its own ArrayBuffer.
      data = new Uint8Array(input);
      this.currentPdfPath = null;
    }
    // Store data for buffer-based rendering. Keep a separate copy so the
    // one we hand to pdf.js can be transferred without affecting this one.
    this.currentPdfData = new Uint8Array(data);

    const loadingTask = getDocument({
      data,
      password,
      cMapUrl: CMAP_URL,
      cMapPacked: CMAP_PACKED,
      standardFontDataUrl: STANDARD_FONT_DATA_URL,
      wasmUrl: WASM_URL,
      verbosity: 0, // VerbosityLevel.ERRORS — suppress Type3 font warnings
    });

    let pdfDocument;
    try {
      pdfDocument = await loadingTask.promise;
    } catch (error) {
      // Nothing was loaded: do not keep a path or a full copy of the bytes
      // around for a document that cannot be used.
      this.currentPdfPath = null;
      this.currentPdfData = null;

      const message = error instanceof Error ? error.message : String(error);
      if (message.includes("password") || message.includes("Password")) {
        if (password) {
          throw new Error("Incorrect password for this PDF. Please check the password and try again.", { cause: error });
        } else {
          throw new Error("This PDF is password-protected. Use --password <password> to provide the document password.", { cause: error });
        }
      }
      throw error;
    }

    const metadata = await pdfDocument.getMetadata();
    return {
      numPages: pdfDocument.numPages,
      data,
      metadata,
      _pdfDocument: pdfDocument,
    };
  }

  /**
   * Extract the text items and image bounds of one page.
   *
   * @param {object} doc - Handle returned by loadDocument().
   * @param {number} pageNum - 1-based page number.
   * @param {{extractImages?: boolean}} [options] - Pass extractImages: false
   *   to skip image-bounds extraction.
   * @returns {Promise<object>} Page data: pageNum, width, height, textItems,
   *   images, annotations (always empty) and garbledTextRegions (undefined
   *   when no garbled text was found).
   */
  async extractPage(doc, pageNum, options) {
    const pdfDocument = doc._pdfDocument;
    const page = await pdfDocument.getPage(pageNum);
    try {
      // Get viewport
      const viewport = page.getViewport({ scale: 1.0 });
      // Extract text content
      const textContent = await page.getTextContent();
      const viewportWidth = viewport.width;
      const viewportHeight = viewport.height;
      const viewportTransform = viewport.transform;
      const textItems = [];
      const garbledTextRegions = [];

      for (const item of textContent.items) {
        // Skip items with zero dimensions
        if (item.height === 0 || item.width === 0) {
          continue;
        }
        // Apply viewport transformation to convert PDF coordinates to screen coordinates
        // This properly handles Y-axis flip (PDF is bottom-up, screen is top-down)
        const cm = multiplyMatrices(viewportTransform, item.transform);
        // Get lower-left corner (text space origin)
        const ll = applyTransformation({ x: 0, y: 0 }, cm);
        const scaleX = Math.sqrt(item.transform[0] ** 2 + item.transform[1] ** 2);
        const scaleY = Math.sqrt(item.transform[2] ** 2 + item.transform[3] ** 2);
        // Upper-right corner: the item's width/height converted back to text space
        const ur = applyTransformation({ x: item.width / scaleX, y: item.height / scaleY }, cm);
        const left = Math.min(ll.x, ur.x);
        const right = Math.max(ll.x, ur.x);
        const top = Math.min(ll.y, ur.y);
        const bottom = Math.max(ll.y, ur.y);
        // A degenerate transform (zero scale) divides by zero above and yields
        // NaN/Infinity; NaN would slip through the bounds check below.
        if (![left, right, top, bottom].every(Number.isFinite)) {
          continue;
        }
        // Skip items that are off-page (negative coordinates or beyond page bounds)
        if (top < 0 || left < 0 || top > viewportHeight || left > viewportWidth) {
          continue;
        }
        const width = right - left;
        const height = bottom - top;
        // Get rotation angle from the transformation matrix, normalized to be non-negative
        let rotation = getRotation(cm);
        if (rotation < 0) {
          rotation += 360;
        }
        // Decode buggy font markers using glyph names from font metadata
        let decodedStr = item.str;
        if (decodedStr.includes(BUGGY_FONT_MARKER_CHECK)) {
          BUGGY_FONT_MARKER_RE.lastIndex = 0;
          decodedStr = decodeBuggyFontMarkers(decodedStr);
        }
        // Handle pipe-separated characters: " |a| |r| |X| " -> "arX"
        if (decodedStr.includes("|")) {
          PIPE_PATTERN_REGEX.lastIndex = 0;
          const matches = [...decodedStr.matchAll(PIPE_PATTERN_REGEX)];
          if (matches.length > 0) {
            decodedStr = matches.map((m) => m[1]).join("");
          }
        }
        // Skip garbled text from fonts with corrupted ToUnicode mappings,
        // but remember where it was
        if (isGarbledFontOutput(decodedStr)) {
          garbledTextRegions.push({ x: left, y: top, width, height });
          continue;
        }
        // Strip remaining control characters, map Windows-1252, decompose ligatures
        decodedStr = stripControlChars(decodedStr);
        textItems.push({
          str: decodedStr,
          x: left,
          y: top,
          width,
          height,
          w: width,
          h: height,
          r: rotation,
          fontName: item.fontName,
          fontSize: Math.sqrt(item.transform[0] * item.transform[0] + item.transform[1] * item.transform[1]),
          confidence: 1.0,
        });
      }

      let images = [];
      if (options?.extractImages !== false) {
        try {
          const pdfInput = this.currentPdfPath || this.currentPdfData || doc.data;
          if (!this.pdfiumRenderer) {
            this.pdfiumRenderer = new PdfiumRenderer();
            await this.pdfiumRenderer.loadDocument(pdfInput);
          }
          const imageBounds = await this.pdfiumRenderer.extractImageBounds(pdfInput, pageNum);
          images = imageBounds.map((bounds) => ({
            x: bounds.x,
            y: bounds.y,
            width: bounds.width,
            height: bounds.height,
          }));
        } catch {
          // Image extraction is best-effort: a failure leaves `images` empty
          // and must not fail text extraction.
        }
      }

      // Skip annotation extraction - not currently used in processing pipeline
      // Can be re-enabled if needed for link extraction, etc.
      const annotations = [];
      return {
        pageNum,
        width: viewport.width,
        height: viewport.height,
        textItems,
        images,
        annotations,
        garbledTextRegions: garbledTextRegions.length > 0 ? garbledTextRegions : undefined,
      };
    } finally {
      // Release the page's resources even when extraction throws.
      await page.cleanup();
    }
  }

  /**
   * Extract several pages sequentially.
   *
   * @param {object} doc - Handle returned by loadDocument().
   * @param {number} [maxPages] - Upper bound on the number of pages returned.
   * @param {string} [targetPages] - Page selection such as "1-3,7"; when
   *   omitted, pages are taken from page 1 onward.
   * @param {object} [options] - Forwarded to extractPage().
   * @returns {Promise<object[]>} Page data in ascending page order.
   */
  async extractAllPages(doc, maxPages, targetPages, options) {
    const numPages = Math.min(doc.numPages, maxPages || doc.numPages);
    const pages = [];
    // Parse target pages if specified
    const pageNumbers = targetPages
      ? this.parseTargetPages(targetPages, doc.numPages)
      : Array.from({ length: numPages }, (_, i) => i + 1);
    for (const pageNum of pageNumbers) {
      if (maxPages && pages.length >= maxPages) {
        break;
      }
      pages.push(await this.extractPage(doc, pageNum, options));
    }
    return pages;
  }

  /**
   * Rasterize a page through the PDFium renderer.
   *
   * @param {object} _doc - Unused; the input recorded by loadDocument() is rendered.
   * @param {number} pageNum - 1-based page number.
   * @param {number} dpi - Render resolution.
   * @param {string} [password] - Password for encrypted documents.
   * @returns {Promise<*>} Whatever PdfiumRenderer.renderPageToBuffer() resolves to.
   * @throws {Error} When no document has been loaded.
   */
  async renderPageImage(_doc, pageNum, dpi, password) {
    const pdfInput = this.currentPdfPath || this.currentPdfData;
    if (!pdfInput) {
      throw new Error("No PDF path or data available for rendering");
    }
    if (!this.pdfiumRenderer) {
      this.pdfiumRenderer = new PdfiumRenderer();
      await this.pdfiumRenderer.loadDocument(pdfInput, password);
    }
    return await this.pdfiumRenderer.renderPageToBuffer(pdfInput, pageNum, dpi, password);
  }

  /**
   * Release the PDF.js document, the PDFium renderer and the cached input.
   *
   * The renderer is closed and the cached state cleared even if destroying
   * the PDF.js document fails; that failure is still reported to the caller.
   *
   * @param {object} doc - Handle returned by loadDocument().
   * @returns {Promise<void>}
   */
  async close(doc) {
    try {
      const pdfDocument = doc._pdfDocument;
      if (pdfDocument && pdfDocument.destroy) {
        await pdfDocument.destroy();
      }
    } finally {
      // Detach the state first so the engine is reset even if close() throws.
      const renderer = this.pdfiumRenderer;
      this.pdfiumRenderer = null;
      this.currentPdfPath = null;
      this.currentPdfData = null;
      // Clean up PDFium renderer (only if it was initialized)
      if (renderer) {
        await renderer.close();
      }
    }
  }

  /**
   * Turn a page selection such as "1-5, 8, 10-12" into page numbers.
   *
   * Entries that are not numbers, reversed ranges and pages outside
   * 1..maxPages are ignored.
   *
   * @param {string} targetPages - Comma-separated pages and inclusive ranges.
   * @param {number} maxPages - Highest valid page number.
   * @returns {number[]} Unique page numbers in ascending order.
   */
  parseTargetPages(targetPages, maxPages) {
    const selected = new Set();
    for (const part of targetPages.split(",")) {
      const spec = part.trim();
      if (spec.includes("-")) {
        // Range: "1-5"
        const [start, end] = spec.split("-").map((n) => Number.parseInt(n.trim(), 10));
        if (Number.isNaN(start) || Number.isNaN(end)) {
          continue;
        }
        const last = Math.min(end, maxPages);
        for (let i = Math.max(start, 1); i <= last; i++) {
          selected.add(i);
        }
      } else {
        // Single page: "10"
        const pageNum = Number.parseInt(spec, 10);
        if (pageNum >= 1 && pageNum <= maxPages) {
          selected.add(pageNum);
        }
      }
    }
    return [...selected].sort((a, b) => a - b);
  }
}
804
- //# sourceMappingURL=pdfjs.js.map