@ncukondo/search-hub 0.12.1 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334) hide show
  1. package/dist/_virtual/_commonjsHelpers.js +30 -0
  2. package/dist/_virtual/_commonjsHelpers.js.map +1 -0
  3. package/dist/_virtual/aliases.js +5 -0
  4. package/dist/_virtual/aliases.js.map +1 -0
  5. package/dist/_virtual/attributes.js +5 -0
  6. package/dist/_virtual/attributes.js.map +1 -0
  7. package/dist/_virtual/back.js +5 -0
  8. package/dist/_virtual/back.js.map +1 -0
  9. package/dist/_virtual/comment.js +5 -0
  10. package/dist/_virtual/comment.js.map +1 -0
  11. package/dist/_virtual/compile.js +5 -0
  12. package/dist/_virtual/compile.js.map +1 -0
  13. package/dist/_virtual/compile2.js +5 -0
  14. package/dist/_virtual/compile2.js.map +1 -0
  15. package/dist/_virtual/decode-data-html.js +5 -0
  16. package/dist/_virtual/decode-data-html.js.map +1 -0
  17. package/dist/_virtual/decode-data-xml.js +5 -0
  18. package/dist/_virtual/decode-data-xml.js.map +1 -0
  19. package/dist/_virtual/decode.js +5 -0
  20. package/dist/_virtual/decode.js.map +1 -0
  21. package/dist/_virtual/decode_codepoint.js +5 -0
  22. package/dist/_virtual/decode_codepoint.js.map +1 -0
  23. package/dist/_virtual/encode-html.js +5 -0
  24. package/dist/_virtual/encode-html.js.map +1 -0
  25. package/dist/_virtual/encode.js +5 -0
  26. package/dist/_virtual/encode.js.map +1 -0
  27. package/dist/_virtual/escape.js +5 -0
  28. package/dist/_virtual/escape.js.map +1 -0
  29. package/dist/_virtual/feeds.js +5 -0
  30. package/dist/_virtual/feeds.js.map +1 -0
  31. package/dist/_virtual/filters.js +5 -0
  32. package/dist/_virtual/filters.js.map +1 -0
  33. package/dist/_virtual/foreignNames.js +5 -0
  34. package/dist/_virtual/foreignNames.js.map +1 -0
  35. package/dist/_virtual/general.js +5 -0
  36. package/dist/_virtual/general.js.map +1 -0
  37. package/dist/_virtual/he.js +5 -0
  38. package/dist/_virtual/he.js.map +1 -0
  39. package/dist/_virtual/helpers.js +5 -0
  40. package/dist/_virtual/helpers.js.map +1 -0
  41. package/dist/_virtual/html.js +5 -0
  42. package/dist/_virtual/html.js.map +1 -0
  43. package/dist/_virtual/index.js +6 -0
  44. package/dist/_virtual/index.js.map +1 -0
  45. package/dist/_virtual/index10.js +5 -0
  46. package/dist/_virtual/index10.js.map +1 -0
  47. package/dist/_virtual/index11.js +5 -0
  48. package/dist/_virtual/index11.js.map +1 -0
  49. package/dist/_virtual/index2.js +5 -0
  50. package/dist/_virtual/index2.js.map +1 -0
  51. package/dist/_virtual/index3.js +5 -0
  52. package/dist/_virtual/index3.js.map +1 -0
  53. package/dist/_virtual/index4.js +5 -0
  54. package/dist/_virtual/index4.js.map +1 -0
  55. package/dist/_virtual/index5.js +7 -0
  56. package/dist/_virtual/index5.js.map +1 -0
  57. package/dist/_virtual/index6.js +5 -0
  58. package/dist/_virtual/index6.js.map +1 -0
  59. package/dist/_virtual/index7.js +5 -0
  60. package/dist/_virtual/index7.js.map +1 -0
  61. package/dist/_virtual/index8.js +5 -0
  62. package/dist/_virtual/index8.js.map +1 -0
  63. package/dist/_virtual/index9.js +5 -0
  64. package/dist/_virtual/index9.js.map +1 -0
  65. package/dist/_virtual/legacy.js +5 -0
  66. package/dist/_virtual/legacy.js.map +1 -0
  67. package/dist/_virtual/manipulation.js +5 -0
  68. package/dist/_virtual/manipulation.js.map +1 -0
  69. package/dist/_virtual/matcher.js +5 -0
  70. package/dist/_virtual/matcher.js.map +1 -0
  71. package/dist/_virtual/node.js +5 -0
  72. package/dist/_virtual/node.js.map +1 -0
  73. package/dist/_virtual/node2.js +5 -0
  74. package/dist/_virtual/node2.js.map +1 -0
  75. package/dist/_virtual/parse.js +5 -0
  76. package/dist/_virtual/parse.js.map +1 -0
  77. package/dist/_virtual/parse2.js +5 -0
  78. package/dist/_virtual/parse2.js.map +1 -0
  79. package/dist/_virtual/pseudos.js +5 -0
  80. package/dist/_virtual/pseudos.js.map +1 -0
  81. package/dist/_virtual/querying.js +5 -0
  82. package/dist/_virtual/querying.js.map +1 -0
  83. package/dist/_virtual/sort.js +5 -0
  84. package/dist/_virtual/sort.js.map +1 -0
  85. package/dist/_virtual/stringify.js +5 -0
  86. package/dist/_virtual/stringify.js.map +1 -0
  87. package/dist/_virtual/subselects.js +5 -0
  88. package/dist/_virtual/subselects.js.map +1 -0
  89. package/dist/_virtual/text.js +5 -0
  90. package/dist/_virtual/text.js.map +1 -0
  91. package/dist/_virtual/traversal.js +5 -0
  92. package/dist/_virtual/traversal.js.map +1 -0
  93. package/dist/_virtual/type.js +5 -0
  94. package/dist/_virtual/type.js.map +1 -0
  95. package/dist/_virtual/valid.js +5 -0
  96. package/dist/_virtual/valid.js.map +1 -0
  97. package/dist/_virtual/void-tag.js +5 -0
  98. package/dist/_virtual/void-tag.js.map +1 -0
  99. package/dist/cli/commands/fulltext/attach.js +1 -1
  100. package/dist/cli/commands/fulltext/attach.js.map +1 -1
  101. package/dist/cli/commands/fulltext/check.d.ts +1 -2
  102. package/dist/cli/commands/fulltext/check.d.ts.map +1 -1
  103. package/dist/cli/commands/fulltext/check.js +4 -2
  104. package/dist/cli/commands/fulltext/check.js.map +1 -1
  105. package/dist/cli/commands/fulltext/convert.d.ts.map +1 -1
  106. package/dist/cli/commands/fulltext/convert.js +8 -8
  107. package/dist/cli/commands/fulltext/convert.js.map +1 -1
  108. package/dist/cli/commands/fulltext/fetch.d.ts.map +1 -1
  109. package/dist/cli/commands/fulltext/fetch.js +10 -6
  110. package/dist/cli/commands/fulltext/fetch.js.map +1 -1
  111. package/dist/cli/commands/fulltext/index.d.ts.map +1 -1
  112. package/dist/cli/commands/fulltext/index.js +2 -0
  113. package/dist/cli/commands/fulltext/index.js.map +1 -1
  114. package/dist/cli/commands/fulltext/init.d.ts.map +1 -1
  115. package/dist/cli/commands/fulltext/init.js +6 -5
  116. package/dist/cli/commands/fulltext/init.js.map +1 -1
  117. package/dist/cli/commands/fulltext/pending.d.ts +1 -1
  118. package/dist/cli/commands/fulltext/pending.d.ts.map +1 -1
  119. package/dist/cli/commands/fulltext/pending.js +4 -2
  120. package/dist/cli/commands/fulltext/pending.js.map +1 -1
  121. package/dist/cli/commands/fulltext/status.d.ts.map +1 -1
  122. package/dist/cli/commands/fulltext/status.js +4 -2
  123. package/dist/cli/commands/fulltext/status.js.map +1 -1
  124. package/dist/cli/commands/fulltext/sync.d.ts.map +1 -1
  125. package/dist/cli/commands/fulltext/sync.js +6 -2
  126. package/dist/cli/commands/fulltext/sync.js.map +1 -1
  127. package/dist/cli/commands/review/types.d.ts +1 -1
  128. package/dist/cli/commands/review/types.d.ts.map +1 -1
  129. package/dist/cli/commands/review/types.js.map +1 -1
  130. package/dist/config/schema.d.ts +2 -0
  131. package/dist/config/schema.d.ts.map +1 -1
  132. package/dist/config/schema.js +6 -0
  133. package/dist/config/schema.js.map +1 -1
  134. package/dist/{fulltext → integration}/attach-shared.d.ts +2 -2
  135. package/dist/integration/attach-shared.d.ts.map +1 -0
  136. package/dist/integration/attach-shared.js.map +1 -0
  137. package/dist/integration/fulltext-attach.js +1 -1
  138. package/dist/integration/fulltext-attach.js.map +1 -1
  139. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/citation-key.js +1 -1
  140. package/dist/node_modules/@ncukondo/academic-fulltext/dist/citation-key.js.map +1 -0
  141. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/arxiv-html-parser.js +434 -0
  142. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/arxiv-html-parser.js.map +1 -0
  143. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/index.js +93 -0
  144. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/index.js.map +1 -0
  145. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/jats-parser.js +1060 -0
  146. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/jats-parser.js.map +1 -0
  147. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/convert/markdown-writer.js +146 -117
  148. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/markdown-writer.js.map +1 -0
  149. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/arxiv.js +8 -1
  150. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/arxiv.js.map +1 -0
  151. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/core.js +6 -3
  152. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/core.js.map +1 -0
  153. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/index.js +139 -0
  154. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/index.js.map +1 -0
  155. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/ncbi-id-converter.js +46 -0
  156. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/ncbi-id-converter.js.map +1 -0
  157. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/pmc.js +8 -4
  158. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/pmc.js.map +1 -0
  159. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/unpaywall.js +43 -9
  160. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/unpaywall.js.map +1 -0
  161. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/arxiv-html.js +48 -0
  162. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/arxiv-html.js.map +1 -0
  163. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/downloader.js +64 -0
  164. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/downloader.js.map +1 -0
  165. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/orchestrator.js +236 -0
  166. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/orchestrator.js.map +1 -0
  167. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/download/pmc-xml.js +2 -1
  168. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/pmc-xml.js.map +1 -0
  169. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/meta.js +15 -10
  170. package/dist/node_modules/@ncukondo/academic-fulltext/dist/meta.js.map +1 -0
  171. package/dist/node_modules/@ncukondo/academic-fulltext/dist/paths.js.map +1 -0
  172. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/readme.js +8 -4
  173. package/dist/node_modules/@ncukondo/academic-fulltext/dist/readme.js.map +1 -0
  174. package/dist/node_modules/boolbase/index.js +19 -0
  175. package/dist/node_modules/boolbase/index.js.map +1 -0
  176. package/dist/node_modules/css-select/lib/attributes.js +203 -0
  177. package/dist/node_modules/css-select/lib/attributes.js.map +1 -0
  178. package/dist/node_modules/css-select/lib/compile.js +141 -0
  179. package/dist/node_modules/css-select/lib/compile.js.map +1 -0
  180. package/dist/node_modules/css-select/lib/general.js +154 -0
  181. package/dist/node_modules/css-select/lib/general.js.map +1 -0
  182. package/dist/node_modules/css-select/lib/index.js +128 -0
  183. package/dist/node_modules/css-select/lib/index.js.map +1 -0
  184. package/dist/node_modules/css-select/lib/pseudo-selectors/aliases.js +40 -0
  185. package/dist/node_modules/css-select/lib/pseudo-selectors/aliases.js.map +1 -0
  186. package/dist/node_modules/css-select/lib/pseudo-selectors/filters.js +163 -0
  187. package/dist/node_modules/css-select/lib/pseudo-selectors/filters.js.map +1 -0
  188. package/dist/node_modules/css-select/lib/pseudo-selectors/index.js +71 -0
  189. package/dist/node_modules/css-select/lib/pseudo-selectors/index.js.map +1 -0
  190. package/dist/node_modules/css-select/lib/pseudo-selectors/pseudos.js +93 -0
  191. package/dist/node_modules/css-select/lib/pseudo-selectors/pseudos.js.map +1 -0
  192. package/dist/node_modules/css-select/lib/pseudo-selectors/subselects.js +111 -0
  193. package/dist/node_modules/css-select/lib/pseudo-selectors/subselects.js.map +1 -0
  194. package/dist/node_modules/css-select/lib/sort.js +78 -0
  195. package/dist/node_modules/css-select/lib/sort.js.map +1 -0
  196. package/dist/node_modules/css-what/lib/es/index.js +12 -0
  197. package/dist/node_modules/css-what/lib/es/index.js.map +1 -0
  198. package/dist/node_modules/css-what/lib/es/parse.js +349 -0
  199. package/dist/node_modules/css-what/lib/es/parse.js.map +1 -0
  200. package/dist/node_modules/css-what/lib/es/stringify.js +102 -0
  201. package/dist/node_modules/css-what/lib/es/stringify.js.map +1 -0
  202. package/dist/node_modules/css-what/lib/es/types.js +37 -0
  203. package/dist/node_modules/css-what/lib/es/types.js.map +1 -0
  204. package/dist/node_modules/dom-serializer/lib/foreignNames.js +117 -0
  205. package/dist/node_modules/dom-serializer/lib/foreignNames.js.map +1 -0
  206. package/dist/node_modules/dom-serializer/lib/index.js +207 -0
  207. package/dist/node_modules/dom-serializer/lib/index.js.map +1 -0
  208. package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode.js +368 -0
  209. package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode.js.map +1 -0
  210. package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode_codepoint.js +70 -0
  211. package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode_codepoint.js.map +1 -0
  212. package/dist/node_modules/dom-serializer/node_modules/entities/lib/encode.js +61 -0
  213. package/dist/node_modules/dom-serializer/node_modules/entities/lib/encode.js.map +1 -0
  214. package/dist/node_modules/dom-serializer/node_modules/entities/lib/escape.js +79 -0
  215. package/dist/node_modules/dom-serializer/node_modules/entities/lib/escape.js.map +1 -0
  216. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-html.js +18 -0
  217. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-html.js.map +1 -0
  218. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-xml.js +18 -0
  219. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-xml.js.map +1 -0
  220. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/encode-html.js +19 -0
  221. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/encode-html.js.map +1 -0
  222. package/dist/node_modules/dom-serializer/node_modules/entities/lib/index.js +139 -0
  223. package/dist/node_modules/dom-serializer/node_modules/entities/lib/index.js.map +1 -0
  224. package/dist/node_modules/domelementtype/lib/index.js +40 -0
  225. package/dist/node_modules/domelementtype/lib/index.js.map +1 -0
  226. package/dist/node_modules/domhandler/lib/index.js +167 -0
  227. package/dist/node_modules/domhandler/lib/index.js.map +1 -0
  228. package/dist/node_modules/domhandler/lib/node.js +439 -0
  229. package/dist/node_modules/domhandler/lib/node.js.map +1 -0
  230. package/dist/node_modules/domutils/lib/feeds.js +146 -0
  231. package/dist/node_modules/domutils/lib/feeds.js.map +1 -0
  232. package/dist/node_modules/domutils/lib/helpers.js +97 -0
  233. package/dist/node_modules/domutils/lib/helpers.js.map +1 -0
  234. package/dist/node_modules/domutils/lib/index.js +65 -0
  235. package/dist/node_modules/domutils/lib/index.js.map +1 -0
  236. package/dist/node_modules/domutils/lib/legacy.js +124 -0
  237. package/dist/node_modules/domutils/lib/legacy.js.map +1 -0
  238. package/dist/node_modules/domutils/lib/manipulation.js +107 -0
  239. package/dist/node_modules/domutils/lib/manipulation.js.map +1 -0
  240. package/dist/node_modules/domutils/lib/querying.js +102 -0
  241. package/dist/node_modules/domutils/lib/querying.js.map +1 -0
  242. package/dist/node_modules/domutils/lib/stringify.js +65 -0
  243. package/dist/node_modules/domutils/lib/stringify.js.map +1 -0
  244. package/dist/node_modules/domutils/lib/traversal.js +69 -0
  245. package/dist/node_modules/domutils/lib/traversal.js.map +1 -0
  246. package/dist/node_modules/he/he.js +256 -0
  247. package/dist/node_modules/he/he.js.map +1 -0
  248. package/dist/node_modules/node-html-parser/dist/back.js +16 -0
  249. package/dist/node_modules/node-html-parser/dist/back.js.map +1 -0
  250. package/dist/node_modules/node-html-parser/dist/index.js +48 -0
  251. package/dist/node_modules/node-html-parser/dist/index.js.map +1 -0
  252. package/dist/node_modules/node-html-parser/dist/matcher.js +112 -0
  253. package/dist/node_modules/node-html-parser/dist/matcher.js.map +1 -0
  254. package/dist/node_modules/node-html-parser/dist/nodes/comment.js +41 -0
  255. package/dist/node_modules/node-html-parser/dist/nodes/comment.js.map +1 -0
  256. package/dist/node_modules/node-html-parser/dist/nodes/html.js +1048 -0
  257. package/dist/node_modules/node-html-parser/dist/nodes/html.js.map +1 -0
  258. package/dist/node_modules/node-html-parser/dist/nodes/node.js +49 -0
  259. package/dist/node_modules/node-html-parser/dist/nodes/node.js.map +1 -0
  260. package/dist/node_modules/node-html-parser/dist/nodes/text.js +106 -0
  261. package/dist/node_modules/node-html-parser/dist/nodes/text.js.map +1 -0
  262. package/dist/node_modules/node-html-parser/dist/nodes/type.js +19 -0
  263. package/dist/node_modules/node-html-parser/dist/nodes/type.js.map +1 -0
  264. package/dist/node_modules/node-html-parser/dist/parse.js +20 -0
  265. package/dist/node_modules/node-html-parser/dist/parse.js.map +1 -0
  266. package/dist/node_modules/node-html-parser/dist/valid.js +19 -0
  267. package/dist/node_modules/node-html-parser/dist/valid.js.map +1 -0
  268. package/dist/node_modules/node-html-parser/dist/void-tag.js +36 -0
  269. package/dist/node_modules/node-html-parser/dist/void-tag.js.map +1 -0
  270. package/dist/node_modules/nth-check/lib/compile.js +76 -0
  271. package/dist/node_modules/nth-check/lib/compile.js.map +1 -0
  272. package/dist/node_modules/nth-check/lib/index.js +36 -0
  273. package/dist/node_modules/nth-check/lib/index.js.map +1 -0
  274. package/dist/node_modules/nth-check/lib/parse.js +69 -0
  275. package/dist/node_modules/nth-check/lib/parse.js.map +1 -0
  276. package/package.json +2 -2
  277. package/dist/fulltext/attach-shared.d.ts.map +0 -1
  278. package/dist/fulltext/attach-shared.js.map +0 -1
  279. package/dist/fulltext/citation-key.d.ts +0 -15
  280. package/dist/fulltext/citation-key.d.ts.map +0 -1
  281. package/dist/fulltext/citation-key.js.map +0 -1
  282. package/dist/fulltext/convert/index.d.ts +0 -20
  283. package/dist/fulltext/convert/index.d.ts.map +0 -1
  284. package/dist/fulltext/convert/index.js +0 -50
  285. package/dist/fulltext/convert/index.js.map +0 -1
  286. package/dist/fulltext/convert/jats-parser.d.ts +0 -36
  287. package/dist/fulltext/convert/jats-parser.d.ts.map +0 -1
  288. package/dist/fulltext/convert/jats-parser.js +0 -887
  289. package/dist/fulltext/convert/jats-parser.js.map +0 -1
  290. package/dist/fulltext/convert/markdown-writer.d.ts +0 -6
  291. package/dist/fulltext/convert/markdown-writer.d.ts.map +0 -1
  292. package/dist/fulltext/convert/markdown-writer.js.map +0 -1
  293. package/dist/fulltext/convert/types.d.ts +0 -141
  294. package/dist/fulltext/convert/types.d.ts.map +0 -1
  295. package/dist/fulltext/discovery/arxiv.d.ts +0 -11
  296. package/dist/fulltext/discovery/arxiv.d.ts.map +0 -1
  297. package/dist/fulltext/discovery/arxiv.js.map +0 -1
  298. package/dist/fulltext/discovery/core.d.ts +0 -11
  299. package/dist/fulltext/discovery/core.d.ts.map +0 -1
  300. package/dist/fulltext/discovery/core.js.map +0 -1
  301. package/dist/fulltext/discovery/index.d.ts +0 -28
  302. package/dist/fulltext/discovery/index.d.ts.map +0 -1
  303. package/dist/fulltext/discovery/index.js +0 -75
  304. package/dist/fulltext/discovery/index.js.map +0 -1
  305. package/dist/fulltext/discovery/pmc.d.ts +0 -19
  306. package/dist/fulltext/discovery/pmc.d.ts.map +0 -1
  307. package/dist/fulltext/discovery/pmc.js.map +0 -1
  308. package/dist/fulltext/discovery/unpaywall.d.ts +0 -11
  309. package/dist/fulltext/discovery/unpaywall.d.ts.map +0 -1
  310. package/dist/fulltext/discovery/unpaywall.js.map +0 -1
  311. package/dist/fulltext/download/downloader.d.ts +0 -21
  312. package/dist/fulltext/download/downloader.d.ts.map +0 -1
  313. package/dist/fulltext/download/downloader.js +0 -59
  314. package/dist/fulltext/download/downloader.js.map +0 -1
  315. package/dist/fulltext/download/orchestrator.d.ts +0 -33
  316. package/dist/fulltext/download/orchestrator.d.ts.map +0 -1
  317. package/dist/fulltext/download/orchestrator.js +0 -125
  318. package/dist/fulltext/download/orchestrator.js.map +0 -1
  319. package/dist/fulltext/download/pmc-xml.d.ts +0 -13
  320. package/dist/fulltext/download/pmc-xml.d.ts.map +0 -1
  321. package/dist/fulltext/download/pmc-xml.js.map +0 -1
  322. package/dist/fulltext/meta.d.ts +0 -25
  323. package/dist/fulltext/meta.d.ts.map +0 -1
  324. package/dist/fulltext/meta.js.map +0 -1
  325. package/dist/fulltext/paths.d.ts +0 -12
  326. package/dist/fulltext/paths.d.ts.map +0 -1
  327. package/dist/fulltext/paths.js.map +0 -1
  328. package/dist/fulltext/readme.d.ts +0 -4
  329. package/dist/fulltext/readme.d.ts.map +0 -1
  330. package/dist/fulltext/readme.js.map +0 -1
  331. package/dist/fulltext/types.d.ts +0 -90
  332. package/dist/fulltext/types.d.ts.map +0 -1
  333. /package/dist/{fulltext → integration}/attach-shared.js +0 -0
  334. /package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/paths.js +0 -0
@@ -0,0 +1,139 @@
1
+ import { checkArxiv } from "./arxiv.js";
2
+ import { checkCore } from "./core.js";
3
+ import { resolveDoiToPmcid } from "./ncbi-id-converter.js";
4
+ import { checkPmc } from "./pmc.js";
5
+ import { checkUnpaywallDetailed } from "./unpaywall.js";
6
/**
 * Source names, in the order they are consulted when the caller's
 * configuration does not supply a non-empty preference list.
 */
const DEFAULT_SOURCE_ORDER = ["pmc", "arxiv", "unpaywall", "core"];
7
/**
 * Query PMC for an article using whichever of its `pmid` / `pmcid` are set.
 *
 * @param {object} article - Article identifiers; only `pmid` and `pmcid` are read.
 * @returns {Promise<*>} The value `checkPmc` resolves to, or `undefined` when the
 *   article has neither identifier (callers treat `undefined` as "skipped").
 */
async function checkPmcSource(article) {
  const { pmid, pmcid } = article;
  if (!(pmid || pmcid)) {
    return undefined;
  }
  // Only forward the identifiers that are actually present.
  const lookup = {
    ...(pmid ? { pmid } : {}),
    ...(pmcid ? { pmcid } : {}),
  };
  return await checkPmc(lookup);
}
17
/**
 * Query arXiv for an article that carries an arXiv identifier.
 *
 * @param {object} article - Article identifiers; only `arxivId` is read.
 * @returns {*} The result of `checkArxiv(arxivId)`, or `undefined` when the
 *   article has no `arxivId` (callers treat `undefined` as "skipped").
 */
function checkArxivSource(article) {
  const { arxivId } = article;
  return arxivId ? checkArxiv(arxivId) : undefined;
}
22
/**
 * Query CORE for an article by DOI; requires a configured API key.
 *
 * @param {object} article - Article identifiers; only `doi` is read.
 * @param {object} config - Discovery configuration; only `coreApiKey` is read.
 * @returns {Promise<*>} The value `checkCore` resolves to, or `undefined` when
 *   the API key or the DOI is missing (callers treat `undefined` as "skipped").
 */
async function checkCoreSource(article, config) {
  const { coreApiKey } = config;
  if (!coreApiKey) {
    return undefined;
  }
  const { doi } = article;
  if (!doi) {
    return undefined;
  }
  return await checkCore(doi, coreApiKey);
}
27
/**
 * Dispatch table from source name to its checker function.
 * "unpaywall" is intentionally absent: it is handled by its own routine.
 */
const sourceCheckers = {
  pmc: checkPmcSource,
  arxiv: checkArxivSource,
  core: checkCoreSource,
};
32
/**
 * Derive the overall open-access status from what the source checks produced.
 *
 * @param {Array} locations - Locations collected so far.
 * @param {Array} errors - Errors collected so far.
 * @param {number} sourcesChecked - Number of sources that were actually queried.
 * @returns {"open"|"unknown"|"closed"} "open" if any location was found;
 *   "unknown" if there was at least one error and the error count is not
 *   smaller than the number of sources checked; otherwise "closed".
 */
function determineOAStatus(locations, errors, sourcesChecked) {
  const foundAny = locations.length > 0;
  if (foundAny) {
    return "open";
  }
  const nothingSucceeded = errors.length > 0 && errors.length >= sourcesChecked;
  return nothingSucceeded ? "unknown" : "closed";
}
39
/**
 * Pre-enrich an article that has only a DOI by resolving its PMCID/PMID
 * through `resolveDoiToPmcid`.
 *
 * @param {object} article - Article identifiers (`doi`, `pmcid`, `pmid` are read).
 * @param {object} config - Reads `ncbiEmail`, `unpaywallEmail` and `ncbiTool`.
 * @returns {Promise<{enriched: object, discoveredIds: object}>} `enriched` is
 *   the original article object when nothing was resolved, otherwise a shallow
 *   copy with the resolved ids added; `discoveredIds` holds only newly found ids.
 */
async function enrichArticleIds(article, config) {
  const discoveredIds = {};
  const unchanged = { enriched: article, discoveredIds };

  // Enrichment only applies when a DOI is the sole identifier available.
  const needsLookup = Boolean(article.doi) && !article.pmcid && !article.pmid;
  if (!needsLookup) {
    return unchanged;
  }

  try {
    const contactEmail = config.ncbiEmail || config.unpaywallEmail;
    const lookupOptions = {
      ...(config.ncbiTool ? { tool: config.ncbiTool } : {}),
      ...(contactEmail ? { email: contactEmail } : {}),
    };
    const resolved = await resolveDoiToPmcid(article.doi, lookupOptions);
    if (!resolved) {
      return unchanged;
    }
    const enriched = { ...article };
    for (const key of ["pmcid", "pmid"]) {
      const value = resolved[key];
      if (value) {
        enriched[key] = value;
        discoveredIds[key] = value;
      }
    }
    return { enriched, discoveredIds };
  } catch {
    // A failed ID lookup is non-fatal: carry on with the article as given.
    return unchanged;
  }
}
68
/**
 * Check Unpaywall for the article's DOI and fold the outcome into `state`.
 *
 * Does nothing when `config.unpaywallEmail` or `enriched.doi` is missing.
 * Otherwise increments `state.sourcesChecked`, appends any returned locations
 * to `state.locations`, remembers a returned PMCID in `state.unpaywallPmcid`,
 * and records a thrown error in `state.errors` instead of propagating it.
 *
 * @param {object} enriched - Article identifiers; only `doi` is read.
 * @param {object} config - Discovery configuration; only `unpaywallEmail` is read.
 * @param {object} state - Mutable accumulator shared across source checks.
 * @returns {Promise<void>}
 */
async function checkUnpaywallSource(enriched, config, state) {
  const email = config.unpaywallEmail;
  if (!email) {
    return;
  }
  const doi = enriched.doi;
  if (!doi) {
    return;
  }

  state.sourcesChecked++;
  try {
    const detailed = await checkUnpaywallDetailed(doi, email);
    if (detailed) {
      state.locations.push(...detailed.locations);
      if (detailed.pmcid) {
        state.unpaywallPmcid = detailed.pmcid;
      }
    }
  } catch (err) {
    state.errors.push({ source: "unpaywall", error: String(err) });
  }
}
84
/**
 * Run the checker registered for a non-Unpaywall source and update `state`.
 *
 * Unknown source names and checkers that report "skipped" leave `state`
 * untouched. Otherwise `state.sourcesChecked` is incremented and either the
 * error or the returned locations are appended to `state`.
 *
 * @param {string} source - Source name used to look up the checker.
 * @param {object} enriched - Article identifiers passed to the checker.
 * @param {object} config - Discovery configuration passed to the checker.
 * @param {object} state - Mutable accumulator shared across source checks.
 * @returns {Promise<void>}
 */
async function checkGenericSource(source, enriched, config, state) {
  const checker = sourceCheckers[source];
  if (checker) {
    const outcome = await runSourceChecker(checker, enriched, config);
    if (!outcome.skipped) {
      state.sourcesChecked++;
      const { error, locations } = outcome;
      if (error) {
        state.errors.push({ source, error });
      } else if (locations) {
        state.locations.push(...locations);
      }
    }
  }
}
98
/**
 * Follow-up PMC check for a PMCID that Unpaywall revealed but the article
 * did not already carry.
 *
 * Does nothing when `state.unpaywallPmcid` is unset or `enriched.pmcid` is
 * already present. Otherwise records the PMCID in `discoveredIds` (without
 * overriding an existing value), queries `checkPmc` with it, and appends the
 * resulting locations — or a "pmc-lazy" error entry — to `state`.
 *
 * @param {object} enriched - Article identifiers; only `pmcid` is read.
 * @param {object} state - Mutable accumulator shared across source checks.
 * @param {object} discoveredIds - Mutable record of ids found during discovery.
 * @returns {Promise<void>}
 */
async function lazyPmcCheck(enriched, state, discoveredIds) {
  const pmcid = state.unpaywallPmcid;
  if (!pmcid) {
    return;
  }
  if (enriched.pmcid) {
    return;
  }

  const recordedPmcid = discoveredIds.pmcid ?? pmcid;
  discoveredIds.pmcid = recordedPmcid;

  try {
    const pmcLocations = await checkPmc({ pmcid });
    if (pmcLocations) {
      state.locations.push(...pmcLocations);
    }
  } catch (err) {
    state.errors.push({ source: "pmc-lazy", error: String(err) });
  }
}
111
/**
 * Discover open-access availability for an article across the configured sources.
 *
 * Sources are checked sequentially in the order given by `config.preferSources`;
 * when that list is empty or not specified, `DEFAULT_SOURCE_ORDER` is used.
 * Per-source failures are collected in `errors` rather than thrown. After the
 * loop, a follow-up PMC check runs if Unpaywall revealed a PMCID that the
 * article did not already have.
 *
 * @param {object} article - Article identifiers handed to the source checkers.
 * @param {object} config - Discovery configuration; `preferSources` is optional.
 * @returns {Promise<{oaStatus: string, locations: Array, errors: Array, discoveredIds: object}>}
 */
async function discoverOA(article, config) {
  const { enriched, discoveredIds } = await enrichArticleIds(article, config);
  const state = { locations: [], errors: [], sourcesChecked: 0 };

  // `preferSources` may be omitted; reading `.length` unguarded would throw a
  // TypeError instead of falling back to the default order.
  const preferred = config.preferSources;
  const sourceOrder = preferred?.length > 0 ? preferred : DEFAULT_SOURCE_ORDER;

  for (const source of sourceOrder) {
    if (source === "unpaywall") {
      // Unpaywall is handled separately because it can also yield a PMCID.
      await checkUnpaywallSource(enriched, config, state);
    } else {
      await checkGenericSource(source, enriched, config, state);
    }
  }

  await lazyPmcCheck(enriched, state, discoveredIds);
  const oaStatus = determineOAStatus(state.locations, state.errors, state.sourcesChecked);
  return { oaStatus, locations: state.locations, errors: state.errors, discoveredIds };
}
126
/**
 * Invoke a source checker and normalise its outcome into a result object.
 *
 * @param {Function} checker - Called as `checker(article, config)`; may be sync or async.
 * @param {object} article - Forwarded to the checker.
 * @param {object} config - Forwarded to the checker.
 * @returns {Promise<object>} `{ skipped: true }` when the checker yields
 *   `undefined`; `{ skipped: false, locations }` on success (`null` becomes an
 *   empty array); `{ skipped: false, error }` with the stringified error when
 *   the checker throws or rejects.
 */
async function runSourceChecker(checker, article, config) {
  let outcome;
  try {
    outcome = await checker(article, config);
  } catch (err) {
    return { skipped: false, error: String(err) };
  }
  return outcome === undefined
    ? { skipped: true }
    : { skipped: false, locations: outcome ?? [] };
}
136
+ export {
137
+ discoverOA
138
+ };
139
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sources":["../../../../../../node_modules/@ncukondo/academic-fulltext/dist/discovery/index.js"],"sourcesContent":["/**\n * OA Discovery Aggregator.\n * Combines results from multiple OA discovery sources.\n */\nimport { checkArxiv } from \"./arxiv.js\";\nimport { checkCore } from \"./core.js\";\nimport { resolveDoiToPmcid } from \"./ncbi-id-converter.js\";\nimport { checkPmc } from \"./pmc.js\";\nimport { checkUnpaywallDetailed } from \"./unpaywall.js\";\n/** Default source order when preferSources is empty or not specified */\nconst DEFAULT_SOURCE_ORDER = [\"pmc\", \"arxiv\", \"unpaywall\", \"core\"];\nasync function checkPmcSource(article) {\n if (!article.pmid && !article.pmcid)\n return undefined;\n const ids = {};\n if (article.pmid)\n ids.pmid = article.pmid;\n if (article.pmcid)\n ids.pmcid = article.pmcid;\n return await checkPmc(ids);\n}\nfunction checkArxivSource(article) {\n if (!article.arxivId)\n return undefined;\n return checkArxiv(article.arxivId);\n}\nasync function checkCoreSource(article, config) {\n if (!config.coreApiKey || !article.doi)\n return undefined;\n return await checkCore(article.doi, config.coreApiKey);\n}\n/** Map of source name to its checker function (excluding unpaywall, handled specially). */\nconst sourceCheckers = {\n pmc: checkPmcSource,\n arxiv: checkArxivSource,\n core: checkCoreSource,\n};\n/** Determine OA status from collected locations and errors. 
*/\nfunction determineOAStatus(locations, errors, sourcesChecked) {\n if (locations.length > 0)\n return \"open\";\n if (errors.length > 0 && errors.length >= sourcesChecked)\n return \"unknown\";\n return \"closed\";\n}\n/**\n * Pre-enrich article with PMCID from NCBI ID Converter when only DOI is available.\n */\nasync function enrichArticleIds(article, config) {\n const discoveredIds = {};\n // Only enrich if we have a DOI but no pmcid/pmid\n if (!article.doi || article.pmcid || article.pmid) {\n return { enriched: article, discoveredIds };\n }\n try {\n const email = config.ncbiEmail || config.unpaywallEmail;\n const options = {};\n if (config.ncbiTool)\n options.tool = config.ncbiTool;\n if (email)\n options.email = email;\n const result = await resolveDoiToPmcid(article.doi, options);\n if (result) {\n const enriched = { ...article };\n if (result.pmcid) {\n enriched.pmcid = result.pmcid;\n discoveredIds.pmcid = result.pmcid;\n }\n if (result.pmid) {\n enriched.pmid = result.pmid;\n discoveredIds.pmid = result.pmid;\n }\n return { enriched, discoveredIds };\n }\n }\n catch {\n // NCBI ID Converter failure is non-fatal; continue without enrichment\n }\n return { enriched: article, discoveredIds };\n}\n/** Check Unpaywall with PMCID extraction. */\nasync function checkUnpaywallSource(enriched, config, state) {\n if (!config.unpaywallEmail || !enriched.doi)\n return;\n state.sourcesChecked++;\n try {\n const detailed = await checkUnpaywallDetailed(enriched.doi, config.unpaywallEmail);\n if (!detailed)\n return;\n state.locations.push(...detailed.locations);\n if (detailed.pmcid) {\n state.unpaywallPmcid = detailed.pmcid;\n }\n }\n catch (err) {\n state.errors.push({ source: \"unpaywall\", error: String(err) });\n }\n}\n/** Check a single non-unpaywall source, updating state. 
*/\nasync function checkGenericSource(source, enriched, config, state) {\n const checker = sourceCheckers[source];\n if (!checker)\n return;\n const result = await runSourceChecker(checker, enriched, config);\n if (result.skipped)\n return;\n state.sourcesChecked++;\n if (result.error) {\n state.errors.push({ source, error: result.error });\n }\n else if (result.locations) {\n state.locations.push(...result.locations);\n }\n}\n/** Perform lazy PMC check if Unpaywall revealed a PMCID not already known. */\nasync function lazyPmcCheck(enriched, state, discoveredIds) {\n if (!state.unpaywallPmcid || enriched.pmcid)\n return;\n discoveredIds.pmcid = discoveredIds.pmcid ?? state.unpaywallPmcid;\n try {\n const pmcLocations = await checkPmc({ pmcid: state.unpaywallPmcid });\n if (pmcLocations) {\n state.locations.push(...pmcLocations);\n }\n }\n catch (err) {\n state.errors.push({ source: \"pmc-lazy\", error: String(err) });\n }\n}\n/**\n * Discover OA availability for an article across all configured sources.\n * Checks sources in the order specified by config.preferSources, falling back\n * to the default order. Individual source errors are caught and reported\n * without failing the whole discovery.\n *\n * When only a DOI is provided, pre-enriches with NCBI ID Converter to resolve\n * PMCID/PMID, enabling PMC discovery. Also extracts PMCID from Unpaywall URLs\n * as a fallback, performing a lazy PMC check if a PMCID is discovered.\n */\nexport async function discoverOA(article, config) {\n const { enriched, discoveredIds } = await enrichArticleIds(article, config);\n const state = { locations: [], errors: [], sourcesChecked: 0 };\n const sourceOrder = config.preferSources.length > 0 ? 
config.preferSources : DEFAULT_SOURCE_ORDER;\n for (const source of sourceOrder) {\n if (source === \"unpaywall\") {\n await checkUnpaywallSource(enriched, config, state);\n }\n else {\n await checkGenericSource(source, enriched, config, state);\n }\n }\n await lazyPmcCheck(enriched, state, discoveredIds);\n const oaStatus = determineOAStatus(state.locations, state.errors, state.sourcesChecked);\n return { oaStatus, locations: state.locations, errors: state.errors, discoveredIds };\n}\n/** Run a single source checker with error handling. */\nasync function runSourceChecker(checker, article, config) {\n try {\n const result = await checker(article, config);\n if (result === undefined)\n return { skipped: true };\n return { skipped: false, locations: result ?? [] };\n }\n catch (err) {\n return { skipped: false, error: String(err) };\n }\n}\n//# sourceMappingURL=index.js.map"],"names":[],"mappings":";;;;;AAUA,MAAM,uBAAuB,CAAC,OAAO,SAAS,aAAa,MAAM;AACjE,eAAe,eAAe,SAAS;AACnC,MAAI,CAAC,QAAQ,QAAQ,CAAC,QAAQ;AAC1B,WAAO;AACX,QAAM,MAAM,CAAA;AACZ,MAAI,QAAQ;AACR,QAAI,OAAO,QAAQ;AACvB,MAAI,QAAQ;AACR,QAAI,QAAQ,QAAQ;AACxB,SAAO,MAAM,SAAS,GAAG;AAC7B;AACA,SAAS,iBAAiB,SAAS;AAC/B,MAAI,CAAC,QAAQ;AACT,WAAO;AACX,SAAO,WAAW,QAAQ,OAAO;AACrC;AACA,eAAe,gBAAgB,SAAS,QAAQ;AAC5C,MAAI,CAAC,OAAO,cAAc,CAAC,QAAQ;AAC/B,WAAO;AACX,SAAO,MAAM,UAAU,QAAQ,KAAK,OAAO,UAAU;AACzD;AAEA,MAAM,iBAAiB;AAAA,EACnB,KAAK;AAAA,EACL,OAAO;AAAA,EACP,MAAM;AACV;AAEA,SAAS,kBAAkB,WAAW,QAAQ,gBAAgB;AAC1D,MAAI,UAAU,SAAS;AACnB,WAAO;AACX,MAAI,OAAO,SAAS,KAAK,OAAO,UAAU;AACtC,WAAO;AACX,SAAO;AACX;AAIA,eAAe,iBAAiB,SAAS,QAAQ;AAC7C,QAAM,gBAAgB,CAAA;AAEtB,MAAI,CAAC,QAAQ,OAAO,QAAQ,SAAS,QAAQ,MAAM;AAC/C,WAAO,EAAE,UAAU,SAAS,cAAa;AAAA,EAC7C;AACA,MAAI;AACA,UAAM,QAAQ,OAAO,aAAa,OAAO;AACzC,UAAM,UAAU,CAAA;AAChB,QAAI,OAAO;AACP,cAAQ,OAAO,OAAO;AAC1B,QAAI;AACA,cAAQ,QAAQ;AACpB,UAAM,SAAS,MAAM,kBAAkB,QAAQ,KAAK,OAAO;AAC3D,QAAI,QAAQ;AACR,YAAM,WAAW,EAAE,GAAG,QAAO;AAC7B,UAAI,OAAO,OAAO;AACd,iBAAS,QAAQ,OAAO;AACxB,sBAAc,QAAQ,OAAO;AAAA,MACjC;AACA,UAAI,OAAO,MAAM;AACb,iB
AAS,OAAO,OAAO;AACvB,sBAAc,OAAO,OAAO;AAAA,MAChC;AACA,aAAO,EAAE,UAAU,cAAa;AAAA,IACpC;AAAA,EACJ,QACM;AAAA,EAEN;AACA,SAAO,EAAE,UAAU,SAAS,cAAa;AAC7C;AAEA,eAAe,qBAAqB,UAAU,QAAQ,OAAO;AACzD,MAAI,CAAC,OAAO,kBAAkB,CAAC,SAAS;AACpC;AACJ,QAAM;AACN,MAAI;AACA,UAAM,WAAW,MAAM,uBAAuB,SAAS,KAAK,OAAO,cAAc;AACjF,QAAI,CAAC;AACD;AACJ,UAAM,UAAU,KAAK,GAAG,SAAS,SAAS;AAC1C,QAAI,SAAS,OAAO;AAChB,YAAM,iBAAiB,SAAS;AAAA,IACpC;AAAA,EACJ,SACO,KAAK;AACR,UAAM,OAAO,KAAK,EAAE,QAAQ,aAAa,OAAO,OAAO,GAAG,GAAG;AAAA,EACjE;AACJ;AAEA,eAAe,mBAAmB,QAAQ,UAAU,QAAQ,OAAO;AAC/D,QAAM,UAAU,eAAe,MAAM;AACrC,MAAI,CAAC;AACD;AACJ,QAAM,SAAS,MAAM,iBAAiB,SAAS,UAAU,MAAM;AAC/D,MAAI,OAAO;AACP;AACJ,QAAM;AACN,MAAI,OAAO,OAAO;AACd,UAAM,OAAO,KAAK,EAAE,QAAQ,OAAO,OAAO,OAAO;AAAA,EACrD,WACS,OAAO,WAAW;AACvB,UAAM,UAAU,KAAK,GAAG,OAAO,SAAS;AAAA,EAC5C;AACJ;AAEA,eAAe,aAAa,UAAU,OAAO,eAAe;AACxD,MAAI,CAAC,MAAM,kBAAkB,SAAS;AAClC;AACJ,gBAAc,QAAQ,cAAc,SAAS,MAAM;AACnD,MAAI;AACA,UAAM,eAAe,MAAM,SAAS,EAAE,OAAO,MAAM,gBAAgB;AACnE,QAAI,cAAc;AACd,YAAM,UAAU,KAAK,GAAG,YAAY;AAAA,IACxC;AAAA,EACJ,SACO,KAAK;AACR,UAAM,OAAO,KAAK,EAAE,QAAQ,YAAY,OAAO,OAAO,GAAG,GAAG;AAAA,EAChE;AACJ;AAWO,eAAe,WAAW,SAAS,QAAQ;AAC9C,QAAM,EAAE,UAAU,cAAa,IAAK,MAAM,iBAAiB,SAAS,MAAM;AAC1E,QAAM,QAAQ,EAAE,WAAW,CAAA,GAAI,QAAQ,CAAA,GAAI,gBAAgB,EAAC;AAC5D,QAAM,cAAc,OAAO,cAAc,SAAS,IAAI,OAAO,gBAAgB;AAC7E,aAAW,UAAU,aAAa;AAC9B,QAAI,WAAW,aAAa;AACxB,YAAM,qBAAqB,UAAU,QAAQ,KAAK;AAAA,IACtD,OACK;AACD,YAAM,mBAAmB,QAAQ,UAAU,QAAQ,KAAK;AAAA,IAC5D;AAAA,EACJ;AACA,QAAM,aAAa,UAAU,OAAO,aAAa;AACjD,QAAM,WAAW,kBAAkB,MAAM,WAAW,MAAM,QAAQ,MAAM,cAAc;AACtF,SAAO,EAAE,UAAU,WAAW,MAAM,WAAW,QAAQ,MAAM,QAAQ,cAAa;AACtF;AAEA,eAAe,iBAAiB,SAAS,SAAS,QAAQ;AACtD,MAAI;AACA,UAAM,SAAS,MAAM,QAAQ,SAAS,MAAM;AAC5C,QAAI,WAAW;AACX,aAAO,EAAE,SAAS,KAAI;AAC1B,WAAO,EAAE,SAAS,OAAO,WAAW,UAAU,CAAA,EAAE;AAAA,EACpD,SACO,KAAK;AACR,WAAO,EAAE,SAAS,OAAO,OAAO,OAAO,GAAG,EAAC;AAAA,EAC/C;AACJ;","x_google_ignoreList":[0]}
@@ -0,0 +1,46 @@
1
+ const IDCONV_BASE_URL = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/";
2
+ function buildUrl(ids, options) {
3
+ const params = new URLSearchParams({
4
+ ids: ids.join(","),
5
+ format: "json"
6
+ });
7
+ if (options?.tool)
8
+ params.set("tool", options.tool);
9
+ if (options?.email)
10
+ params.set("email", options.email);
11
+ return `${IDCONV_BASE_URL}?${params.toString()}`;
12
+ }
13
+ function parseRecord(record) {
14
+ if (record.errmsg)
15
+ return null;
16
+ const result = {};
17
+ if (record.pmcid)
18
+ result.pmcid = record.pmcid;
19
+ if (record.pmid)
20
+ result.pmid = record.pmid;
21
+ if (record.doi)
22
+ result.doi = record.doi;
23
+ return Object.keys(result).length > 0 ? result : null;
24
+ }
25
+ async function resolveDoiToPmcid(doi, options) {
26
+ if (!doi)
27
+ return null;
28
+ const url = buildUrl([doi], options);
29
+ const response = await fetch(url);
30
+ if (!response.ok) {
31
+ if (response.status === 404)
32
+ return null;
33
+ throw new Error(`NCBI ID Converter API error: HTTP ${response.status} ${response.statusText}`);
34
+ }
35
+ const data = await response.json();
36
+ if (!data.records || data.records.length === 0)
37
+ return null;
38
+ const record = data.records[0];
39
+ if (!record)
40
+ return null;
41
+ return parseRecord(record);
42
+ }
43
+ export {
44
+ resolveDoiToPmcid
45
+ };
46
+ //# sourceMappingURL=ncbi-id-converter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ncbi-id-converter.js","sources":["../../../../../../node_modules/@ncukondo/academic-fulltext/dist/discovery/ncbi-id-converter.js"],"sourcesContent":["/**\n * NCBI ID Converter API client.\n * Resolves DOI → PMCID/PMID using the NCBI ID Converter API.\n *\n * API: https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?ids={doi}&format=json\n */\nconst IDCONV_BASE_URL = \"https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/\";\nfunction buildUrl(ids, options) {\n const params = new URLSearchParams({\n ids: ids.join(\",\"),\n format: \"json\",\n });\n if (options?.tool)\n params.set(\"tool\", options.tool);\n if (options?.email)\n params.set(\"email\", options.email);\n return `${IDCONV_BASE_URL}?${params.toString()}`;\n}\nfunction parseRecord(record) {\n if (record.errmsg)\n return null;\n const result = {};\n if (record.pmcid)\n result.pmcid = record.pmcid;\n if (record.pmid)\n result.pmid = record.pmid;\n if (record.doi)\n result.doi = record.doi;\n return Object.keys(result).length > 0 ? 
result : null;\n}\n/**\n * Resolve a single DOI to PMCID/PMID via the NCBI ID Converter API.\n *\n * @param doi - The article's DOI\n * @param options - Optional tool name and email for API identification\n * @returns Conversion result with pmcid/pmid, or null if not found\n */\nexport async function resolveDoiToPmcid(doi, options) {\n if (!doi)\n return null;\n const url = buildUrl([doi], options);\n const response = await fetch(url);\n if (!response.ok) {\n if (response.status === 404)\n return null;\n throw new Error(`NCBI ID Converter API error: HTTP ${response.status} ${response.statusText}`);\n }\n const data = (await response.json());\n if (!data.records || data.records.length === 0)\n return null;\n const record = data.records[0];\n if (!record)\n return null;\n return parseRecord(record);\n}\n/**\n * Batch resolve multiple IDs (DOIs, PMIDs, PMCIDs) to their cross-references.\n *\n * @param ids - Array of identifiers to resolve\n * @param options - Optional tool name and email for API identification\n * @returns Map of input ID → conversion result\n */\nexport async function batchResolveIds(ids, options) {\n const results = new Map();\n if (ids.length === 0)\n return results;\n const url = buildUrl(ids, options);\n const response = await fetch(url);\n if (!response.ok) {\n throw new Error(`NCBI ID Converter API error: HTTP ${response.status} ${response.statusText}`);\n }\n const data = (await response.json());\n if (!data.records)\n return results;\n for (const record of data.records) {\n const parsed = parseRecord(record);\n if (!parsed)\n continue;\n // Map back to the input ID: try doi, pmid, pmcid\n const key = record.doi ?? record.pmid ?? 
record.pmcid;\n if (key)\n results.set(key, parsed);\n }\n return results;\n}\n//# sourceMappingURL=ncbi-id-converter.js.map"],"names":[],"mappings":"AAMA,MAAM,kBAAkB;AACxB,SAAS,SAAS,KAAK,SAAS;AAC5B,QAAM,SAAS,IAAI,gBAAgB;AAAA,IAC/B,KAAK,IAAI,KAAK,GAAG;AAAA,IACjB,QAAQ;AAAA,EAChB,CAAK;AACD,MAAI,SAAS;AACT,WAAO,IAAI,QAAQ,QAAQ,IAAI;AACnC,MAAI,SAAS;AACT,WAAO,IAAI,SAAS,QAAQ,KAAK;AACrC,SAAO,GAAG,eAAe,IAAI,OAAO,SAAQ,CAAE;AAClD;AACA,SAAS,YAAY,QAAQ;AACzB,MAAI,OAAO;AACP,WAAO;AACX,QAAM,SAAS,CAAA;AACf,MAAI,OAAO;AACP,WAAO,QAAQ,OAAO;AAC1B,MAAI,OAAO;AACP,WAAO,OAAO,OAAO;AACzB,MAAI,OAAO;AACP,WAAO,MAAM,OAAO;AACxB,SAAO,OAAO,KAAK,MAAM,EAAE,SAAS,IAAI,SAAS;AACrD;AAQO,eAAe,kBAAkB,KAAK,SAAS;AAClD,MAAI,CAAC;AACD,WAAO;AACX,QAAM,MAAM,SAAS,CAAC,GAAG,GAAG,OAAO;AACnC,QAAM,WAAW,MAAM,MAAM,GAAG;AAChC,MAAI,CAAC,SAAS,IAAI;AACd,QAAI,SAAS,WAAW;AACpB,aAAO;AACX,UAAM,IAAI,MAAM,qCAAqC,SAAS,MAAM,IAAI,SAAS,UAAU,EAAE;AAAA,EACjG;AACA,QAAM,OAAQ,MAAM,SAAS;AAC7B,MAAI,CAAC,KAAK,WAAW,KAAK,QAAQ,WAAW;AACzC,WAAO;AACX,QAAM,SAAS,KAAK,QAAQ,CAAC;AAC7B,MAAI,CAAC;AACD,WAAO;AACX,SAAO,YAAY,MAAM;AAC7B;","x_google_ignoreList":[0]}
@@ -31,11 +31,14 @@ async function lookupPmcid(pmid, options) {
31
31
  }
32
32
  const data = await response.json();
33
33
  const linksets = data.linksets;
34
- if (!linksets || linksets.length === 0) return null;
34
+ if (!linksets || linksets.length === 0)
35
+ return null;
35
36
  const firstLinkset = linksets[0];
36
- if (!firstLinkset?.linksetdbs) return null;
37
+ if (!firstLinkset?.linksetdbs)
38
+ return null;
37
39
  const pmcLink = firstLinkset.linksetdbs.find((db) => db.dbto === "pmc");
38
- if (!pmcLink?.links || pmcLink.links.length === 0) return null;
40
+ if (!pmcLink?.links || pmcLink.links.length === 0)
41
+ return null;
39
42
  const pmcNumericId = pmcLink.links[0];
40
43
  return pmcNumericId ? `PMC${pmcNumericId}` : null;
41
44
  }
@@ -45,7 +48,8 @@ async function checkPmc(ids, options) {
45
48
  }
46
49
  if (ids.pmid) {
47
50
  const pmcid = await lookupPmcid(ids.pmid);
48
- if (!pmcid) return null;
51
+ if (!pmcid)
52
+ return null;
49
53
  return getPmcUrls(pmcid);
50
54
  }
51
55
  return null;
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pmc.js","sources":["../../../../../../node_modules/@ncukondo/academic-fulltext/dist/discovery/pmc.js"],"sourcesContent":["/**\n * PMC OA discovery client.\n * Checks PubMed Central availability and generates download URLs.\n *\n * PDF: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC{id}/pdf/\n * XML: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id={pmcid}&rettype=xml\n * PMID→PMCID: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&db=pmc&id={pmid}&retmode=json\n */\nconst EUTILS_BASE = \"https://eutils.ncbi.nlm.nih.gov/entrez/eutils\";\n/** Strip \"PMC\" prefix from PMCID, returning just the numeric part */\nfunction stripPmcPrefix(pmcid) {\n return pmcid.replace(/^PMC/i, \"\");\n}\n/** Ensure PMCID has the \"PMC\" prefix */\nfunction ensurePmcPrefix(pmcid) {\n return pmcid.startsWith(\"PMC\") ? pmcid : `PMC${pmcid}`;\n}\n/**\n * Generate PMC download URLs from a known PMCID.\n */\nexport function getPmcUrls(pmcid) {\n const numericId = stripPmcPrefix(pmcid);\n const fullPmcid = ensurePmcPrefix(pmcid);\n return [\n {\n source: \"pmc\",\n url: `https://www.ncbi.nlm.nih.gov/pmc/articles/${fullPmcid}/pdf/`,\n urlType: \"pdf\",\n version: \"published\",\n },\n {\n source: \"pmc\",\n url: `${EUTILS_BASE}/efetch.fcgi?db=pmc&id=${numericId}&rettype=xml`,\n urlType: \"xml\",\n version: \"published\",\n },\n ];\n}\n/**\n * Look up PMCID from PMID via E-utilities elink API.\n * @returns PMCID string or null if not in PMC\n */\nasync function lookupPmcid(pmid, options) {\n let url = `${EUTILS_BASE}/elink.fcgi?dbfrom=pubmed&db=pmc&id=${pmid}&retmode=json`;\n if (options?.apiKey) {\n url += `&api_key=${options.apiKey}`;\n }\n const response = await fetch(url);\n if (!response.ok) {\n throw new Error(`PMC elink API error: HTTP ${response.status}`);\n }\n const data = (await response.json());\n const linksets = data.linksets;\n if (!linksets || linksets.length === 0)\n return null;\n const firstLinkset = 
linksets[0];\n if (!firstLinkset?.linksetdbs)\n return null;\n const pmcLink = firstLinkset.linksetdbs.find((db) => db.dbto === \"pmc\");\n if (!pmcLink?.links || pmcLink.links.length === 0)\n return null;\n const pmcNumericId = pmcLink.links[0];\n return pmcNumericId ? `PMC${pmcNumericId}` : null;\n}\n/**\n * Check PMC availability for an article.\n * If PMCID is known, generates URLs directly.\n * If only PMID is known, looks up PMCID via E-utilities elink.\n */\nexport async function checkPmc(ids, options) {\n // If PMCID is already known, generate URLs directly\n if (ids.pmcid) {\n return getPmcUrls(ids.pmcid);\n }\n // If only PMID is known, look up PMCID\n if (ids.pmid) {\n const pmcid = await lookupPmcid(ids.pmid, options);\n if (!pmcid)\n return null;\n return getPmcUrls(pmcid);\n }\n // No identifiers provided\n return null;\n}\n//# sourceMappingURL=pmc.js.map"],"names":[],"mappings":"AAQA,MAAM,cAAc;AAEpB,SAAS,eAAe,OAAO;AAC3B,SAAO,MAAM,QAAQ,SAAS,EAAE;AACpC;AAEA,SAAS,gBAAgB,OAAO;AAC5B,SAAO,MAAM,WAAW,KAAK,IAAI,QAAQ,MAAM,KAAK;AACxD;AAIO,SAAS,WAAW,OAAO;AAC9B,QAAM,YAAY,eAAe,KAAK;AACtC,QAAM,YAAY,gBAAgB,KAAK;AACvC,SAAO;AAAA,IACH;AAAA,MACI,QAAQ;AAAA,MACR,KAAK,6CAA6C,SAAS;AAAA,MAC3D,SAAS;AAAA,MACT,SAAS;AAAA,IACrB;AAAA,IACQ;AAAA,MACI,QAAQ;AAAA,MACR,KAAK,GAAG,WAAW,0BAA0B,SAAS;AAAA,MACtD,SAAS;AAAA,MACT,SAAS;AAAA,IACrB;AAAA,EACA;AACA;AAKA,eAAe,YAAY,MAAM,SAAS;AACtC,MAAI,MAAM,GAAG,WAAW,uCAAuC,IAAI;AAInE,QAAM,WAAW,MAAM,MAAM,GAAG;AAChC,MAAI,CAAC,SAAS,IAAI;AACd,UAAM,IAAI,MAAM,6BAA6B,SAAS,MAAM,EAAE;AAAA,EAClE;AACA,QAAM,OAAQ,MAAM,SAAS;AAC7B,QAAM,WAAW,KAAK;AACtB,MAAI,CAAC,YAAY,SAAS,WAAW;AACjC,WAAO;AACX,QAAM,eAAe,SAAS,CAAC;AAC/B,MAAI,CAAC,cAAc;AACf,WAAO;AACX,QAAM,UAAU,aAAa,WAAW,KAAK,CAAC,OAAO,GAAG,SAAS,KAAK;AACtE,MAAI,CAAC,SAAS,SAAS,QAAQ,MAAM,WAAW;AAC5C,WAAO;AACX,QAAM,eAAe,QAAQ,MAAM,CAAC;AACpC,SAAO,eAAe,MAAM,YAAY,KAAK;AACjD;AAMO,eAAe,SAAS,KAAK,SAAS;AAEzC,MAAI,IAAI,OAAO;AACX,WAAO,WAAW,IAAI,KAAK;AAAA,EAC/B;AAEA,MAAI,IAAI,MAAM;AACV,UAAM,QAAQ,MAAM,YAAY,IAAI,IAAa;AACjD,QAAI,CAAC;AACD
,aAAO;AACX,WAAO,WAAW,KAAK;AAAA,EAC3B;AAEA,SAAO;AACX;","x_google_ignoreList":[0]}
@@ -1,4 +1,5 @@
1
1
  const UNPAYWALL_BASE_URL = "https://api.unpaywall.org/v2";
2
+ const PMC_URL_PATTERN = /\/pmc\/articles\/(PMC\d+)/i;
2
3
  function mapVersion(version) {
3
4
  switch (version) {
4
5
  case "publishedVersion":
@@ -14,7 +15,8 @@ function mapVersion(version) {
14
15
  function toOALocation(loc) {
15
16
  const hasPdf = loc.url_for_pdf != null;
16
17
  const url = hasPdf ? loc.url_for_pdf : loc.url_for_landing_page;
17
- if (!url) return null;
18
+ if (!url)
19
+ return null;
18
20
  const result = {
19
21
  source: "unpaywall",
20
22
  url,
@@ -26,32 +28,64 @@ function toOALocation(loc) {
26
28
  }
27
29
  return result;
28
30
  }
29
- async function checkUnpaywall(doi, email) {
30
- if (!doi) return null;
31
+ function extractPmcidFromUrl(url) {
32
+ const match = PMC_URL_PATTERN.exec(url);
33
+ return match ? match[1] ?? null : null;
34
+ }
35
+ function extractPmcidFromLocations(locations) {
36
+ for (const loc of locations) {
37
+ if (loc.url_for_pdf) {
38
+ const pmcid = extractPmcidFromUrl(loc.url_for_pdf);
39
+ if (pmcid)
40
+ return pmcid;
41
+ }
42
+ if (loc.url_for_landing_page) {
43
+ const pmcid = extractPmcidFromUrl(loc.url_for_landing_page);
44
+ if (pmcid)
45
+ return pmcid;
46
+ }
47
+ }
48
+ return null;
49
+ }
50
+ async function fetchUnpaywallData(doi, email) {
51
+ if (!doi)
52
+ return null;
31
53
  if (!email) {
32
54
  throw new Error("Unpaywall email is required for API access");
33
55
  }
34
56
  const url = `${UNPAYWALL_BASE_URL}/${doi}?email=${encodeURIComponent(email)}`;
35
57
  const response = await fetch(url);
36
58
  if (!response.ok) {
37
- if (response.status === 404) return null;
59
+ if (response.status === 404)
60
+ return null;
38
61
  if (response.status === 429) {
39
62
  throw new Error("Unpaywall rate limit exceeded");
40
63
  }
41
64
  throw new Error(`Unpaywall API error: HTTP ${response.status} ${response.statusText}`);
42
65
  }
43
- const data = await response.json();
44
- if (!data.is_oa || !data.oa_locations || data.oa_locations.length === 0) {
66
+ return await response.json();
67
+ }
68
+ async function checkUnpaywallDetailed(doi, email) {
69
+ const data = await fetchUnpaywallData(doi, email);
70
+ if (!data || !data.is_oa || !data.oa_locations || data.oa_locations.length === 0) {
45
71
  return null;
46
72
  }
47
73
  const locations = [];
48
74
  for (const loc of data.oa_locations) {
49
75
  const oaLoc = toOALocation(loc);
50
- if (oaLoc) locations.push(oaLoc);
76
+ if (oaLoc)
77
+ locations.push(oaLoc);
51
78
  }
52
- return locations.length > 0 ? locations : null;
79
+ if (locations.length === 0)
80
+ return null;
81
+ const pmcid = extractPmcidFromLocations(data.oa_locations);
82
+ const result = { locations };
83
+ if (pmcid)
84
+ result.pmcid = pmcid;
85
+ return result;
53
86
  }
54
87
  export {
55
- checkUnpaywall
88
+ checkUnpaywallDetailed,
89
+ extractPmcidFromUrl
56
90
  };
57
91
  //# sourceMappingURL=unpaywall.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"unpaywall.js","sources":["../../../../../../node_modules/@ncukondo/academic-fulltext/dist/discovery/unpaywall.js"],"sourcesContent":["/**\n * Unpaywall OA discovery client.\n * Checks Open Access availability via the Unpaywall API.\n *\n * API: https://api.unpaywall.org/v2/{doi}?email={email}\n * Rate limit: 100,000 requests/day (no per-second limit documented)\n */\nconst UNPAYWALL_BASE_URL = \"https://api.unpaywall.org/v2\";\n/** Regex to extract PMCID from PMC URLs */\nconst PMC_URL_PATTERN = /\\/pmc\\/articles\\/(PMC\\d+)/i;\n/** Map Unpaywall version strings to our OALocation version format */\nfunction mapVersion(version) {\n switch (version) {\n case \"publishedVersion\":\n return \"published\";\n case \"acceptedVersion\":\n return \"accepted\";\n case \"submittedVersion\":\n return \"submitted\";\n default:\n return \"published\";\n }\n}\n/** Convert an Unpaywall location to our OALocation format */\nfunction toOALocation(loc) {\n const hasPdf = loc.url_for_pdf != null;\n const url = hasPdf ? loc.url_for_pdf : loc.url_for_landing_page;\n if (!url)\n return null;\n const result = {\n source: \"unpaywall\",\n url,\n urlType: hasPdf ? \"pdf\" : \"html\",\n version: mapVersion(loc.version),\n };\n if (loc.license) {\n result.license = loc.license;\n }\n return result;\n}\n/**\n * Extract PMCID from a URL containing a PMC article link.\n * E.g., \"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1234567/pdf/\" → \"PMC1234567\"\n */\nexport function extractPmcidFromUrl(url) {\n const match = PMC_URL_PATTERN.exec(url);\n return match ? (match[1] ?? 
null) : null;\n}\n/**\n * Scan Unpaywall locations for a PMCID embedded in any URL.\n */\nfunction extractPmcidFromLocations(locations) {\n for (const loc of locations) {\n if (loc.url_for_pdf) {\n const pmcid = extractPmcidFromUrl(loc.url_for_pdf);\n if (pmcid)\n return pmcid;\n }\n if (loc.url_for_landing_page) {\n const pmcid = extractPmcidFromUrl(loc.url_for_landing_page);\n if (pmcid)\n return pmcid;\n }\n }\n return null;\n}\n/**\n * Fetch Unpaywall API and return raw response data.\n */\nasync function fetchUnpaywallData(doi, email) {\n if (!doi)\n return null;\n if (!email) {\n throw new Error(\"Unpaywall email is required for API access\");\n }\n const url = `${UNPAYWALL_BASE_URL}/${doi}?email=${encodeURIComponent(email)}`;\n const response = await fetch(url);\n if (!response.ok) {\n if (response.status === 404)\n return null;\n if (response.status === 429) {\n throw new Error(\"Unpaywall rate limit exceeded\");\n }\n throw new Error(`Unpaywall API error: HTTP ${response.status} ${response.statusText}`);\n }\n return (await response.json());\n}\n/**\n * Check Unpaywall for Open Access availability of an article.\n *\n * @param doi - The article's DOI\n * @param email - Email address required by Unpaywall API (free, no registration)\n * @returns Array of OALocations if OA, null if closed/not found\n * @throws On rate limit (429) or network errors\n */\nexport async function checkUnpaywall(doi, email) {\n const data = await fetchUnpaywallData(doi, email);\n if (!data || !data.is_oa || !data.oa_locations || data.oa_locations.length === 0) {\n return null;\n }\n const locations = [];\n for (const loc of data.oa_locations) {\n const oaLoc = toOALocation(loc);\n if (oaLoc)\n locations.push(oaLoc);\n }\n return locations.length > 0 ? 
locations : null;\n}\n/**\n * Check Unpaywall with detailed results including extracted PMCID.\n * Returns both OA locations and any PMCID found in the location URLs.\n *\n * @param doi - The article's DOI\n * @param email - Email address required by Unpaywall API (free, no registration)\n * @returns Detailed result with locations and optional pmcid, or null if closed/not found\n * @throws On rate limit (429) or network errors\n */\nexport async function checkUnpaywallDetailed(doi, email) {\n const data = await fetchUnpaywallData(doi, email);\n if (!data || !data.is_oa || !data.oa_locations || data.oa_locations.length === 0) {\n return null;\n }\n const locations = [];\n for (const loc of data.oa_locations) {\n const oaLoc = toOALocation(loc);\n if (oaLoc)\n locations.push(oaLoc);\n }\n if (locations.length === 0)\n return null;\n const pmcid = extractPmcidFromLocations(data.oa_locations);\n const result = { locations };\n if (pmcid)\n result.pmcid = pmcid;\n return result;\n}\n//# sourceMappingURL=unpaywall.js.map"],"names":[],"mappings":"AAOA,MAAM,qBAAqB;AAE3B,MAAM,kBAAkB;AAExB,SAAS,WAAW,SAAS;AACzB,UAAQ,SAAO;AAAA,IACX,KAAK;AACD,aAAO;AAAA,IACX,KAAK;AACD,aAAO;AAAA,IACX,KAAK;AACD,aAAO;AAAA,IACX;AACI,aAAO;AAAA,EACnB;AACA;AAEA,SAAS,aAAa,KAAK;AACvB,QAAM,SAAS,IAAI,eAAe;AAClC,QAAM,MAAM,SAAS,IAAI,cAAc,IAAI;AAC3C,MAAI,CAAC;AACD,WAAO;AACX,QAAM,SAAS;AAAA,IACX,QAAQ;AAAA,IACR;AAAA,IACA,SAAS,SAAS,QAAQ;AAAA,IAC1B,SAAS,WAAW,IAAI,OAAO;AAAA,EACvC;AACI,MAAI,IAAI,SAAS;AACb,WAAO,UAAU,IAAI;AAAA,EACzB;AACA,SAAO;AACX;AAKO,SAAS,oBAAoB,KAAK;AACrC,QAAM,QAAQ,gBAAgB,KAAK,GAAG;AACtC,SAAO,QAAS,MAAM,CAAC,KAAK,OAAQ;AACxC;AAIA,SAAS,0BAA0B,WAAW;AAC1C,aAAW,OAAO,WAAW;AACzB,QAAI,IAAI,aAAa;AACjB,YAAM,QAAQ,oBAAoB,IAAI,WAAW;AACjD,UAAI;AACA,eAAO;AAAA,IACf;AACA,QAAI,IAAI,sBAAsB;AAC1B,YAAM,QAAQ,oBAAoB,IAAI,oBAAoB;AAC1D,UAAI;AACA,eAAO;AAAA,IACf;AAAA,EACJ;AACA,SAAO;AACX;AAIA,eAAe,mBAAmB,KAAK,OAAO;AAC1C,MAAI,CAAC;AACD,WAAO;AACX,MAAI,CAAC,OAAO;AACR,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAChE;AACA,QAAM,MAAM,GAAG,kBAAkB,IAAI
,GAAG,UAAU,mBAAmB,KAAK,CAAC;AAC3E,QAAM,WAAW,MAAM,MAAM,GAAG;AAChC,MAAI,CAAC,SAAS,IAAI;AACd,QAAI,SAAS,WAAW;AACpB,aAAO;AACX,QAAI,SAAS,WAAW,KAAK;AACzB,YAAM,IAAI,MAAM,+BAA+B;AAAA,IACnD;AACA,UAAM,IAAI,MAAM,6BAA6B,SAAS,MAAM,IAAI,SAAS,UAAU,EAAE;AAAA,EACzF;AACA,SAAQ,MAAM,SAAS,KAAI;AAC/B;AA+BO,eAAe,uBAAuB,KAAK,OAAO;AACrD,QAAM,OAAO,MAAM,mBAAmB,KAAK,KAAK;AAChD,MAAI,CAAC,QAAQ,CAAC,KAAK,SAAS,CAAC,KAAK,gBAAgB,KAAK,aAAa,WAAW,GAAG;AAC9E,WAAO;AAAA,EACX;AACA,QAAM,YAAY,CAAA;AAClB,aAAW,OAAO,KAAK,cAAc;AACjC,UAAM,QAAQ,aAAa,GAAG;AAC9B,QAAI;AACA,gBAAU,KAAK,KAAK;AAAA,EAC5B;AACA,MAAI,UAAU,WAAW;AACrB,WAAO;AACX,QAAM,QAAQ,0BAA0B,KAAK,YAAY;AACzD,QAAM,SAAS,EAAE,UAAS;AAC1B,MAAI;AACA,WAAO,QAAQ;AACnB,SAAO;AACX;","x_google_ignoreList":[0]}
@@ -0,0 +1,48 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import { dirname } from "node:path";
3
+ const ARXIV_HTML_BASE = "https://arxiv.org/html/";
4
+ const USER_AGENT = "search-hub/0.8.0 (https://github.com/ncukondo/search-hub)";
5
+ function normalizeArxivId(id) {
6
+ return id.replace(/^arXiv:/i, "");
7
+ }
8
+ function isHtmlContentType(contentType) {
9
+ if (!contentType)
10
+ return false;
11
+ const base = (contentType.split(";")[0] ?? "").trim().toLowerCase();
12
+ return base === "text/html";
13
+ }
14
+ async function downloadArxivHtml(arxivId, destPath) {
15
+ const id = normalizeArxivId(arxivId);
16
+ const url = `${ARXIV_HTML_BASE}${id}`;
17
+ try {
18
+ const response = await fetch(url, {
19
+ headers: { "User-Agent": USER_AGENT }
20
+ });
21
+ if (!response.ok) {
22
+ return {
23
+ success: false,
24
+ error: `HTTP ${response.status} ${response.statusText}`
25
+ };
26
+ }
27
+ const contentType = response.headers.get("content-type");
28
+ if (!isHtmlContentType(contentType)) {
29
+ return {
30
+ success: false,
31
+ error: `Unexpected Content-Type: ${contentType ?? "none"} (expected text/html)`
32
+ };
33
+ }
34
+ const text = await response.text();
35
+ await mkdir(dirname(destPath), { recursive: true });
36
+ await writeFile(destPath, text, "utf-8");
37
+ return { success: true, size: Buffer.byteLength(text) };
38
+ } catch (err) {
39
+ return {
40
+ success: false,
41
+ error: err instanceof Error ? err.message : String(err)
42
+ };
43
+ }
44
+ }
45
+ export {
46
+ downloadArxivHtml
47
+ };
48
+ //# sourceMappingURL=arxiv-html.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"arxiv-html.js","sources":["../../../../../../node_modules/@ncukondo/academic-fulltext/dist/download/arxiv-html.js"],"sourcesContent":["/**\n * arXiv HTML downloader.\n *\n * Downloads LaTeXML-generated HTML from arXiv for conversion to Markdown.\n * Not all arXiv papers have HTML versions; 404 is handled gracefully.\n */\nimport { mkdir, writeFile } from \"node:fs/promises\";\nimport { dirname } from \"node:path\";\nconst ARXIV_HTML_BASE = \"https://arxiv.org/html/\";\nconst USER_AGENT = \"search-hub/0.8.0 (https://github.com/ncukondo/search-hub)\";\n/** Strip common prefixes from arXiv IDs */\nfunction normalizeArxivId(id) {\n return id.replace(/^arXiv:/i, \"\");\n}\nfunction isHtmlContentType(contentType) {\n if (!contentType)\n return false;\n const base = (contentType.split(\";\")[0] ?? \"\").trim().toLowerCase();\n return base === \"text/html\";\n}\n/**\n * Download arXiv HTML for a given arXiv ID.\n *\n * @param arxivId - arXiv identifier (e.g., \"2301.13867\", \"arXiv:2301.13867\")\n * @param destPath - Destination file path for the HTML\n * @returns Result with success status and file size\n */\nexport async function downloadArxivHtml(arxivId, destPath) {\n const id = normalizeArxivId(arxivId);\n const url = `${ARXIV_HTML_BASE}${id}`;\n try {\n const response = await fetch(url, {\n headers: { \"User-Agent\": USER_AGENT },\n });\n if (!response.ok) {\n return {\n success: false,\n error: `HTTP ${response.status} ${response.statusText}`,\n };\n }\n const contentType = response.headers.get(\"content-type\");\n if (!isHtmlContentType(contentType)) {\n return {\n success: false,\n error: `Unexpected Content-Type: ${contentType ?? 
\"none\"} (expected text/html)`,\n };\n }\n const text = await response.text();\n await mkdir(dirname(destPath), { recursive: true });\n await writeFile(destPath, text, \"utf-8\");\n return { success: true, size: Buffer.byteLength(text) };\n }\n catch (err) {\n return {\n success: false,\n error: err instanceof Error ? err.message : String(err),\n };\n }\n}\n//# sourceMappingURL=arxiv-html.js.map"],"names":[],"mappings":";;AAQA,MAAM,kBAAkB;AACxB,MAAM,aAAa;AAEnB,SAAS,iBAAiB,IAAI;AAC1B,SAAO,GAAG,QAAQ,YAAY,EAAE;AACpC;AACA,SAAS,kBAAkB,aAAa;AACpC,MAAI,CAAC;AACD,WAAO;AACX,QAAM,QAAQ,YAAY,MAAM,GAAG,EAAE,CAAC,KAAK,IAAI,KAAI,EAAG,YAAW;AACjE,SAAO,SAAS;AACpB;AAQO,eAAe,kBAAkB,SAAS,UAAU;AACvD,QAAM,KAAK,iBAAiB,OAAO;AACnC,QAAM,MAAM,GAAG,eAAe,GAAG,EAAE;AACnC,MAAI;AACA,UAAM,WAAW,MAAM,MAAM,KAAK;AAAA,MAC9B,SAAS,EAAE,cAAc,WAAU;AAAA,IAC/C,CAAS;AACD,QAAI,CAAC,SAAS,IAAI;AACd,aAAO;AAAA,QACH,SAAS;AAAA,QACT,OAAO,QAAQ,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,MACrE;AAAA,IACQ;AACA,UAAM,cAAc,SAAS,QAAQ,IAAI,cAAc;AACvD,QAAI,CAAC,kBAAkB,WAAW,GAAG;AACjC,aAAO;AAAA,QACH,SAAS;AAAA,QACT,OAAO,4BAA4B,eAAe,MAAM;AAAA,MACxE;AAAA,IACQ;AACA,UAAM,OAAO,MAAM,SAAS,KAAI;AAChC,UAAM,MAAM,QAAQ,QAAQ,GAAG,EAAE,WAAW,MAAM;AAClD,UAAM,UAAU,UAAU,MAAM,OAAO;AACvC,WAAO,EAAE,SAAS,MAAM,MAAM,OAAO,WAAW,IAAI,EAAC;AAAA,EACzD,SACO,KAAK;AACR,WAAO;AAAA,MACH,SAAS;AAAA,MACT,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IAClE;AAAA,EACI;AACJ;","x_google_ignoreList":[0]}
@@ -0,0 +1,64 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import { dirname } from "node:path";
3
+ const NON_RETRYABLE_STATUSES = /* @__PURE__ */ new Set([400, 401, 403, 404, 405, 410]);
4
+ const VALID_CONTENT_TYPES = ["application/pdf", "application/octet-stream"];
5
+ const USER_AGENT = "search-hub/0.8.0 (https://github.com/ncukondo/search-hub)";
6
+ function isValidPdfContentType(contentType) {
7
+ if (!contentType)
8
+ return false;
9
+ const base = (contentType.split(";")[0] ?? "").trim().toLowerCase();
10
+ return VALID_CONTENT_TYPES.includes(base);
11
+ }
12
+ function sleep(ms) {
13
+ return new Promise((resolve) => setTimeout(resolve, ms));
14
+ }
15
+ async function attemptDownload(url, destPath) {
16
+ const response = await fetch(url, {
17
+ headers: { "User-Agent": USER_AGENT }
18
+ });
19
+ if (!response.ok) {
20
+ const error = `HTTP ${response.status} ${response.statusText}`;
21
+ if (NON_RETRYABLE_STATUSES.has(response.status)) {
22
+ return { kind: "fail", result: { success: false, error } };
23
+ }
24
+ return { kind: "retry", error };
25
+ }
26
+ const contentType = response.headers.get("content-type");
27
+ if (!isValidPdfContentType(contentType)) {
28
+ return {
29
+ kind: "fail",
30
+ result: {
31
+ success: false,
32
+ error: `Unexpected Content-Type: ${contentType ?? "none"}`
33
+ }
34
+ };
35
+ }
36
+ const buffer = await response.arrayBuffer();
37
+ await mkdir(dirname(destPath), { recursive: true });
38
+ await writeFile(destPath, Buffer.from(buffer));
39
+ return { kind: "success", result: { success: true, size: buffer.byteLength } };
40
+ }
41
+ async function downloadPdf(url, destPath, options) {
42
+ const retries = options?.retries ?? 3;
43
+ const retryDelay = options?.retryDelay ?? 1e3;
44
+ let lastError;
45
+ for (let attempt = 1; attempt <= retries; attempt++) {
46
+ try {
47
+ const outcome = await attemptDownload(url, destPath);
48
+ if (outcome.kind === "success" || outcome.kind === "fail") {
49
+ return outcome.result;
50
+ }
51
+ lastError = outcome.error;
52
+ } catch (err) {
53
+ lastError = err instanceof Error ? err.message : String(err);
54
+ }
55
+ if (attempt < retries) {
56
+ await sleep(retryDelay * attempt);
57
+ }
58
+ }
59
+ return { success: false, error: lastError ?? "Download failed" };
60
+ }
61
+ export {
62
+ downloadPdf
63
+ };
64
+ //# sourceMappingURL=downloader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"downloader.js","sources":["../../../../../../node_modules/@ncukondo/academic-fulltext/dist/download/downloader.js"],"sourcesContent":["/**\n * PDF downloader with retry and error handling.\n */\nimport { mkdir, writeFile } from \"node:fs/promises\";\nimport { dirname } from \"node:path\";\n/** HTTP status codes that should not be retried */\nconst NON_RETRYABLE_STATUSES = new Set([400, 401, 403, 404, 405, 410]);\n/** Content types accepted as valid PDF responses */\nconst VALID_CONTENT_TYPES = [\"application/pdf\", \"application/octet-stream\"];\nconst USER_AGENT = \"search-hub/0.8.0 (https://github.com/ncukondo/search-hub)\";\nfunction isValidPdfContentType(contentType) {\n if (!contentType)\n return false;\n const base = (contentType.split(\";\")[0] ?? \"\").trim().toLowerCase();\n return VALID_CONTENT_TYPES.includes(base);\n}\nfunction sleep(ms) {\n return new Promise((resolve) => setTimeout(resolve, ms));\n}\nasync function attemptDownload(url, destPath) {\n const response = await fetch(url, {\n headers: { \"User-Agent\": USER_AGENT },\n });\n if (!response.ok) {\n const error = `HTTP ${response.status} ${response.statusText}`;\n if (NON_RETRYABLE_STATUSES.has(response.status)) {\n return { kind: \"fail\", result: { success: false, error } };\n }\n return { kind: \"retry\", error };\n }\n const contentType = response.headers.get(\"content-type\");\n if (!isValidPdfContentType(contentType)) {\n return {\n kind: \"fail\",\n result: {\n success: false,\n error: `Unexpected Content-Type: ${contentType ?? 
\"none\"}`,\n },\n };\n }\n const buffer = await response.arrayBuffer();\n await mkdir(dirname(destPath), { recursive: true });\n await writeFile(destPath, Buffer.from(buffer));\n return { kind: \"success\", result: { success: true, size: buffer.byteLength } };\n}\n/**\n * Download a PDF from a URL to a local file path.\n * Retries on network errors and 429 responses with exponential backoff.\n * Does not retry on 403/404 or other client errors.\n */\nexport async function downloadPdf(url, destPath, options) {\n const retries = options?.retries ?? 3;\n const retryDelay = options?.retryDelay ?? 1000;\n let lastError;\n for (let attempt = 1; attempt <= retries; attempt++) {\n try {\n const outcome = await attemptDownload(url, destPath);\n if (outcome.kind === \"success\" || outcome.kind === \"fail\") {\n return outcome.result;\n }\n lastError = outcome.error;\n }\n catch (err) {\n lastError = err instanceof Error ? err.message : String(err);\n }\n if (attempt < retries) {\n await sleep(retryDelay * attempt);\n }\n }\n return { success: false, error: lastError ?? 
\"Download failed\" };\n}\n//# sourceMappingURL=downloader.js.map"],"names":[],"mappings":";;AAMA,MAAM,yBAAyB,oBAAI,IAAI,CAAC,KAAK,KAAK,KAAK,KAAK,KAAK,GAAG,CAAC;AAErE,MAAM,sBAAsB,CAAC,mBAAmB,0BAA0B;AAC1E,MAAM,aAAa;AACnB,SAAS,sBAAsB,aAAa;AACxC,MAAI,CAAC;AACD,WAAO;AACX,QAAM,QAAQ,YAAY,MAAM,GAAG,EAAE,CAAC,KAAK,IAAI,KAAI,EAAG,YAAW;AACjE,SAAO,oBAAoB,SAAS,IAAI;AAC5C;AACA,SAAS,MAAM,IAAI;AACf,SAAO,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,EAAE,CAAC;AAC3D;AACA,eAAe,gBAAgB,KAAK,UAAU;AAC1C,QAAM,WAAW,MAAM,MAAM,KAAK;AAAA,IAC9B,SAAS,EAAE,cAAc,WAAU;AAAA,EAC3C,CAAK;AACD,MAAI,CAAC,SAAS,IAAI;AACd,UAAM,QAAQ,QAAQ,SAAS,MAAM,IAAI,SAAS,UAAU;AAC5D,QAAI,uBAAuB,IAAI,SAAS,MAAM,GAAG;AAC7C,aAAO,EAAE,MAAM,QAAQ,QAAQ,EAAE,SAAS,OAAO,QAAO;AAAA,IAC5D;AACA,WAAO,EAAE,MAAM,SAAS,MAAK;AAAA,EACjC;AACA,QAAM,cAAc,SAAS,QAAQ,IAAI,cAAc;AACvD,MAAI,CAAC,sBAAsB,WAAW,GAAG;AACrC,WAAO;AAAA,MACH,MAAM;AAAA,MACN,QAAQ;AAAA,QACJ,SAAS;AAAA,QACT,OAAO,4BAA4B,eAAe,MAAM;AAAA,MACxE;AAAA,IACA;AAAA,EACI;AACA,QAAM,SAAS,MAAM,SAAS,YAAW;AACzC,QAAM,MAAM,QAAQ,QAAQ,GAAG,EAAE,WAAW,MAAM;AAClD,QAAM,UAAU,UAAU,OAAO,KAAK,MAAM,CAAC;AAC7C,SAAO,EAAE,MAAM,WAAW,QAAQ,EAAE,SAAS,MAAM,MAAM,OAAO,aAAY;AAChF;AAMO,eAAe,YAAY,KAAK,UAAU,SAAS;AACtD,QAAM,UAAU,SAAS,WAAW;AACpC,QAAM,aAAa,SAAS,cAAc;AAC1C,MAAI;AACJ,WAAS,UAAU,GAAG,WAAW,SAAS,WAAW;AACjD,QAAI;AACA,YAAM,UAAU,MAAM,gBAAgB,KAAK,QAAQ;AACnD,UAAI,QAAQ,SAAS,aAAa,QAAQ,SAAS,QAAQ;AACvD,eAAO,QAAQ;AAAA,MACnB;AACA,kBAAY,QAAQ;AAAA,IACxB,SACO,KAAK;AACR,kBAAY,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IAC/D;AACA,QAAI,UAAU,SAAS;AACnB,YAAM,MAAM,aAAa,OAAO;AAAA,IACpC;AAAA,EACJ;AACA,SAAO,EAAE,SAAS,OAAO,OAAO,aAAa,kBAAiB;AAClE;","x_google_ignoreList":[0]}