@ncukondo/search-hub 0.12.1 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334) hide show
  1. package/dist/_virtual/_commonjsHelpers.js +30 -0
  2. package/dist/_virtual/_commonjsHelpers.js.map +1 -0
  3. package/dist/_virtual/aliases.js +5 -0
  4. package/dist/_virtual/aliases.js.map +1 -0
  5. package/dist/_virtual/attributes.js +5 -0
  6. package/dist/_virtual/attributes.js.map +1 -0
  7. package/dist/_virtual/back.js +5 -0
  8. package/dist/_virtual/back.js.map +1 -0
  9. package/dist/_virtual/comment.js +5 -0
  10. package/dist/_virtual/comment.js.map +1 -0
  11. package/dist/_virtual/compile.js +5 -0
  12. package/dist/_virtual/compile.js.map +1 -0
  13. package/dist/_virtual/compile2.js +5 -0
  14. package/dist/_virtual/compile2.js.map +1 -0
  15. package/dist/_virtual/decode-data-html.js +5 -0
  16. package/dist/_virtual/decode-data-html.js.map +1 -0
  17. package/dist/_virtual/decode-data-xml.js +5 -0
  18. package/dist/_virtual/decode-data-xml.js.map +1 -0
  19. package/dist/_virtual/decode.js +5 -0
  20. package/dist/_virtual/decode.js.map +1 -0
  21. package/dist/_virtual/decode_codepoint.js +5 -0
  22. package/dist/_virtual/decode_codepoint.js.map +1 -0
  23. package/dist/_virtual/encode-html.js +5 -0
  24. package/dist/_virtual/encode-html.js.map +1 -0
  25. package/dist/_virtual/encode.js +5 -0
  26. package/dist/_virtual/encode.js.map +1 -0
  27. package/dist/_virtual/escape.js +5 -0
  28. package/dist/_virtual/escape.js.map +1 -0
  29. package/dist/_virtual/feeds.js +5 -0
  30. package/dist/_virtual/feeds.js.map +1 -0
  31. package/dist/_virtual/filters.js +5 -0
  32. package/dist/_virtual/filters.js.map +1 -0
  33. package/dist/_virtual/foreignNames.js +5 -0
  34. package/dist/_virtual/foreignNames.js.map +1 -0
  35. package/dist/_virtual/general.js +5 -0
  36. package/dist/_virtual/general.js.map +1 -0
  37. package/dist/_virtual/he.js +5 -0
  38. package/dist/_virtual/he.js.map +1 -0
  39. package/dist/_virtual/helpers.js +5 -0
  40. package/dist/_virtual/helpers.js.map +1 -0
  41. package/dist/_virtual/html.js +5 -0
  42. package/dist/_virtual/html.js.map +1 -0
  43. package/dist/_virtual/index.js +6 -0
  44. package/dist/_virtual/index.js.map +1 -0
  45. package/dist/_virtual/index10.js +5 -0
  46. package/dist/_virtual/index10.js.map +1 -0
  47. package/dist/_virtual/index11.js +5 -0
  48. package/dist/_virtual/index11.js.map +1 -0
  49. package/dist/_virtual/index2.js +5 -0
  50. package/dist/_virtual/index2.js.map +1 -0
  51. package/dist/_virtual/index3.js +5 -0
  52. package/dist/_virtual/index3.js.map +1 -0
  53. package/dist/_virtual/index4.js +5 -0
  54. package/dist/_virtual/index4.js.map +1 -0
  55. package/dist/_virtual/index5.js +7 -0
  56. package/dist/_virtual/index5.js.map +1 -0
  57. package/dist/_virtual/index6.js +5 -0
  58. package/dist/_virtual/index6.js.map +1 -0
  59. package/dist/_virtual/index7.js +5 -0
  60. package/dist/_virtual/index7.js.map +1 -0
  61. package/dist/_virtual/index8.js +5 -0
  62. package/dist/_virtual/index8.js.map +1 -0
  63. package/dist/_virtual/index9.js +5 -0
  64. package/dist/_virtual/index9.js.map +1 -0
  65. package/dist/_virtual/legacy.js +5 -0
  66. package/dist/_virtual/legacy.js.map +1 -0
  67. package/dist/_virtual/manipulation.js +5 -0
  68. package/dist/_virtual/manipulation.js.map +1 -0
  69. package/dist/_virtual/matcher.js +5 -0
  70. package/dist/_virtual/matcher.js.map +1 -0
  71. package/dist/_virtual/node.js +5 -0
  72. package/dist/_virtual/node.js.map +1 -0
  73. package/dist/_virtual/node2.js +5 -0
  74. package/dist/_virtual/node2.js.map +1 -0
  75. package/dist/_virtual/parse.js +5 -0
  76. package/dist/_virtual/parse.js.map +1 -0
  77. package/dist/_virtual/parse2.js +5 -0
  78. package/dist/_virtual/parse2.js.map +1 -0
  79. package/dist/_virtual/pseudos.js +5 -0
  80. package/dist/_virtual/pseudos.js.map +1 -0
  81. package/dist/_virtual/querying.js +5 -0
  82. package/dist/_virtual/querying.js.map +1 -0
  83. package/dist/_virtual/sort.js +5 -0
  84. package/dist/_virtual/sort.js.map +1 -0
  85. package/dist/_virtual/stringify.js +5 -0
  86. package/dist/_virtual/stringify.js.map +1 -0
  87. package/dist/_virtual/subselects.js +5 -0
  88. package/dist/_virtual/subselects.js.map +1 -0
  89. package/dist/_virtual/text.js +5 -0
  90. package/dist/_virtual/text.js.map +1 -0
  91. package/dist/_virtual/traversal.js +5 -0
  92. package/dist/_virtual/traversal.js.map +1 -0
  93. package/dist/_virtual/type.js +5 -0
  94. package/dist/_virtual/type.js.map +1 -0
  95. package/dist/_virtual/valid.js +5 -0
  96. package/dist/_virtual/valid.js.map +1 -0
  97. package/dist/_virtual/void-tag.js +5 -0
  98. package/dist/_virtual/void-tag.js.map +1 -0
  99. package/dist/cli/commands/fulltext/attach.js +1 -1
  100. package/dist/cli/commands/fulltext/attach.js.map +1 -1
  101. package/dist/cli/commands/fulltext/check.d.ts +1 -2
  102. package/dist/cli/commands/fulltext/check.d.ts.map +1 -1
  103. package/dist/cli/commands/fulltext/check.js +4 -2
  104. package/dist/cli/commands/fulltext/check.js.map +1 -1
  105. package/dist/cli/commands/fulltext/convert.d.ts.map +1 -1
  106. package/dist/cli/commands/fulltext/convert.js +8 -8
  107. package/dist/cli/commands/fulltext/convert.js.map +1 -1
  108. package/dist/cli/commands/fulltext/fetch.d.ts.map +1 -1
  109. package/dist/cli/commands/fulltext/fetch.js +10 -6
  110. package/dist/cli/commands/fulltext/fetch.js.map +1 -1
  111. package/dist/cli/commands/fulltext/index.d.ts.map +1 -1
  112. package/dist/cli/commands/fulltext/index.js +2 -0
  113. package/dist/cli/commands/fulltext/index.js.map +1 -1
  114. package/dist/cli/commands/fulltext/init.d.ts.map +1 -1
  115. package/dist/cli/commands/fulltext/init.js +6 -5
  116. package/dist/cli/commands/fulltext/init.js.map +1 -1
  117. package/dist/cli/commands/fulltext/pending.d.ts +1 -1
  118. package/dist/cli/commands/fulltext/pending.d.ts.map +1 -1
  119. package/dist/cli/commands/fulltext/pending.js +4 -2
  120. package/dist/cli/commands/fulltext/pending.js.map +1 -1
  121. package/dist/cli/commands/fulltext/status.d.ts.map +1 -1
  122. package/dist/cli/commands/fulltext/status.js +4 -2
  123. package/dist/cli/commands/fulltext/status.js.map +1 -1
  124. package/dist/cli/commands/fulltext/sync.d.ts.map +1 -1
  125. package/dist/cli/commands/fulltext/sync.js +6 -2
  126. package/dist/cli/commands/fulltext/sync.js.map +1 -1
  127. package/dist/cli/commands/review/types.d.ts +1 -1
  128. package/dist/cli/commands/review/types.d.ts.map +1 -1
  129. package/dist/cli/commands/review/types.js.map +1 -1
  130. package/dist/config/schema.d.ts +2 -0
  131. package/dist/config/schema.d.ts.map +1 -1
  132. package/dist/config/schema.js +6 -0
  133. package/dist/config/schema.js.map +1 -1
  134. package/dist/{fulltext → integration}/attach-shared.d.ts +2 -2
  135. package/dist/integration/attach-shared.d.ts.map +1 -0
  136. package/dist/integration/attach-shared.js.map +1 -0
  137. package/dist/integration/fulltext-attach.js +1 -1
  138. package/dist/integration/fulltext-attach.js.map +1 -1
  139. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/citation-key.js +1 -1
  140. package/dist/node_modules/@ncukondo/academic-fulltext/dist/citation-key.js.map +1 -0
  141. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/arxiv-html-parser.js +434 -0
  142. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/arxiv-html-parser.js.map +1 -0
  143. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/index.js +93 -0
  144. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/index.js.map +1 -0
  145. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/jats-parser.js +1060 -0
  146. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/jats-parser.js.map +1 -0
  147. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/convert/markdown-writer.js +146 -117
  148. package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/markdown-writer.js.map +1 -0
  149. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/arxiv.js +8 -1
  150. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/arxiv.js.map +1 -0
  151. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/core.js +6 -3
  152. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/core.js.map +1 -0
  153. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/index.js +139 -0
  154. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/index.js.map +1 -0
  155. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/ncbi-id-converter.js +46 -0
  156. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/ncbi-id-converter.js.map +1 -0
  157. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/pmc.js +8 -4
  158. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/pmc.js.map +1 -0
  159. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/unpaywall.js +43 -9
  160. package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/unpaywall.js.map +1 -0
  161. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/arxiv-html.js +48 -0
  162. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/arxiv-html.js.map +1 -0
  163. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/downloader.js +64 -0
  164. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/downloader.js.map +1 -0
  165. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/orchestrator.js +236 -0
  166. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/orchestrator.js.map +1 -0
  167. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/download/pmc-xml.js +2 -1
  168. package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/pmc-xml.js.map +1 -0
  169. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/meta.js +15 -10
  170. package/dist/node_modules/@ncukondo/academic-fulltext/dist/meta.js.map +1 -0
  171. package/dist/node_modules/@ncukondo/academic-fulltext/dist/paths.js.map +1 -0
  172. package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/readme.js +8 -4
  173. package/dist/node_modules/@ncukondo/academic-fulltext/dist/readme.js.map +1 -0
  174. package/dist/node_modules/boolbase/index.js +19 -0
  175. package/dist/node_modules/boolbase/index.js.map +1 -0
  176. package/dist/node_modules/css-select/lib/attributes.js +203 -0
  177. package/dist/node_modules/css-select/lib/attributes.js.map +1 -0
  178. package/dist/node_modules/css-select/lib/compile.js +141 -0
  179. package/dist/node_modules/css-select/lib/compile.js.map +1 -0
  180. package/dist/node_modules/css-select/lib/general.js +154 -0
  181. package/dist/node_modules/css-select/lib/general.js.map +1 -0
  182. package/dist/node_modules/css-select/lib/index.js +128 -0
  183. package/dist/node_modules/css-select/lib/index.js.map +1 -0
  184. package/dist/node_modules/css-select/lib/pseudo-selectors/aliases.js +40 -0
  185. package/dist/node_modules/css-select/lib/pseudo-selectors/aliases.js.map +1 -0
  186. package/dist/node_modules/css-select/lib/pseudo-selectors/filters.js +163 -0
  187. package/dist/node_modules/css-select/lib/pseudo-selectors/filters.js.map +1 -0
  188. package/dist/node_modules/css-select/lib/pseudo-selectors/index.js +71 -0
  189. package/dist/node_modules/css-select/lib/pseudo-selectors/index.js.map +1 -0
  190. package/dist/node_modules/css-select/lib/pseudo-selectors/pseudos.js +93 -0
  191. package/dist/node_modules/css-select/lib/pseudo-selectors/pseudos.js.map +1 -0
  192. package/dist/node_modules/css-select/lib/pseudo-selectors/subselects.js +111 -0
  193. package/dist/node_modules/css-select/lib/pseudo-selectors/subselects.js.map +1 -0
  194. package/dist/node_modules/css-select/lib/sort.js +78 -0
  195. package/dist/node_modules/css-select/lib/sort.js.map +1 -0
  196. package/dist/node_modules/css-what/lib/es/index.js +12 -0
  197. package/dist/node_modules/css-what/lib/es/index.js.map +1 -0
  198. package/dist/node_modules/css-what/lib/es/parse.js +349 -0
  199. package/dist/node_modules/css-what/lib/es/parse.js.map +1 -0
  200. package/dist/node_modules/css-what/lib/es/stringify.js +102 -0
  201. package/dist/node_modules/css-what/lib/es/stringify.js.map +1 -0
  202. package/dist/node_modules/css-what/lib/es/types.js +37 -0
  203. package/dist/node_modules/css-what/lib/es/types.js.map +1 -0
  204. package/dist/node_modules/dom-serializer/lib/foreignNames.js +117 -0
  205. package/dist/node_modules/dom-serializer/lib/foreignNames.js.map +1 -0
  206. package/dist/node_modules/dom-serializer/lib/index.js +207 -0
  207. package/dist/node_modules/dom-serializer/lib/index.js.map +1 -0
  208. package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode.js +368 -0
  209. package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode.js.map +1 -0
  210. package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode_codepoint.js +70 -0
  211. package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode_codepoint.js.map +1 -0
  212. package/dist/node_modules/dom-serializer/node_modules/entities/lib/encode.js +61 -0
  213. package/dist/node_modules/dom-serializer/node_modules/entities/lib/encode.js.map +1 -0
  214. package/dist/node_modules/dom-serializer/node_modules/entities/lib/escape.js +79 -0
  215. package/dist/node_modules/dom-serializer/node_modules/entities/lib/escape.js.map +1 -0
  216. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-html.js +18 -0
  217. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-html.js.map +1 -0
  218. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-xml.js +18 -0
  219. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-xml.js.map +1 -0
  220. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/encode-html.js +19 -0
  221. package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/encode-html.js.map +1 -0
  222. package/dist/node_modules/dom-serializer/node_modules/entities/lib/index.js +139 -0
  223. package/dist/node_modules/dom-serializer/node_modules/entities/lib/index.js.map +1 -0
  224. package/dist/node_modules/domelementtype/lib/index.js +40 -0
  225. package/dist/node_modules/domelementtype/lib/index.js.map +1 -0
  226. package/dist/node_modules/domhandler/lib/index.js +167 -0
  227. package/dist/node_modules/domhandler/lib/index.js.map +1 -0
  228. package/dist/node_modules/domhandler/lib/node.js +439 -0
  229. package/dist/node_modules/domhandler/lib/node.js.map +1 -0
  230. package/dist/node_modules/domutils/lib/feeds.js +146 -0
  231. package/dist/node_modules/domutils/lib/feeds.js.map +1 -0
  232. package/dist/node_modules/domutils/lib/helpers.js +97 -0
  233. package/dist/node_modules/domutils/lib/helpers.js.map +1 -0
  234. package/dist/node_modules/domutils/lib/index.js +65 -0
  235. package/dist/node_modules/domutils/lib/index.js.map +1 -0
  236. package/dist/node_modules/domutils/lib/legacy.js +124 -0
  237. package/dist/node_modules/domutils/lib/legacy.js.map +1 -0
  238. package/dist/node_modules/domutils/lib/manipulation.js +107 -0
  239. package/dist/node_modules/domutils/lib/manipulation.js.map +1 -0
  240. package/dist/node_modules/domutils/lib/querying.js +102 -0
  241. package/dist/node_modules/domutils/lib/querying.js.map +1 -0
  242. package/dist/node_modules/domutils/lib/stringify.js +65 -0
  243. package/dist/node_modules/domutils/lib/stringify.js.map +1 -0
  244. package/dist/node_modules/domutils/lib/traversal.js +69 -0
  245. package/dist/node_modules/domutils/lib/traversal.js.map +1 -0
  246. package/dist/node_modules/he/he.js +256 -0
  247. package/dist/node_modules/he/he.js.map +1 -0
  248. package/dist/node_modules/node-html-parser/dist/back.js +16 -0
  249. package/dist/node_modules/node-html-parser/dist/back.js.map +1 -0
  250. package/dist/node_modules/node-html-parser/dist/index.js +48 -0
  251. package/dist/node_modules/node-html-parser/dist/index.js.map +1 -0
  252. package/dist/node_modules/node-html-parser/dist/matcher.js +112 -0
  253. package/dist/node_modules/node-html-parser/dist/matcher.js.map +1 -0
  254. package/dist/node_modules/node-html-parser/dist/nodes/comment.js +41 -0
  255. package/dist/node_modules/node-html-parser/dist/nodes/comment.js.map +1 -0
  256. package/dist/node_modules/node-html-parser/dist/nodes/html.js +1048 -0
  257. package/dist/node_modules/node-html-parser/dist/nodes/html.js.map +1 -0
  258. package/dist/node_modules/node-html-parser/dist/nodes/node.js +49 -0
  259. package/dist/node_modules/node-html-parser/dist/nodes/node.js.map +1 -0
  260. package/dist/node_modules/node-html-parser/dist/nodes/text.js +106 -0
  261. package/dist/node_modules/node-html-parser/dist/nodes/text.js.map +1 -0
  262. package/dist/node_modules/node-html-parser/dist/nodes/type.js +19 -0
  263. package/dist/node_modules/node-html-parser/dist/nodes/type.js.map +1 -0
  264. package/dist/node_modules/node-html-parser/dist/parse.js +20 -0
  265. package/dist/node_modules/node-html-parser/dist/parse.js.map +1 -0
  266. package/dist/node_modules/node-html-parser/dist/valid.js +19 -0
  267. package/dist/node_modules/node-html-parser/dist/valid.js.map +1 -0
  268. package/dist/node_modules/node-html-parser/dist/void-tag.js +36 -0
  269. package/dist/node_modules/node-html-parser/dist/void-tag.js.map +1 -0
  270. package/dist/node_modules/nth-check/lib/compile.js +76 -0
  271. package/dist/node_modules/nth-check/lib/compile.js.map +1 -0
  272. package/dist/node_modules/nth-check/lib/index.js +36 -0
  273. package/dist/node_modules/nth-check/lib/index.js.map +1 -0
  274. package/dist/node_modules/nth-check/lib/parse.js +69 -0
  275. package/dist/node_modules/nth-check/lib/parse.js.map +1 -0
  276. package/package.json +2 -2
  277. package/dist/fulltext/attach-shared.d.ts.map +0 -1
  278. package/dist/fulltext/attach-shared.js.map +0 -1
  279. package/dist/fulltext/citation-key.d.ts +0 -15
  280. package/dist/fulltext/citation-key.d.ts.map +0 -1
  281. package/dist/fulltext/citation-key.js.map +0 -1
  282. package/dist/fulltext/convert/index.d.ts +0 -20
  283. package/dist/fulltext/convert/index.d.ts.map +0 -1
  284. package/dist/fulltext/convert/index.js +0 -50
  285. package/dist/fulltext/convert/index.js.map +0 -1
  286. package/dist/fulltext/convert/jats-parser.d.ts +0 -36
  287. package/dist/fulltext/convert/jats-parser.d.ts.map +0 -1
  288. package/dist/fulltext/convert/jats-parser.js +0 -887
  289. package/dist/fulltext/convert/jats-parser.js.map +0 -1
  290. package/dist/fulltext/convert/markdown-writer.d.ts +0 -6
  291. package/dist/fulltext/convert/markdown-writer.d.ts.map +0 -1
  292. package/dist/fulltext/convert/markdown-writer.js.map +0 -1
  293. package/dist/fulltext/convert/types.d.ts +0 -141
  294. package/dist/fulltext/convert/types.d.ts.map +0 -1
  295. package/dist/fulltext/discovery/arxiv.d.ts +0 -11
  296. package/dist/fulltext/discovery/arxiv.d.ts.map +0 -1
  297. package/dist/fulltext/discovery/arxiv.js.map +0 -1
  298. package/dist/fulltext/discovery/core.d.ts +0 -11
  299. package/dist/fulltext/discovery/core.d.ts.map +0 -1
  300. package/dist/fulltext/discovery/core.js.map +0 -1
  301. package/dist/fulltext/discovery/index.d.ts +0 -28
  302. package/dist/fulltext/discovery/index.d.ts.map +0 -1
  303. package/dist/fulltext/discovery/index.js +0 -75
  304. package/dist/fulltext/discovery/index.js.map +0 -1
  305. package/dist/fulltext/discovery/pmc.d.ts +0 -19
  306. package/dist/fulltext/discovery/pmc.d.ts.map +0 -1
  307. package/dist/fulltext/discovery/pmc.js.map +0 -1
  308. package/dist/fulltext/discovery/unpaywall.d.ts +0 -11
  309. package/dist/fulltext/discovery/unpaywall.d.ts.map +0 -1
  310. package/dist/fulltext/discovery/unpaywall.js.map +0 -1
  311. package/dist/fulltext/download/downloader.d.ts +0 -21
  312. package/dist/fulltext/download/downloader.d.ts.map +0 -1
  313. package/dist/fulltext/download/downloader.js +0 -59
  314. package/dist/fulltext/download/downloader.js.map +0 -1
  315. package/dist/fulltext/download/orchestrator.d.ts +0 -33
  316. package/dist/fulltext/download/orchestrator.d.ts.map +0 -1
  317. package/dist/fulltext/download/orchestrator.js +0 -125
  318. package/dist/fulltext/download/orchestrator.js.map +0 -1
  319. package/dist/fulltext/download/pmc-xml.d.ts +0 -13
  320. package/dist/fulltext/download/pmc-xml.d.ts.map +0 -1
  321. package/dist/fulltext/download/pmc-xml.js.map +0 -1
  322. package/dist/fulltext/meta.d.ts +0 -25
  323. package/dist/fulltext/meta.d.ts.map +0 -1
  324. package/dist/fulltext/meta.js.map +0 -1
  325. package/dist/fulltext/paths.d.ts +0 -12
  326. package/dist/fulltext/paths.d.ts.map +0 -1
  327. package/dist/fulltext/paths.js.map +0 -1
  328. package/dist/fulltext/readme.d.ts +0 -4
  329. package/dist/fulltext/readme.d.ts.map +0 -1
  330. package/dist/fulltext/readme.js.map +0 -1
  331. package/dist/fulltext/types.d.ts +0 -90
  332. package/dist/fulltext/types.d.ts.map +0 -1
  333. /package/dist/{fulltext → integration}/attach-shared.js +0 -0
  334. /package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/paths.js +0 -0
@@ -1,887 +0,0 @@
1
- import { XMLParser } from "fast-xml-parser";
2
- const parser = new XMLParser({
3
- ignoreAttributes: false,
4
- attributeNamePrefix: "@_",
5
- textNodeName: "#text",
6
- trimValues: false,
7
- preserveOrder: true,
8
- processEntities: true,
9
- htmlEntities: true
10
- });
11
- function getTagName(node) {
12
- for (const key of Object.keys(node)) {
13
- if (key !== ":@" && key !== "#text") return key;
14
- }
15
- return void 0;
16
- }
17
- function getChildren(node) {
18
- const tag = getTagName(node);
19
- if (!tag) return [];
20
- const children = node[tag];
21
- return Array.isArray(children) ? children : [];
22
- }
23
- function getAttr(node, attrName) {
24
- const attrs = node[":@"];
25
- if (!attrs) return void 0;
26
- const val = attrs[`@_${attrName}`];
27
- return val != null ? String(val) : void 0;
28
- }
29
- function getAttrs(node) {
30
- const attrs = node[":@"];
31
- if (!attrs) return {};
32
- const result = {};
33
- for (const [key, value] of Object.entries(attrs)) {
34
- if (key.startsWith("@_")) {
35
- result[key.slice(2)] = String(value);
36
- }
37
- }
38
- return result;
39
- }
40
- function findChild(children, tagName) {
41
- for (const child of children) {
42
- if (tagName in child) {
43
- const childArr = child[tagName];
44
- return {
45
- node: child,
46
- children: Array.isArray(childArr) ? childArr : [],
47
- attrs: getAttrs(child)
48
- };
49
- }
50
- }
51
- return void 0;
52
- }
53
- function findChildren(children, tagName) {
54
- const results = [];
55
- for (const child of children) {
56
- if (tagName in child) {
57
- const childArr = child[tagName];
58
- results.push({
59
- node: child,
60
- children: Array.isArray(childArr) ? childArr : [],
61
- attrs: getAttrs(child)
62
- });
63
- }
64
- }
65
- return results;
66
- }
67
- function getTextContent(child) {
68
- if ("#text" in child) {
69
- const val = child["#text"];
70
- return val != null ? String(val) : void 0;
71
- }
72
- return void 0;
73
- }
74
- function findArticle(parsed) {
75
- const direct = findChild(parsed, "article");
76
- if (direct) return direct;
77
- const wrapper = findChild(parsed, "pmc-articleset");
78
- if (wrapper) return findChild(wrapper.children, "article");
79
- return void 0;
80
- }
81
- const SPACE_AFTER_TAGS = /* @__PURE__ */ new Set([
82
- "surname",
83
- "given-names",
84
- "name",
85
- "string-name"
86
- ]);
87
- function extractAllText(node) {
88
- if (node == null) return "";
89
- if (typeof node === "string") return node;
90
- if (typeof node === "number") return String(node);
91
- if (Array.isArray(node)) {
92
- return joinChildTexts(node);
93
- }
94
- if (typeof node === "object") {
95
- const obj = node;
96
- const text = getTextContent(obj);
97
- if (text != null) return text;
98
- const tag = getTagName(obj);
99
- if (tag) {
100
- const children = obj[tag];
101
- if (Array.isArray(children)) {
102
- return joinChildTexts(children);
103
- }
104
- }
105
- }
106
- return "";
107
- }
108
- function joinChildTexts(children) {
109
- const parts = [];
110
- for (const child of children) {
111
- const text = extractAllText(child);
112
- if (!text) continue;
113
- const tag = getTagName(child);
114
- if (tag && SPACE_AFTER_TAGS.has(tag) && parts.length > 0) {
115
- const prev = parts[parts.length - 1];
116
- if (prev && !/[\s,;.:()\-/]$/.test(prev)) {
117
- parts.push(" ");
118
- }
119
- }
120
- parts.push(text);
121
- }
122
- return parts.join("");
123
- }
124
- function parseJatsMetadata(xml) {
125
- const parsed = parser.parse(xml);
126
- const article = findArticle(parsed);
127
- if (!article) return { title: "", authors: [] };
128
- const front = findChild(article.children, "front");
129
- if (!front) return { title: "", authors: [] };
130
- const articleMeta = findChild(front.children, "article-meta");
131
- if (!articleMeta) return { title: "", authors: [] };
132
- const metaChildren = articleMeta.children;
133
- const titleGroup = findChild(metaChildren, "title-group");
134
- const articleTitle = titleGroup ? findChild(titleGroup.children, "article-title") : void 0;
135
- const title = articleTitle ? extractAllText(articleTitle.children) : "";
136
- const articleIds = findChildren(metaChildren, "article-id");
137
- let doi;
138
- let pmcid;
139
- let pmid;
140
- for (const idEntry of articleIds) {
141
- const idType = idEntry.attrs["pub-id-type"];
142
- const idText = extractAllText(idEntry.children);
143
- if (idType === "doi") doi = idText;
144
- if (idType === "pmc" || idType === "pmcid") {
145
- pmcid = idText.replace(/^PMC/, "");
146
- }
147
- if (idType === "pmid") pmid = idText;
148
- }
149
- const authors = [];
150
- const contribGroup = findChild(metaChildren, "contrib-group");
151
- if (contribGroup) {
152
- const contribs = findChildren(contribGroup.children, "contrib");
153
- for (const contrib of contribs) {
154
- if (contrib.attrs["contrib-type"] !== "author") continue;
155
- const nameNode = findChild(contrib.children, "name");
156
- if (!nameNode) continue;
157
- const surnameNode = findChild(nameNode.children, "surname");
158
- const givenNamesNode = findChild(nameNode.children, "given-names");
159
- const author = {
160
- surname: surnameNode ? extractAllText(surnameNode.children) : ""
161
- };
162
- const givenNames = givenNamesNode ? extractAllText(givenNamesNode.children) : "";
163
- if (givenNames) {
164
- author.givenNames = givenNames;
165
- }
166
- authors.push(author);
167
- }
168
- }
169
- const abstractNode = findChild(metaChildren, "abstract");
170
- let abstract;
171
- if (abstractNode) {
172
- const sections = findChildren(abstractNode.children, "sec");
173
- if (sections.length > 0) {
174
- const parts = [];
175
- for (const sec of sections) {
176
- const secTitleNode = findChild(sec.children, "title");
177
- const secTitle = secTitleNode ? extractAllText(secTitleNode.children) : "";
178
- const secPs = findChildren(sec.children, "p");
179
- const text = secPs.map((p) => extractAllText(p.children)).join(" ");
180
- if (secTitle) {
181
- parts.push(`${secTitle}: ${text}`);
182
- } else {
183
- parts.push(text);
184
- }
185
- }
186
- abstract = parts.join("\n\n");
187
- } else {
188
- const paragraphs = findChildren(abstractNode.children, "p");
189
- if (paragraphs.length > 0) {
190
- abstract = paragraphs.map((p) => extractAllText(p.children)).join("\n\n");
191
- } else {
192
- const text = extractAllText(abstractNode.children);
193
- if (text) abstract = text;
194
- }
195
- }
196
- }
197
- const pubDates = findChildren(metaChildren, "pub-date");
198
- let publicationDate;
199
- const datePriority = { epub: 0, ppub: 1, collection: 2 };
200
- let bestPriority = Infinity;
201
- for (const pd of pubDates) {
202
- const dateType = pd.attrs["pub-type"] ?? pd.attrs["date-type"] ?? "";
203
- const priority = datePriority[dateType] ?? 3;
204
- if (priority < bestPriority) {
205
- bestPriority = priority;
206
- const yearNode = findChild(pd.children, "year");
207
- if (yearNode) {
208
- const year = extractAllText(yearNode.children);
209
- const monthNode = findChild(pd.children, "month");
210
- const dayNode = findChild(pd.children, "day");
211
- const date = { year };
212
- if (monthNode) date.month = extractAllText(monthNode.children);
213
- if (dayNode) date.day = extractAllText(dayNode.children);
214
- publicationDate = date;
215
- }
216
- }
217
- }
218
- if (!publicationDate && pubDates.length > 0) {
219
- const pd = pubDates[0];
220
- const yearNode = findChild(pd.children, "year");
221
- if (yearNode) {
222
- const year = extractAllText(yearNode.children);
223
- const monthNode = findChild(pd.children, "month");
224
- const dayNode = findChild(pd.children, "day");
225
- const date = { year };
226
- if (monthNode) date.month = extractAllText(monthNode.children);
227
- if (dayNode) date.day = extractAllText(dayNode.children);
228
- publicationDate = date;
229
- }
230
- }
231
- const articleType = article.attrs["article-type"] || void 0;
232
- let license;
233
- const permissions = findChild(metaChildren, "permissions");
234
- if (permissions) {
235
- const licenseNode = findChild(permissions.children, "license");
236
- if (licenseNode) {
237
- const href = licenseNode.attrs["xlink:href"];
238
- if (href) {
239
- license = href;
240
- } else {
241
- const licenseP = findChild(licenseNode.children, "license-p");
242
- if (licenseP) license = extractAllText(licenseP.children).trim();
243
- }
244
- }
245
- }
246
- const kwdGroups = findChildren(metaChildren, "kwd-group");
247
- const keywords = [];
248
- for (const kwdGroup of kwdGroups) {
249
- const kwds = findChildren(kwdGroup.children, "kwd");
250
- for (const kwd of kwds) {
251
- const text = extractAllText(kwd.children).trim();
252
- if (text) keywords.push(text);
253
- }
254
- }
255
- const volumeNode = findChild(metaChildren, "volume");
256
- const volume = volumeNode ? extractAllText(volumeNode.children) : void 0;
257
- const issueNode = findChild(metaChildren, "issue");
258
- const issue = issueNode ? extractAllText(issueNode.children) : void 0;
259
- let pages;
260
- const fpageNode = findChild(metaChildren, "fpage");
261
- const lpageNode = findChild(metaChildren, "lpage");
262
- if (fpageNode) {
263
- const fp = extractAllText(fpageNode.children);
264
- const lp = lpageNode ? extractAllText(lpageNode.children) : "";
265
- pages = lp ? `${fp}-${lp}` : fp;
266
- } else {
267
- const elocationNode = findChild(metaChildren, "elocation-id");
268
- if (elocationNode) pages = extractAllText(elocationNode.children);
269
- }
270
- const journalMeta = findChild(front.children, "journal-meta");
271
- let journal;
272
- if (journalMeta) {
273
- const titleGroup2 = findChild(journalMeta.children, "journal-title-group");
274
- if (titleGroup2) {
275
- const jTitle = findChild(titleGroup2.children, "journal-title");
276
- if (jTitle) journal = extractAllText(jTitle.children);
277
- }
278
- if (!journal) {
279
- const jTitle = findChild(journalMeta.children, "journal-title");
280
- if (jTitle) journal = extractAllText(jTitle.children);
281
- }
282
- }
283
- const result = { title, authors };
284
- if (doi) result.doi = doi;
285
- if (pmcid) result.pmcid = pmcid;
286
- if (pmid) result.pmid = pmid;
287
- if (journal) result.journal = journal;
288
- if (publicationDate) result.publicationDate = publicationDate;
289
- if (volume) result.volume = volume;
290
- if (issue) result.issue = issue;
291
- if (pages) result.pages = pages;
292
- if (keywords.length > 0) result.keywords = keywords;
293
- if (articleType) result.articleType = articleType;
294
- if (license) result.license = license;
295
- if (abstract) result.abstract = abstract;
296
- return result;
297
- }
298
/**
 * Converts a list of inline child nodes into an array of inline content entries.
 *
 * Text nodes become `{ type: "text" }` entries; recognised formatting tags
 * (bold, italic, sup, sub, inline-formula, monospace, ext-link, uri, xref)
 * become typed entries; every other tag is flattened to its text.
 *
 * @param {Array} children - Child nodes of the element being converted.
 * @returns {Array<object>} Inline content entries in document order.
 */
function parseInlineContent(children) {
  const entries = [];

  // Flattens a subtree to one text entry; empty strings are not emitted.
  const pushFlattened = (nodes) => {
    const flat = extractAllText(nodes);
    if (flat) entries.push({ type: "text", text: flat });
  };

  for (const node of children) {
    const literal = getTextContent(node);
    if (literal != null) {
      // A text node: keep it only when it is non-empty.
      if (literal) entries.push({ type: "text", text: literal });
      continue;
    }

    const tagName = getTagName(node);
    if (!tagName) continue;
    const inner = getChildren(node);

    switch (tagName) {
      case "bold":
      case "italic":
        // The entry type is the same string as the tag name for these two.
        entries.push({ type: tagName, children: parseInlineContent(inner) });
        break;

      case "sup":
        entries.push({ type: "superscript", text: extractAllText(inner) });
        break;

      case "sub":
        entries.push({ type: "subscript", text: extractAllText(inner) });
        break;

      case "inline-formula": {
        // Prefer a direct <tex-math>; otherwise look inside <alternatives>.
        let texNode = findChild(inner, "tex-math");
        if (!texNode) {
          const wrapper = findChild(inner, "alternatives");
          if (wrapper) texNode = findChild(wrapper.children, "tex-math");
        }
        const tex = texNode ? extractAllText(texNode.children) : void 0;
        const formula = {
          type: "inline-formula",
          text: tex || extractAllText(inner)
        };
        if (tex) formula.tex = tex;
        entries.push(formula);
        break;
      }

      case "monospace":
        entries.push({ type: "code", text: extractAllText(inner) });
        break;

      case "ext-link": {
        const target = getAttr(node, "xlink:href");
        if (target) {
          entries.push({ type: "link", url: target, children: parseInlineContent(inner) });
        } else {
          // No target: degrade to plain text.
          pushFlattened(inner);
        }
        break;
      }

      case "uri": {
        // The element text doubles as the URL when no xlink:href is present.
        const target = getAttr(node, "xlink:href");
        const visible = extractAllText(inner);
        const url = target || visible;
        if (url) {
          entries.push({ type: "link", url, children: parseInlineContent(inner) });
        }
        break;
      }

      case "xref":
        if (getAttr(node, "ref-type") === "bibr") {
          entries.push({
            type: "citation",
            refId: getAttr(node, "rid") ?? "",
            text: extractAllText(inner)
          });
        } else {
          pushFlattened(inner);
        }
        break;

      // "underline", "sc" and any unrecognised tag are all reduced to text.
      default:
        pushFlattened(inner);
        break;
    }
  }

  return entries;
}
372
/**
 * Builds a list block from a `<list>` node.
 *
 * Each `<list-item>` contributes one entry made of the inline content of all
 * of its direct `<p>` children, concatenated in order.
 *
 * @param {object} listNode - The `<list>` node.
 * @returns {{type: "list", ordered: boolean, items: Array<Array<object>>}}
 *   `ordered` is true only when the `list-type` attribute equals "order".
 */
function parseList(listNode) {
  const ordered = getAttr(listNode, "list-type") === "order";
  const items = findChildren(getChildren(listNode), "list-item").map((entry) =>
    findChildren(entry.children, "p").flatMap((para) => parseInlineContent(para.children))
  );
  return { type: "list", ordered, items };
}
385
/**
 * Extracts the text of every `<th>` / `<td>` cell in a table row.
 *
 * A cell holding more than one direct `<p>` child is rendered as the
 * paragraph texts joined with "<br>"; any other cell is flattened to text.
 *
 * @param {Array} trChildren - Child nodes of a `<tr>` element.
 * @returns {string[]} One string per cell, in document order.
 */
function parseTableRow(trChildren) {
  const isCell = (node) => {
    const tagName = getTagName(node);
    return tagName === "th" || tagName === "td";
  };

  return trChildren.filter(isCell).map((cell) => {
    const inner = getChildren(cell);
    const paras = findChildren(inner, "p");
    return paras.length > 1
      ? paras.map((para) => extractAllText(para.children)).join("<br>")
      : extractAllText(inner);
  });
}
403
/**
 * Extracts caption, header cells and body rows from a `<table-wrap>` node.
 *
 * The caption is the `<label>` and `<caption>` texts joined with ". " (empty
 * parts are skipped). Headers come from the first `<tr>` of `<thead>`.
 * Body rows are collected from every `<tbody>`; when the table has no
 * `<tbody>` at all, `<tr>` elements placed directly under `<table>` are used
 * instead, so such tables no longer come back with an empty `rows` array.
 *
 * @param {object} tableWrapNode - The `<table-wrap>` node.
 * @returns {{headers: string[], rows: string[][], caption?: string}}
 *   `caption` is present only when non-empty.
 */
function parseTableWrap(tableWrapNode) {
  const children = getChildren(tableWrapNode);

  const labelNode = findChild(children, "label");
  const label = labelNode ? extractAllText(labelNode.children) : "";
  const captionNode = findChild(children, "caption");
  const captionText = captionNode ? extractAllText(captionNode.children) : "";
  const captionStr = [label, captionText].filter(Boolean).join(". ");

  const result = {
    headers: [],
    rows: []
  };
  if (captionStr) result.caption = captionStr;

  const tableNode = findChild(children, "table");
  if (!tableNode) return result;

  const thead = findChild(tableNode.children, "thead");
  const headRows = thead ? findChildren(thead.children, "tr") : [];
  if (headRows.length > 0) {
    // Only the first header row is used as the column headers.
    result.headers.push(...parseTableRow(headRows[0].children));
  }

  // A table may carry several <tbody> sections, or bare <tr> rows with no
  // <tbody> wrapper; read whichever form is present.
  const bodies = findChildren(tableNode.children, "tbody");
  const bodyRows = bodies.length > 0
    ? bodies.flatMap((tbody) => findChildren(tbody.children, "tr"))
    : findChildren(tableNode.children, "tr");
  for (const row of bodyRows) {
    result.rows.push(parseTableRow(row.children));
  }

  return result;
}
433
/**
 * Builds a boxed-text block from a `<boxed-text>` node.
 *
 * @param {object} node - The `<boxed-text>` node.
 * @returns {{type: "boxed-text", content: Array<object>, title?: string}}
 *   `title` is set only when the `<title>` child yields non-empty text.
 */
function parseBoxedText(node) {
  const kids = getChildren(node);
  const heading = findChild(kids, "title");
  const headingText = heading ? extractAllText(heading.children) : void 0;

  const box = { type: "boxed-text", content: parseBlockContent(kids) };
  if (headingText) box.title = headingText;
  return box;
}
442
/**
 * Builds a definition-list block from a `<def-list>` node.
 *
 * Every `<def-item>` yields a `{ term, definition }` pair; a missing `<term>`
 * or `<def>` is represented by an empty string.
 *
 * @param {object} node - The `<def-list>` node.
 * @returns {{type: "def-list", items: Array<{term: string, definition: string}>, title?: string}}
 */
function parseDefList(node) {
  const kids = getChildren(node);

  // Text of the first child with the given tag, or "" when absent.
  const textOrEmpty = (nodes, tag) => {
    const match = findChild(nodes, tag);
    return match ? extractAllText(match.children) : "";
  };

  const heading = findChild(kids, "title");
  const headingText = heading ? extractAllText(heading.children) : void 0;

  const items = findChildren(kids, "def-item").map((entry) => ({
    term: textOrEmpty(entry.children, "term"),
    definition: textOrEmpty(entry.children, "def")
  }));

  const list = { type: "def-list", items };
  if (headingText) list.title = headingText;
  return list;
}
459
/**
 * Builds a formula block from a `<disp-formula>` node.
 *
 * `<tex-math>` is looked up inside `<alternatives>` when that wrapper exists,
 * otherwise among the formula's own children. When TeX is found it is stored
 * in `tex`; otherwise the trimmed text of the formula is stored in `text`.
 *
 * @param {object} node - The `<disp-formula>` node.
 * @returns {{type: "formula", id?: string, label?: string, tex?: string, text?: string}}
 */
function parseDispFormula(node) {
  const kids = getChildren(node);
  const formula = { type: "formula" };

  const formulaId = getAttr(node, "id");
  if (formulaId) formula.id = formulaId;

  const labelEl = findChild(kids, "label");
  const labelText = labelEl ? extractAllText(labelEl.children) : void 0;
  if (labelText) formula.label = labelText;

  const wrapper = findChild(kids, "alternatives");
  const texEl = findChild(wrapper ? wrapper.children : kids, "tex-math");
  if (texEl) {
    formula.tex = extractAllText(texEl.children);
    return formula;
  }

  // Fallback: plain text of the formula, leaving out entries keyed "label"
  // (presumably the raw <label> child — confirm against the parser's node shape).
  const plain = extractAllText(kids.filter((kid) => !("label" in kid))).trim();
  if (plain) formula.text = plain;
  return formula;
}
479
// Tags that parseParagraph lifts out of a <p> and emits as separate blocks.
const BLOCK_TAGS = /* @__PURE__ */ new Set([
  "table-wrap",
  "fig",
  "disp-quote",
  "boxed-text"
]);
480
/**
 * Builds a blockquote block from a `<disp-quote>` node.
 *
 * The inline content of each direct `<p>` child is concatenated, with a
 * "\n\n" text entry between consecutive paragraphs. A quote with no `<p>`
 * children has its own children parsed as inline content instead.
 *
 * @param {object} node - The `<disp-quote>` node.
 * @returns {{type: "blockquote", content: Array<object>}}
 */
function parseDispQuote(node) {
  const kids = getChildren(node);
  const paras = findChildren(kids, "p");

  if (paras.length === 0) {
    return { type: "blockquote", content: [...parseInlineContent(kids)] };
  }

  const content = [];
  for (const [position, para] of paras.entries()) {
    if (position > 0) content.push({ type: "text", text: "\n\n" });
    if (para) content.push(...parseInlineContent(para.children));
  }
  return { type: "blockquote", content };
}
494
/**
 * Wraps the result of parseTableWrap as a block of type "table".
 *
 * @param {object} node - The `<table-wrap>` node.
 * @returns {{type: "table", headers: Array, rows: Array, caption?: string}}
 *   `caption` is copied only when it is truthy.
 */
function parseTableBlock(node) {
  const { headers, rows, caption } = parseTableWrap(node);
  const block = { type: "table", headers, rows };
  if (caption) block.caption = caption;
  return block;
}
504
/**
 * Builds a figure block from a `<fig>` node.
 *
 * @param {object} node - The `<fig>` node.
 * @returns {{type: "figure", id?: string, label?: string, caption?: string}}
 *   Each optional field is set only when its source value is non-empty.
 */
function parseFigBlock(node) {
  const kids = getChildren(node);
  const figure = { type: "figure" };

  const id = getAttr(node, "id");
  if (id) figure.id = id;

  // <label> and <caption> are handled identically: copy non-empty text.
  for (const field of ["label", "caption"]) {
    const el = findChild(kids, field);
    if (!el) continue;
    const value = extractAllText(el.children);
    if (value) figure[field] = value;
  }

  return figure;
}
521
/**
 * Converts the children of a `<p>` into one or more blocks.
 *
 * A paragraph with no children tagged as one of BLOCK_TAGS becomes a single
 * paragraph block. Otherwise the paragraph is split: each run of inline
 * children becomes its own paragraph block (runs that contain only
 * whitespace text are dropped) and each nested block element is emitted as
 * its own block, all in document order.
 *
 * @param {Array} pChildren - Child nodes of the `<p>` element.
 * @returns {Array<object>} The resulting blocks.
 */
function parseParagraph(pChildren) {
  const isNestedBlock = (node) => {
    const tagName = getTagName(node);
    return tagName != null && BLOCK_TAGS.has(tagName);
  };

  if (!pChildren.some(isNestedBlock)) {
    return [{ type: "paragraph", content: parseInlineContent(pChildren) }];
  }

  // One parser per nested block tag.
  const nestedParsers = new Map([
    ["table-wrap", parseTableBlock],
    ["fig", parseFigBlock],
    ["disp-quote", parseDispQuote],
    ["boxed-text", parseBoxedText]
  ]);

  const out = [];
  let pending = [];

  // Emits the buffered inline run as a paragraph, unless it is whitespace only.
  const flushPending = () => {
    if (pending.length === 0) return;
    const content = parseInlineContent(pending);
    const meaningful = content.some(
      (part) => part.type !== "text" || part.text.trim() !== ""
    );
    if (meaningful) out.push({ type: "paragraph", content });
    pending = [];
  };

  for (const node of pChildren) {
    const parseNested = nestedParsers.get(getTagName(node));
    if (parseNested) {
      flushPending();
      out.push(parseNested(node));
    } else {
      pending.push(node);
    }
  }
  flushPending();

  return out;
}
564
/**
 * Converts the direct children of a section-like container into blocks.
 *
 * Recognised tags: p, list, table-wrap, fig, disp-quote, boxed-text,
 * def-list, disp-formula, preformat and supplementary-material. Nodes
 * without a tag name and all other tags are skipped.
 *
 * @param {Array} sectionChildren - Child nodes of the container.
 * @returns {Array<object>} Blocks in document order.
 */
function parseBlockContent(sectionChildren) {
  const out = [];

  for (const node of sectionChildren) {
    const tagName = getTagName(node);
    if (!tagName) continue;

    switch (tagName) {
      case "p":
        // A single <p> may expand to several blocks.
        out.push(...parseParagraph(getChildren(node)));
        break;
      case "list":
        out.push(parseList(node));
        break;
      case "table-wrap":
        out.push(parseTableBlock(node));
        break;
      case "fig":
        out.push(parseFigBlock(node));
        break;
      case "disp-quote":
        out.push(parseDispQuote(node));
        break;
      case "boxed-text":
        out.push(parseBoxedText(node));
        break;
      case "def-list":
        out.push(parseDefList(node));
        break;
      case "disp-formula":
        out.push(parseDispFormula(node));
        break;
      case "preformat":
        out.push({ type: "preformat", text: extractAllText(getChildren(node)) });
        break;
      case "supplementary-material": {
        // Rendered as a plain paragraph of the form "label: caption".
        const inner = getChildren(node);
        const labelEl = findChild(inner, "label");
        const captionEl = findChild(inner, "caption");
        const summary = [
          labelEl ? extractAllText(labelEl.children) : "",
          captionEl ? extractAllText(captionEl.children) : ""
        ].filter(Boolean).join(": ");
        if (summary) {
          out.push({ type: "paragraph", content: [{ type: "text", text: summary }] });
        }
        break;
      }
      default:
        break;
    }
  }

  return out;
}
602
/**
 * Recursively converts the children of a `<sec>`-like element into a section.
 *
 * @param {Array} secChildren - Child nodes of the section element.
 * @param {number} level - Heading level of this section; nested `<sec>`
 *   children are parsed with `level + 1`.
 * @returns {{title: string, level: number, content: Array<object>, subsections: Array<object>}}
 *   `title` is "" when the section has no `<title>` child.
 */
function parseSection(secChildren, level) {
  const heading = findChild(secChildren, "title");
  const title = heading ? extractAllText(heading.children) : "";
  const content = parseBlockContent(secChildren);
  const subsections = findChildren(secChildren, "sec").map((nested) =>
    parseSection(nested.children, level + 1)
  );
  return { title, level, content, subsections };
}
613
/**
 * Parses the `<body>` of a JATS XML document into level-2 sections.
 *
 * When the body has top-level `<sec>` elements, each becomes a section.
 * Otherwise the body's own block content, if any, is wrapped in a single
 * untitled section.
 *
 * NOTE(review): when `<sec>` elements exist, block content sitting directly
 * in `<body>` next to them is not included in the output — confirm that is
 * intended.
 *
 * @param {string} xml - The JATS XML source.
 * @returns {Array<object>} Sections, or [] when no article or body is found.
 */
function parseJatsBody(xml) {
  const article = findArticle(parser.parse(xml));
  if (!article) return [];

  const body = findChild(article.children, "body");
  if (!body) return [];

  const topLevelSecs = findChildren(body.children, "sec");
  if (topLevelSecs.length > 0) {
    return topLevelSecs.map((sec) => parseSection(sec.children, 2));
  }

  const looseContent = parseBlockContent(body.children);
  return looseContent.length > 0
    ? [{ title: "", level: 2, content: looseContent, subsections: [] }]
    : [];
}
633
/**
 * Formats the children of an `<element-citation>` as one citation string:
 * "Authors. Article title. Source. Year;Volume:First-Last."
 *
 * Differences from a plain `join(". ") + "."`:
 * - a citation with no usable fields returns "" (not "."), so callers that
 *   test the text for truthiness can skip it;
 * - empty fields are skipped instead of leaving ". ." gaps;
 * - a field that already ends in ".", "?" or "!" is not given an extra period.
 *
 * Field texts are trimmed before use.
 *
 * @param {Array} children - Child nodes of the `<element-citation>` element.
 * @returns {string} The formatted citation, or "" when nothing could be extracted.
 */
function formatElementCitation(children) {
  // Trimmed text of the first child with the given tag, or "" when absent.
  const textOf = (nodes, tag) => {
    const match = findChild(nodes, tag);
    return match ? extractAllText(match.children).trim() : "";
  };

  const pieces = [];

  const personGroup = findChild(children, "person-group");
  if (personGroup) {
    // "Surname Given" per author; authors without a surname are left out.
    const authors = findChildren(personGroup.children, "name")
      .map((name) => {
        const surname = textOf(name.children, "surname");
        const given = textOf(name.children, "given-names");
        return surname && given ? `${surname} ${given}` : surname;
      })
      .filter(Boolean);
    if (authors.length > 0) pieces.push(authors.join(", "));
  }

  pieces.push(textOf(children, "article-title"), textOf(children, "source"));

  // Volume and pages are only emitted together with a year.
  const year = textOf(children, "year");
  if (year) {
    let dateAndLocator = year;
    const volume = textOf(children, "volume");
    if (volume) dateAndLocator += `;${volume}`;
    const firstPage = textOf(children, "fpage");
    if (firstPage) {
      const lastPage = textOf(children, "lpage");
      dateAndLocator += `:${firstPage}${lastPage ? `-${lastPage}` : ""}`;
    }
    pieces.push(dateAndLocator);
  }

  return pieces
    .filter(Boolean)
    .map((piece) => (/[.?!]$/.test(piece) ? piece : `${piece}.`))
    .join(" ");
}
680
/**
 * Returns the trimmed text of a `<mixed-citation>`, removing one copy of any
 * `<pub-id>` value that occurs more than once in that text.
 *
 * After each removal, runs of whitespace are collapsed to a single space and
 * the text is trimmed again.
 *
 * @param {Array} children - Child nodes of the `<mixed-citation>` element.
 * @returns {string} The citation text.
 */
function extractMixedCitationText(children) {
  const idTexts = findChildren(children, "pub-id")
    .map((node) => extractAllText(node.children).trim())
    .filter(Boolean);

  let cleaned = extractAllText(children).trim();

  for (const idText of idTexts) {
    // Count literal occurrences of the identifier in the current text.
    const literal = new RegExp(idText.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g");
    const hits = cleaned.match(literal);
    if (hits === null || hits.length < 2) continue;

    // String-pattern replace drops only the first occurrence.
    cleaned = cleaned.replace(idText, "").replace(/\s{2,}/g, " ").trim();
  }

  return cleaned;
}
698
/**
 * Collects DOI, PMID and PMCID values from the `<pub-id>` children of a citation.
 *
 * The `pub-id-type` attribute selects the field: "doi", "pmid", and either
 * "pmc" or "pmcid" (stored without a leading "PMC"). Empty values and other
 * types are ignored; a later `<pub-id>` of the same type overwrites an
 * earlier one.
 *
 * @param {Array} children - Child nodes of the citation element.
 * @returns {{doi?: string, pmid?: string, pmcid?: string}}
 */
function extractPubIds(children) {
  const ids = {};

  for (const node of findChildren(children, "pub-id")) {
    const kind = node.attrs["pub-id-type"];
    const value = extractAllText(node.children).trim();
    if (!value) continue;

    switch (kind) {
      case "doi":
        ids.doi = value;
        break;
      case "pmid":
        ids.pmid = value;
        break;
      case "pmc":
      case "pmcid":
        ids.pmcid = value.replace(/^PMC/, "");
        break;
      default:
        break;
    }
  }

  return ids;
}
713
/**
 * Removes publication identifiers (and their labels) from citation text.
 *
 * For each of `pubIds.doi`, `pubIds.pmid` and `pubIds.pmcid` that is set:
 * 1. labelled forms are removed case-insensitively — "doi: X", "PMID X",
 *    "PMC123", "PMCID: PMC123", and DOI resolver URLs such as
 *    "https://doi.org/X";
 * 2. remaining bare occurrences of the value are removed (case-sensitive).
 *
 * Purely numeric values are only matched as whole numbers, so a PMID of
 * "1234" does not eat the middle of a page number like "112345".
 *
 * Afterwards whitespace runs are collapsed, the text is trimmed, and a
 * trailing ". ." left by a removal is reduced to ".".
 *
 * @param {string} text - Citation text.
 * @param {{doi?: string, pmid?: string, pmcid?: string}} pubIds - Identifiers
 *   to strip; `pmcid` is expected without the "PMC" prefix.
 * @returns {string} The cleaned text.
 */
function stripPubIdValues(text, pubIds) {
  const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Either a DOI resolver URL prefix, or a textual label with optional
  // ":"/whitespace separator and an optional "PMC" prefix before the digits.
  // "pmcid" is listed before "pmc" so the longer label wins.
  const LABEL = "(?:https?://(?:dx\\.)?doi\\.org/|\\b(?:doi|pmcid|pmid|pmc)[:\\s]*(?:PMC)?)";

  let result = text;
  const values = [pubIds.doi, pubIds.pmid, pubIds.pmcid].filter(Boolean);
  for (const value of values) {
    const escaped = escapeRegExp(value);
    // Numeric identifiers must not match inside a longer digit run.
    const bounded = /^\d+$/.test(value) ? `(?<!\\d)${escaped}(?!\\d)` : escaped;
    result = result.replace(new RegExp(`${LABEL}${bounded}`, "gi"), "");
    result = result.replace(new RegExp(bounded, "g"), "");
  }

  result = result.replace(/\s{2,}/g, " ").trim();
  return result.replace(/\.\s*\.$/, ".");
}
731
/**
 * Parses the reference list (`<back>/<ref-list>/<ref>`) of a JATS document.
 *
 * For each `<ref>` the citation is looked up inside `<citation-alternatives>`
 * when that wrapper exists, otherwise among the ref's own children. A
 * `<mixed-citation>` is preferred over an `<element-citation>`; when neither
 * exists, the ref's text (minus entries keyed "label") is used. References
 * lacking an id or text are skipped.
 *
 * @param {string} xml - The JATS XML source.
 * @returns {Array<{id: string, text: string, doi?: string, pmid?: string, pmcid?: string}>}
 *   Parsed references, or [] when the article, back matter or ref-list is missing.
 */
function parseJatsReferences(xml) {
  const article = findArticle(parser.parse(xml));
  if (!article) return [];
  const back = findChild(article.children, "back");
  if (!back) return [];
  const refList = findChild(back.children, "ref-list");
  if (!refList) return [];

  const references = [];

  for (const ref of findChildren(refList.children, "ref")) {
    const id = ref.attrs["id"] ?? "";
    const wrapper = findChild(ref.children, "citation-alternatives");
    const candidates = wrapper ? wrapper.children : ref.children;

    const mixed = findChild(candidates, "mixed-citation");
    const structured = mixed ? void 0 : findChild(candidates, "element-citation");
    const citation = mixed || structured;

    if (citation) {
      const rawText = mixed
        ? extractMixedCitationText(citation.children)
        : formatElementCitation(citation.children);
      const pubIds = extractPubIds(citation.children);
      // Identifiers are reported as separate fields, so drop them from the text.
      const cleaned = stripPubIdValues(rawText, pubIds);
      if (id && cleaned) references.push({ id, text: cleaned, ...pubIds });
      continue;
    }

    // No recognised citation element: fall back to the ref's plain text.
    const unlabelled = ref.children.filter((kid) => !("label" in kid));
    const fallbackText = extractAllText(unlabelled).trim();
    if (id && fallbackText) references.push({ id, text: fallbackText });
  }

  return references;
}
769
/**
 * Parses back-matter material from a JATS document.
 *
 * From `<back>`: acknowledgments (`<ack>` paragraphs joined by blank lines),
 * appendices (`<app-group>/<app>` parsed as level-2 sections), footnotes
 * (`<fn-group>/<fn>`), notes (`<notes>`, expanded into their `<sec>` or nested
 * `<notes>` children when present) and glossaries (appended to `notes` as
 * "term: definition" lines). From `<floats-group>` (a child of the article):
 * figures and tables.
 *
 * @param {string} xml - The JATS XML source.
 * @returns {{acknowledgments?: string, appendices?: Array<object>,
 *   footnotes?: Array<{id: string, text: string}>,
 *   notes?: Array<{title: string, text: string}>, floats?: Array<object>}}
 *   Only the parts that were found are present; {} when there is no article.
 */
function parseJatsBackMatter(xml) {
  const article = findArticle(parser.parse(xml));
  if (!article) return {};

  const result = {};

  // Text of the first child with the given tag, or `fallback` when absent.
  const childText = (nodes, tag, fallback) => {
    const match = findChild(nodes, tag);
    return match ? extractAllText(match.children) : fallback;
  };
  // Texts of the direct <p> children, separated by blank lines.
  const joinParagraphs = (nodes) =>
    findChildren(nodes, "p").map((para) => extractAllText(para.children)).join("\n\n");

  const back = findChild(article.children, "back");
  if (back) {
    const ack = findChild(back.children, "ack");
    if (ack && findChildren(ack.children, "p").length > 0) {
      result.acknowledgments = joinParagraphs(ack.children);
    }

    const appGroup = findChild(back.children, "app-group");
    if (appGroup) {
      const apps = findChildren(appGroup.children, "app");
      if (apps.length > 0) {
        result.appendices = apps.map((app) => parseSection(app.children, 2));
      }
    }

    const fnGroup = findChild(back.children, "fn-group");
    if (fnGroup) {
      const fns = findChildren(fnGroup.children, "fn");
      if (fns.length > 0) {
        result.footnotes = fns.map((fn) => {
          // Title first, then each paragraph; blank pieces are omitted.
          const heading = findChild(fn.children, "title");
          const pieces = [
            heading ? extractAllText(heading.children).trim() : "",
            ...findChildren(fn.children, "p").map((para) => extractAllText(para.children).trim())
          ].filter(Boolean);
          return {
            id: fn.attrs["id"] ?? "",
            text: pieces.join(" ")
          };
        });
      }
    }

    const notesElements = findChildren(back.children, "notes");
    if (notesElements.length > 0) {
      const collected = [];
      // Records one note from a container unless both title and text are empty.
      const addNote = (container) => {
        const title = childText(container.children, "title", "");
        const text = joinParagraphs(container.children);
        if (title || text) collected.push({ title, text });
      };

      for (const note of notesElements) {
        const secs = findChildren(note.children, "sec");
        const nestedNotes = findChildren(note.children, "notes");
        // <sec> children take precedence over nested <notes>.
        const parts = secs.length > 0 ? secs : nestedNotes;
        if (parts.length > 0) {
          for (const part of parts) addNote(part);
        } else {
          addNote(note);
        }
      }

      if (collected.length > 0) {
        result.notes = collected;
      }
    }

    for (const glossary of findChildren(back.children, "glossary")) {
      const title = childText(glossary.children, "title", "Glossary");
      const defList = findChild(glossary.children, "def-list");
      if (!defList) continue;

      const lines = findChildren(defList.children, "def-item").map(
        (entry) => `${childText(entry.children, "term", "")}: ${childText(entry.children, "def", "")}`
      );
      if (!result.notes) result.notes = [];
      result.notes.push({ title, text: lines.join("\n") });
    }
  }

  const floatsGroup = findChild(article.children, "floats-group");
  if (floatsGroup) {
    const floats = [];
    for (const node of floatsGroup.children) {
      const tagName = getTagName(node);
      if (tagName === "fig") {
        floats.push(parseFigBlock(node));
      } else if (tagName === "table-wrap") {
        floats.push(parseTableBlock(node));
      }
    }
    if (floats.length > 0) {
      result.floats = floats;
    }
  }

  return result;
}
881
- export {
882
- parseJatsBackMatter,
883
- parseJatsBody,
884
- parseJatsMetadata,
885
- parseJatsReferences
886
- };
887
- //# sourceMappingURL=jats-parser.js.map