@mantra-ai/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (385) hide show
  1. package/dist/ai/google/client.d.ts +67 -0
  2. package/dist/ai/google/client.d.ts.map +1 -0
  3. package/dist/ai/google/client.js +169 -0
  4. package/dist/ai/google/client.js.map +1 -0
  5. package/dist/ai/google/generate.d.ts +10 -0
  6. package/dist/ai/google/generate.d.ts.map +1 -0
  7. package/dist/ai/google/generate.js +137 -0
  8. package/dist/ai/google/generate.js.map +1 -0
  9. package/dist/ai/google/index.d.ts +4 -0
  10. package/dist/ai/google/index.d.ts.map +1 -0
  11. package/dist/ai/google/index.js +4 -0
  12. package/dist/ai/google/index.js.map +1 -0
  13. package/dist/ai/google/types.d.ts +88 -0
  14. package/dist/ai/google/types.d.ts.map +1 -0
  15. package/dist/ai/google/types.js +55 -0
  16. package/dist/ai/google/types.js.map +1 -0
  17. package/dist/ai/index.d.ts +3 -0
  18. package/dist/ai/index.d.ts.map +1 -0
  19. package/dist/ai/index.js +3 -0
  20. package/dist/ai/index.js.map +1 -0
  21. package/dist/ai/openai/client.d.ts +22 -0
  22. package/dist/ai/openai/client.d.ts.map +1 -0
  23. package/dist/ai/openai/client.js +49 -0
  24. package/dist/ai/openai/client.js.map +1 -0
  25. package/dist/ai/openai/generate.d.ts +14 -0
  26. package/dist/ai/openai/generate.d.ts.map +1 -0
  27. package/dist/ai/openai/generate.js +178 -0
  28. package/dist/ai/openai/generate.js.map +1 -0
  29. package/dist/ai/openai/index.d.ts +4 -0
  30. package/dist/ai/openai/index.d.ts.map +1 -0
  31. package/dist/ai/openai/index.js +4 -0
  32. package/dist/ai/openai/index.js.map +1 -0
  33. package/dist/ai/openai/types.d.ts +86 -0
  34. package/dist/ai/openai/types.d.ts.map +1 -0
  35. package/dist/ai/openai/types.js +56 -0
  36. package/dist/ai/openai/types.js.map +1 -0
  37. package/dist/ai/prompts/index.d.ts +1 -0
  38. package/dist/ai/prompts/index.d.ts.map +1 -0
  39. package/dist/ai/prompts/index.js +2 -0
  40. package/dist/ai/prompts/index.js.map +1 -0
  41. package/dist/errors/index.d.ts +3 -0
  42. package/dist/errors/index.d.ts.map +1 -0
  43. package/dist/errors/index.js +4 -0
  44. package/dist/errors/index.js.map +1 -0
  45. package/dist/errors/schemas.d.ts +304 -0
  46. package/dist/errors/schemas.d.ts.map +1 -0
  47. package/dist/errors/schemas.js +57 -0
  48. package/dist/errors/schemas.js.map +1 -0
  49. package/dist/errors/types.d.ts +30 -0
  50. package/dist/errors/types.d.ts.map +1 -0
  51. package/dist/errors/types.js +33 -0
  52. package/dist/errors/types.js.map +1 -0
  53. package/dist/index.d.ts +21 -0
  54. package/dist/index.d.ts.map +1 -0
  55. package/dist/index.js +21 -0
  56. package/dist/index.js.map +1 -0
  57. package/dist/normalization/jats/index.d.ts +4 -0
  58. package/dist/normalization/jats/index.d.ts.map +1 -0
  59. package/dist/normalization/jats/index.js +3 -0
  60. package/dist/normalization/jats/index.js.map +1 -0
  61. package/dist/normalization/jats/normalize.d.ts +7 -0
  62. package/dist/normalization/jats/normalize.d.ts.map +1 -0
  63. package/dist/normalization/jats/normalize.js +213 -0
  64. package/dist/normalization/jats/normalize.js.map +1 -0
  65. package/dist/normalization/jats/utils/build/finalize.d.ts +3 -0
  66. package/dist/normalization/jats/utils/build/finalize.d.ts.map +1 -0
  67. package/dist/normalization/jats/utils/build/finalize.js +462 -0
  68. package/dist/normalization/jats/utils/build/finalize.js.map +1 -0
  69. package/dist/normalization/jats/utils/build/flatten.d.ts +20 -0
  70. package/dist/normalization/jats/utils/build/flatten.d.ts.map +1 -0
  71. package/dist/normalization/jats/utils/build/flatten.js +502 -0
  72. package/dist/normalization/jats/utils/build/flatten.js.map +1 -0
  73. package/dist/normalization/jats/utils/build/meta.d.ts +10 -0
  74. package/dist/normalization/jats/utils/build/meta.d.ts.map +1 -0
  75. package/dist/normalization/jats/utils/build/meta.js +32 -0
  76. package/dist/normalization/jats/utils/build/meta.js.map +1 -0
  77. package/dist/normalization/jats/utils/build/version.d.ts +3 -0
  78. package/dist/normalization/jats/utils/build/version.d.ts.map +1 -0
  79. package/dist/normalization/jats/utils/build/version.js +11 -0
  80. package/dist/normalization/jats/utils/build/version.js.map +1 -0
  81. package/dist/normalization/jats/utils/category.d.ts +11 -0
  82. package/dist/normalization/jats/utils/category.d.ts.map +1 -0
  83. package/dist/normalization/jats/utils/category.js +431 -0
  84. package/dist/normalization/jats/utils/category.js.map +1 -0
  85. package/dist/normalization/jats/utils/collectors/abstracts.d.ts +3 -0
  86. package/dist/normalization/jats/utils/collectors/abstracts.d.ts.map +1 -0
  87. package/dist/normalization/jats/utils/collectors/abstracts.js +168 -0
  88. package/dist/normalization/jats/utils/collectors/abstracts.js.map +1 -0
  89. package/dist/normalization/jats/utils/collectors/back.d.ts +35 -0
  90. package/dist/normalization/jats/utils/collectors/back.d.ts.map +1 -0
  91. package/dist/normalization/jats/utils/collectors/back.js +801 -0
  92. package/dist/normalization/jats/utils/collectors/back.js.map +1 -0
  93. package/dist/normalization/jats/utils/collectors/contributors.d.ts +4 -0
  94. package/dist/normalization/jats/utils/collectors/contributors.d.ts.map +1 -0
  95. package/dist/normalization/jats/utils/collectors/contributors.js +77 -0
  96. package/dist/normalization/jats/utils/collectors/contributors.js.map +1 -0
  97. package/dist/normalization/jats/utils/collectors/keywords.d.ts +2 -0
  98. package/dist/normalization/jats/utils/collectors/keywords.d.ts.map +1 -0
  99. package/dist/normalization/jats/utils/collectors/keywords.js +14 -0
  100. package/dist/normalization/jats/utils/collectors/keywords.js.map +1 -0
  101. package/dist/normalization/jats/utils/collectors/meta.d.ts +6 -0
  102. package/dist/normalization/jats/utils/collectors/meta.d.ts.map +1 -0
  103. package/dist/normalization/jats/utils/collectors/meta.js +103 -0
  104. package/dist/normalization/jats/utils/collectors/meta.js.map +1 -0
  105. package/dist/normalization/jats/utils/collectors/sections.d.ts +7 -0
  106. package/dist/normalization/jats/utils/collectors/sections.d.ts.map +1 -0
  107. package/dist/normalization/jats/utils/collectors/sections.js +484 -0
  108. package/dist/normalization/jats/utils/collectors/sections.js.map +1 -0
  109. package/dist/normalization/jats/utils/licenses.d.ts +5 -0
  110. package/dist/normalization/jats/utils/licenses.d.ts.map +1 -0
  111. package/dist/normalization/jats/utils/licenses.js +64 -0
  112. package/dist/normalization/jats/utils/licenses.js.map +1 -0
  113. package/dist/normalization/jats/utils/po/nodes.d.ts +6 -0
  114. package/dist/normalization/jats/utils/po/nodes.d.ts.map +1 -0
  115. package/dist/normalization/jats/utils/po/nodes.js +60 -0
  116. package/dist/normalization/jats/utils/po/nodes.js.map +1 -0
  117. package/dist/normalization/jats/utils/po/query.d.ts +7 -0
  118. package/dist/normalization/jats/utils/po/query.d.ts.map +1 -0
  119. package/dist/normalization/jats/utils/po/query.js +67 -0
  120. package/dist/normalization/jats/utils/po/query.js.map +1 -0
  121. package/dist/normalization/jats/utils/po/serialize.d.ts +4 -0
  122. package/dist/normalization/jats/utils/po/serialize.d.ts.map +1 -0
  123. package/dist/normalization/jats/utils/po/serialize.js +329 -0
  124. package/dist/normalization/jats/utils/po/serialize.js.map +1 -0
  125. package/dist/normalization/jats/utils/po/text.d.ts +7 -0
  126. package/dist/normalization/jats/utils/po/text.d.ts.map +1 -0
  127. package/dist/normalization/jats/utils/po/text.js +114 -0
  128. package/dist/normalization/jats/utils/po/text.js.map +1 -0
  129. package/dist/normalization/jats/utils/references.d.ts +26 -0
  130. package/dist/normalization/jats/utils/references.d.ts.map +1 -0
  131. package/dist/normalization/jats/utils/references.js +371 -0
  132. package/dist/normalization/jats/utils/references.js.map +1 -0
  133. package/dist/normalization/jats/utils/strings.d.ts +8 -0
  134. package/dist/normalization/jats/utils/strings.d.ts.map +1 -0
  135. package/dist/normalization/jats/utils/strings.js +197 -0
  136. package/dist/normalization/jats/utils/strings.js.map +1 -0
  137. package/dist/normalization/jats/utils/types.d.ts +233 -0
  138. package/dist/normalization/jats/utils/types.d.ts.map +1 -0
  139. package/dist/normalization/jats/utils/types.js +2 -0
  140. package/dist/normalization/jats/utils/types.js.map +1 -0
  141. package/dist/normalization/jats/utils/xml.d.ts +5 -0
  142. package/dist/normalization/jats/utils/xml.d.ts.map +1 -0
  143. package/dist/normalization/jats/utils/xml.js +69 -0
  144. package/dist/normalization/jats/utils/xml.js.map +1 -0
  145. package/dist/normalization/normalized-doc-schema.d.ts +1094 -0
  146. package/dist/normalization/normalized-doc-schema.d.ts.map +1 -0
  147. package/dist/normalization/normalized-doc-schema.js +410 -0
  148. package/dist/normalization/normalized-doc-schema.js.map +1 -0
  149. package/dist/normalization/pdf/index.d.ts +4 -0
  150. package/dist/normalization/pdf/index.d.ts.map +1 -0
  151. package/dist/normalization/pdf/index.js +3 -0
  152. package/dist/normalization/pdf/index.js.map +1 -0
  153. package/dist/normalization/pdf/normalize.d.ts +31 -0
  154. package/dist/normalization/pdf/normalize.d.ts.map +1 -0
  155. package/dist/normalization/pdf/normalize.js +321 -0
  156. package/dist/normalization/pdf/normalize.js.map +1 -0
  157. package/dist/normalization/pdf/prompt.d.ts +3 -0
  158. package/dist/normalization/pdf/prompt.d.ts.map +1 -0
  159. package/dist/normalization/pdf/prompt.js +118 -0
  160. package/dist/normalization/pdf/prompt.js.map +1 -0
  161. package/dist/sources/arxiv/client.d.ts +4 -0
  162. package/dist/sources/arxiv/client.d.ts.map +1 -0
  163. package/dist/sources/arxiv/client.js +13 -0
  164. package/dist/sources/arxiv/client.js.map +1 -0
  165. package/dist/sources/biorxiv/client.d.ts +21 -0
  166. package/dist/sources/biorxiv/client.d.ts.map +1 -0
  167. package/dist/sources/biorxiv/client.js +173 -0
  168. package/dist/sources/biorxiv/client.js.map +1 -0
  169. package/dist/sources/crossref/client.d.ts +3 -0
  170. package/dist/sources/crossref/client.d.ts.map +1 -0
  171. package/dist/sources/crossref/client.js +24 -0
  172. package/dist/sources/crossref/client.js.map +1 -0
  173. package/dist/sources/europepmc/client.d.ts +3 -0
  174. package/dist/sources/europepmc/client.d.ts.map +1 -0
  175. package/dist/sources/europepmc/client.js +29 -0
  176. package/dist/sources/europepmc/client.js.map +1 -0
  177. package/dist/sources/medrxiv/browser.d.ts +16 -0
  178. package/dist/sources/medrxiv/browser.d.ts.map +1 -0
  179. package/dist/sources/medrxiv/browser.js +210 -0
  180. package/dist/sources/medrxiv/browser.js.map +1 -0
  181. package/dist/sources/medrxiv/client.d.ts +34 -0
  182. package/dist/sources/medrxiv/client.d.ts.map +1 -0
  183. package/dist/sources/medrxiv/client.js +673 -0
  184. package/dist/sources/medrxiv/client.js.map +1 -0
  185. package/dist/sources/medrxiv/shared.d.ts +7 -0
  186. package/dist/sources/medrxiv/shared.d.ts.map +1 -0
  187. package/dist/sources/medrxiv/shared.js +18 -0
  188. package/dist/sources/medrxiv/shared.js.map +1 -0
  189. package/dist/sources/plos/client.d.ts +13 -0
  190. package/dist/sources/plos/client.d.ts.map +1 -0
  191. package/dist/sources/plos/client.js +147 -0
  192. package/dist/sources/plos/client.js.map +1 -0
  193. package/dist/sources/preprint-discovery.d.ts +55 -0
  194. package/dist/sources/preprint-discovery.d.ts.map +1 -0
  195. package/dist/sources/preprint-discovery.js +115 -0
  196. package/dist/sources/preprint-discovery.js.map +1 -0
  197. package/dist/types/expand.d.ts +5 -0
  198. package/dist/types/expand.d.ts.map +1 -0
  199. package/dist/types/expand.js +20 -0
  200. package/dist/types/expand.js.map +1 -0
  201. package/dist/types/methods-types.d.ts +37 -0
  202. package/dist/types/methods-types.d.ts.map +1 -0
  203. package/dist/types/methods-types.js +2 -0
  204. package/dist/types/methods-types.js.map +1 -0
  205. package/dist/types/multi-input-types.d.ts +57 -0
  206. package/dist/types/multi-input-types.d.ts.map +1 -0
  207. package/dist/types/multi-input-types.js +2 -0
  208. package/dist/types/multi-input-types.js.map +1 -0
  209. package/dist/types/paper/types.d.ts +41 -0
  210. package/dist/types/paper/types.d.ts.map +1 -0
  211. package/dist/types/paper/types.js +2 -0
  212. package/dist/types/paper/types.js.map +1 -0
  213. package/dist/types/results-types.d.ts +122 -0
  214. package/dist/types/results-types.d.ts.map +1 -0
  215. package/dist/types/results-types.js +17 -0
  216. package/dist/types/results-types.js.map +1 -0
  217. package/dist/types/supp-types.d.ts +6 -0
  218. package/dist/types/supp-types.d.ts.map +1 -0
  219. package/dist/types/supp-types.js +2 -0
  220. package/dist/types/supp-types.js.map +1 -0
  221. package/dist/types/version.d.ts +1828 -0
  222. package/dist/types/version.d.ts.map +1 -0
  223. package/dist/types/version.js +311 -0
  224. package/dist/types/version.js.map +1 -0
  225. package/dist/types/work.d.ts +4455 -0
  226. package/dist/types/work.d.ts.map +1 -0
  227. package/dist/types/work.js +330 -0
  228. package/dist/types/work.js.map +1 -0
  229. package/dist/works/adapters/crossref.d.ts +28 -0
  230. package/dist/works/adapters/crossref.d.ts.map +1 -0
  231. package/dist/works/adapters/crossref.js +43 -0
  232. package/dist/works/adapters/crossref.js.map +1 -0
  233. package/dist/works/adapters/europepmc.d.ts +14 -0
  234. package/dist/works/adapters/europepmc.d.ts.map +1 -0
  235. package/dist/works/adapters/europepmc.js +46 -0
  236. package/dist/works/adapters/europepmc.js.map +1 -0
  237. package/dist/works/adapters/openalex.d.ts +5 -0
  238. package/dist/works/adapters/openalex.d.ts.map +1 -0
  239. package/dist/works/adapters/openalex.js +75 -0
  240. package/dist/works/adapters/openalex.js.map +1 -0
  241. package/dist/works/errors.d.ts +23 -0
  242. package/dist/works/errors.d.ts.map +1 -0
  243. package/dist/works/errors.js +37 -0
  244. package/dist/works/errors.js.map +1 -0
  245. package/dist/works/id/detect-identifier.d.ts +15 -0
  246. package/dist/works/id/detect-identifier.d.ts.map +1 -0
  247. package/dist/works/id/detect-identifier.js +50 -0
  248. package/dist/works/id/detect-identifier.js.map +1 -0
  249. package/dist/works/id/normalize-external-id.d.ts +3 -0
  250. package/dist/works/id/normalize-external-id.d.ts.map +1 -0
  251. package/dist/works/id/normalize-external-id.js +44 -0
  252. package/dist/works/id/normalize-external-id.js.map +1 -0
  253. package/dist/works/id/normalize-ids.d.ts +66 -0
  254. package/dist/works/id/normalize-ids.d.ts.map +1 -0
  255. package/dist/works/id/normalize-ids.js +112 -0
  256. package/dist/works/id/normalize-ids.js.map +1 -0
  257. package/dist/works/id/normalize-internals.d.ts +7 -0
  258. package/dist/works/id/normalize-internals.d.ts.map +1 -0
  259. package/dist/works/id/normalize-internals.js +65 -0
  260. package/dist/works/id/normalize-internals.js.map +1 -0
  261. package/dist/works/id/resolve.d.ts +31 -0
  262. package/dist/works/id/resolve.d.ts.map +1 -0
  263. package/dist/works/id/resolve.js +123 -0
  264. package/dist/works/id/resolve.js.map +1 -0
  265. package/dist/works/id/resolveIds/assign.d.ts +4 -0
  266. package/dist/works/id/resolveIds/assign.d.ts.map +1 -0
  267. package/dist/works/id/resolveIds/assign.js +15 -0
  268. package/dist/works/id/resolveIds/assign.js.map +1 -0
  269. package/dist/works/id/resolveIds/flags.d.ts +11 -0
  270. package/dist/works/id/resolveIds/flags.d.ts.map +1 -0
  271. package/dist/works/id/resolveIds/flags.js +27 -0
  272. package/dist/works/id/resolveIds/flags.js.map +1 -0
  273. package/dist/works/id/resolveIds/idctx.d.ts +4 -0
  274. package/dist/works/id/resolveIds/idctx.d.ts.map +1 -0
  275. package/dist/works/id/resolveIds/idctx.js +25 -0
  276. package/dist/works/id/resolveIds/idctx.js.map +1 -0
  277. package/dist/works/id/resolveIds/index.d.ts +13 -0
  278. package/dist/works/id/resolveIds/index.d.ts.map +1 -0
  279. package/dist/works/id/resolveIds/index.js +498 -0
  280. package/dist/works/id/resolveIds/index.js.map +1 -0
  281. package/dist/works/id/resolveIds/versioning.d.ts +27 -0
  282. package/dist/works/id/resolveIds/versioning.d.ts.map +1 -0
  283. package/dist/works/id/resolveIds/versioning.js +156 -0
  284. package/dist/works/id/resolveIds/versioning.js.map +1 -0
  285. package/dist/works/id/resolveIds/workWhere.d.ts +3 -0
  286. package/dist/works/id/resolveIds/workWhere.d.ts.map +1 -0
  287. package/dist/works/id/resolveIds/workWhere.js +35 -0
  288. package/dist/works/id/resolveIds/workWhere.js.map +1 -0
  289. package/dist/works/id/types.d.ts +6 -0
  290. package/dist/works/id/types.d.ts.map +1 -0
  291. package/dist/works/id/types.js +2 -0
  292. package/dist/works/id/types.js.map +1 -0
  293. package/dist/works/pdf-fallback/candidates.d.ts +12 -0
  294. package/dist/works/pdf-fallback/candidates.d.ts.map +1 -0
  295. package/dist/works/pdf-fallback/candidates.js +51 -0
  296. package/dist/works/pdf-fallback/candidates.js.map +1 -0
  297. package/dist/works/pdf-fallback/fetch.d.ts +21 -0
  298. package/dist/works/pdf-fallback/fetch.d.ts.map +1 -0
  299. package/dist/works/pdf-fallback/fetch.js +89 -0
  300. package/dist/works/pdf-fallback/fetch.js.map +1 -0
  301. package/dist/works/pdf-fallback/index.d.ts +28 -0
  302. package/dist/works/pdf-fallback/index.d.ts.map +1 -0
  303. package/dist/works/pdf-fallback/index.js +35 -0
  304. package/dist/works/pdf-fallback/index.js.map +1 -0
  305. package/dist/works/plan.d.ts +8 -0
  306. package/dist/works/plan.d.ts.map +1 -0
  307. package/dist/works/plan.js +62 -0
  308. package/dist/works/plan.js.map +1 -0
  309. package/dist/works/strategies/arxiv.d.ts +3 -0
  310. package/dist/works/strategies/arxiv.d.ts.map +1 -0
  311. package/dist/works/strategies/arxiv.js +56 -0
  312. package/dist/works/strategies/arxiv.js.map +1 -0
  313. package/dist/works/strategies/biorxiv.d.ts +3 -0
  314. package/dist/works/strategies/biorxiv.d.ts.map +1 -0
  315. package/dist/works/strategies/biorxiv.js +63 -0
  316. package/dist/works/strategies/biorxiv.js.map +1 -0
  317. package/dist/works/strategies/europepmc.d.ts +3 -0
  318. package/dist/works/strategies/europepmc.d.ts.map +1 -0
  319. package/dist/works/strategies/europepmc.js +15 -0
  320. package/dist/works/strategies/europepmc.js.map +1 -0
  321. package/dist/works/strategies/index.d.ts +12 -0
  322. package/dist/works/strategies/index.d.ts.map +1 -0
  323. package/dist/works/strategies/index.js +19 -0
  324. package/dist/works/strategies/index.js.map +1 -0
  325. package/dist/works/strategies/landing-url.d.ts +3 -0
  326. package/dist/works/strategies/landing-url.d.ts.map +1 -0
  327. package/dist/works/strategies/landing-url.js +10 -0
  328. package/dist/works/strategies/landing-url.js.map +1 -0
  329. package/dist/works/strategies/medrxiv.d.ts +3 -0
  330. package/dist/works/strategies/medrxiv.d.ts.map +1 -0
  331. package/dist/works/strategies/medrxiv.js +47 -0
  332. package/dist/works/strategies/medrxiv.js.map +1 -0
  333. package/dist/works/strategies/plos.d.ts +3 -0
  334. package/dist/works/strategies/plos.d.ts.map +1 -0
  335. package/dist/works/strategies/plos.js +15 -0
  336. package/dist/works/strategies/plos.js.map +1 -0
  337. package/dist/works/strategies/shared.d.ts +11 -0
  338. package/dist/works/strategies/shared.d.ts.map +1 -0
  339. package/dist/works/strategies/shared.js +97 -0
  340. package/dist/works/strategies/shared.js.map +1 -0
  341. package/dist/works/strategies/ten1101.d.ts +3 -0
  342. package/dist/works/strategies/ten1101.d.ts.map +1 -0
  343. package/dist/works/strategies/ten1101.js +84 -0
  344. package/dist/works/strategies/ten1101.js.map +1 -0
  345. package/dist/works/text/acquire-fulltext.d.ts +7 -0
  346. package/dist/works/text/acquire-fulltext.d.ts.map +1 -0
  347. package/dist/works/text/acquire-fulltext.js +62 -0
  348. package/dist/works/text/acquire-fulltext.js.map +1 -0
  349. package/dist/works/text/normalize.d.ts +40 -0
  350. package/dist/works/text/normalize.d.ts.map +1 -0
  351. package/dist/works/text/normalize.js +188 -0
  352. package/dist/works/text/normalize.js.map +1 -0
  353. package/dist/works/types.d.ts +215 -0
  354. package/dist/works/types.d.ts.map +1 -0
  355. package/dist/works/types.js +6 -0
  356. package/dist/works/types.js.map +1 -0
  357. package/dist/works/util/debug.d.ts +7 -0
  358. package/dist/works/util/debug.d.ts.map +1 -0
  359. package/dist/works/util/debug.js +9 -0
  360. package/dist/works/util/debug.js.map +1 -0
  361. package/dist/works/util/license.d.ts +9 -0
  362. package/dist/works/util/license.d.ts.map +1 -0
  363. package/dist/works/util/license.js +39 -0
  364. package/dist/works/util/license.js.map +1 -0
  365. package/dist/works/util/normalize.d.ts +2 -0
  366. package/dist/works/util/normalize.d.ts.map +1 -0
  367. package/dist/works/util/normalize.js +76 -0
  368. package/dist/works/util/normalize.js.map +1 -0
  369. package/dist/works/util/parse.d.ts +8 -0
  370. package/dist/works/util/parse.d.ts.map +1 -0
  371. package/dist/works/util/parse.js +32 -0
  372. package/dist/works/util/parse.js.map +1 -0
  373. package/dist/works/util/source.d.ts +10 -0
  374. package/dist/works/util/source.d.ts.map +1 -0
  375. package/dist/works/util/source.js +48 -0
  376. package/dist/works/util/source.js.map +1 -0
  377. package/dist/works/util/version-label.d.ts +2 -0
  378. package/dist/works/util/version-label.d.ts.map +1 -0
  379. package/dist/works/util/version-label.js +8 -0
  380. package/dist/works/util/version-label.js.map +1 -0
  381. package/dist/works/util/work-id.d.ts +2 -0
  382. package/dist/works/util/work-id.d.ts.map +1 -0
  383. package/dist/works/util/work-id.js +27 -0
  384. package/dist/works/util/work-id.js.map +1 -0
  385. package/package.json +208 -0
@@ -0,0 +1,60 @@
1
+ import { asString } from "../strings";
/**
 * Return the tag name of a preserveOrder-style node.
 *
 * A node is expected to be a plain object whose keys are the tag name and,
 * optionally, the attribute bag stored under ":@". The tag name is the first
 * key that is not ":@".
 *
 * @param {*} node Candidate node (any value is accepted).
 * @returns {string|undefined} The tag key, or undefined when `node` is not a
 *   plain object, has no keys, or carries only the ":@" attribute bag.
 */
export function poTagName(node) {
    if (!node || typeof node !== "object" || Array.isArray(node))
        return undefined;
    // The attribute bag is never a tag, even when it is the only key present;
    // reporting ":@" as a tag would make callers treat attributes as children.
    return Object.keys(node).find((key) => key !== ":@");
}
/**
 * List the child nodes of a preserveOrder-style node.
 *
 * @param {*} node Candidate node.
 * @returns {Array} The value stored under the node's tag key when it is an
 *   array; a single synthetic `{ "#text": value }` child when that value is a
 *   string or number (e.g. `{ "bold": "Text" }`); otherwise an empty array.
 *   Text nodes ("#text") and nodes without a tag name always yield [].
 */
export function poChildren(node) {
    const tag = poTagName(node);
    // No tag, or a text node: there is nothing to descend into.
    if (!tag || tag === "#text")
        return [];
    const value = node[tag];
    // Standard shape: the children array is returned as-is (not copied).
    if (Array.isArray(value))
        return value;
    // Collapsed inline node: wrap the primitive so callers see a text child.
    const isPrimitive = typeof value === "string" || typeof value === "number";
    return isPrimitive ? [{ "#text": value }] : [];
}
/**
 * Tell whether a value represents text.
 *
 * @param {*} node Candidate node or primitive.
 * @returns {boolean} true for bare strings and numbers, and for nodes whose
 *   tag name (per poTagName) is "#text"; false otherwise.
 */
export function poIsTextNode(node) {
    const kind = typeof node;
    return kind === "string" || kind === "number" || poTagName(node) === "#text";
}
/**
 * Read one attribute from a preserveOrder-style node.
 *
 * Attributes are looked up in the bag stored under the node's ":@" key, using
 * the "@_" + name key convention; children are never traversed.
 *
 * @param {*} node Candidate node.
 * @param {string} attr Attribute name without the "@_" prefix (e.g. "id",
 *   "hwp:id", "href").
 * @returns {string|undefined} The attribute value passed through `asString`,
 *   or undefined when the node, the bag, or the attribute is missing.
 */
export function poGetAttr(node, attr) {
    const isPlainNode = Boolean(node) && typeof node === "object" && !Array.isArray(node);
    if (!isPlainNode)
        return undefined;
    const attrs = node[":@"];
    if (!attrs || typeof attrs !== "object")
        return undefined;
    // Exact name first (e.g. id or hwp:id).
    let raw = attrs[`@_${attr}`];
    // hrefs frequently live under the xlink namespace, so fall back to that.
    if (raw == null && attr === "href") {
        raw = attrs["@_xlink:href"] ?? attrs["@_href"];
    }
    return raw != null ? asString(raw) : undefined;
}
/**
 * Case-insensitively test whether a (possibly namespace-prefixed) tag name has
 * the given local name.
 *
 * @param {string|undefined} name Tag name such as "sec" or "jats:sec".
 * @param {string} local Local name to look for, in any letter case.
 * @returns {boolean} true when `name` equals `local` or ends with ":" + local
 *   (ignoring case); false for empty, missing or non-string names.
 */
export function poMatchesLocal(name, local) {
    if (!name || typeof name !== "string")
        return false;
    const nameLower = name.toLowerCase();
    // Lower-case both sides: the name is normalised, so a mixed-case `local`
    // could otherwise never match anything.
    const localLower = String(local).toLowerCase();
    return nameLower === localLower || nameLower.endsWith(`:${localLower}`);
}
60
+ //# sourceMappingURL=nodes.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"nodes.js","sourceRoot":"","sources":["../../../../../src/normalization/jats/utils/po/nodes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEtC,MAAM,UAAU,SAAS,CAAC,IAAS;IACjC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC;QAC1D,OAAO,SAAS,CAAC;IACnB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC;IACtC,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAuB,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAS;IAClC,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC7B,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,gCAAgC;IAChC,IAAI,IAAI,KAAK,OAAO;QAAE,OAAO,EAAE,CAAC;IAChC,MAAM,CAAC,GAAI,IAAY,CAAC,IAAI,CAAC,CAAC;IAC9B,sDAAsD;IACtD,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAAE,OAAO,CAAC,CAAC;IAC/B,6DAA6D;IAC7D,kEAAkE;IAClE,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;QACnD,OAAO,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;IAC1B,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAS;IACpC,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IACtE,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC7B,OAAO,IAAI,KAAK,OAAO,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,IAAS,EAAE,IAAY;IAC/C,sFAAsF;IACtF,uEAAuE;IACvE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC;QAC1D,OAAO,SAAS,CAAC;IACnB,MAAM,GAAG,GAAI,IAAY,CAAC,IAAI,CAAQ,CAAC;IACvC,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAC;IACtD,oCAAoC;IACpC,MAAM,MAAM,GAAG,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;IAChC,IAAI,MAAM,IAAI,IAAI;QAAE,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC5C,8DAA8D;IAC9D,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;QACpB,MAAM,EAAE,GAAG,GAAG,CAAC,cAAc,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,EAAE,IAAI,IAAI;YAAE,OAAO,QAAQ,CAAC,EAAE,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,IAAwB,EACxB,KAAa;IAEb,IAAI,CAAC,IAAI;QAAE,OAAO,KAAK,CAAC;IACxB,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IAC9B,OAAO,EAAE,KAAK,KAAK,IAA
I,EAAE,CAAC,QAAQ,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;AAClD,CAAC"}
@@ -0,0 +1,7 @@
1
+ export declare function poFindFirstByLocal(root: any, local: string): any | undefined;
2
+ export declare function poFindDescendantsByLocal(root: any, local: string): any[];
3
+ export declare function poFindFirstByLocalAttr(root: any, local: string, attr: string, value: string): any | undefined;
4
+ export declare function poFindSectionById(domPO: any, id: string): any | undefined;
5
+ export declare function poChildSections(root: any): any[];
6
+ export declare function getBodyNodePO(domPO: any): any | undefined;
7
+ //# sourceMappingURL=query.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query.d.ts","sourceRoot":"","sources":["../../../../../src/normalization/jats/utils/po/query.ts"],"names":[],"mappings":"AAEA,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,GAAG,GAAG,GAAG,SAAS,CAU5E;AAED,wBAAgB,wBAAwB,CAAC,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,GAAG,GAAG,EAAE,CAWxE;AAED,wBAAgB,sBAAsB,CACpC,IAAI,EAAE,GAAG,EACT,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,GACZ,GAAG,GAAG,SAAS,CAOjB;AAED,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,EAAE,MAAM,GAAG,GAAG,GAAG,SAAS,CAOzE;AAGD,wBAAgB,eAAe,CAAC,IAAI,EAAE,GAAG,GAAG,GAAG,EAAE,CAShD;AAED,wBAAgB,aAAa,CAAC,KAAK,EAAE,GAAG,GAAG,GAAG,GAAG,SAAS,CAOzD"}
@@ -0,0 +1,67 @@
1
+ import { poTagName, poMatchesLocal, poChildren, poGetAttr } from "./nodes";
/**
 * Find the first node, in breadth-first order, whose tag matches a local name.
 *
 * @param {*} root A node or an array of nodes to start from; the roots
 *   themselves are candidates.
 * @param {string} local Local tag name to match (see poMatchesLocal).
 * @returns {*} The first matching node, or undefined when none matches.
 */
export function poFindFirstByLocal(root, local) {
    // FIFO work list walked with an index cursor: same visiting order as
    // repeatedly calling shift(), without the O(n) cost of each shift.
    const queue = Array.isArray(root) ? [...root] : [root];
    for (let i = 0; i < queue.length; i++) {
        const node = queue[i];
        if (poMatchesLocal(poTagName(node), local))
            return node;
        for (const child of poChildren(node))
            queue.push(child);
    }
    return undefined;
}
/**
 * Collect every node whose tag matches a local name, in breadth-first order.
 *
 * @param {*} root A node or an array of nodes to start from; the roots
 *   themselves are candidates.
 * @param {string} local Local tag name to match (see poMatchesLocal).
 * @returns {Array} Matching nodes, level by level (not document order);
 *   empty when nothing matches.
 */
export function poFindDescendantsByLocal(root, local) {
    const matches = [];
    // FIFO work list walked with an index cursor: same visiting order as
    // repeatedly calling shift(), without the O(n) cost of each shift.
    const queue = Array.isArray(root) ? [...root] : [root];
    for (let i = 0; i < queue.length; i++) {
        const node = queue[i];
        if (poMatchesLocal(poTagName(node), local))
            matches.push(node);
        for (const child of poChildren(node))
            queue.push(child);
    }
    return matches;
}
/**
 * Find the first node with a given local tag name whose attribute equals a
 * given value.
 *
 * @param {*} root Node or array of nodes to search from.
 * @param {string} local Local tag name to match.
 * @param {string} attr Attribute name, as accepted by poGetAttr.
 * @param {string} value Required attribute value (strict equality).
 * @returns {*} The first matching node in poFindDescendantsByLocal order, or
 *   undefined. An empty attribute value never matches.
 */
export function poFindFirstByLocalAttr(root, local, attr, value) {
    return poFindDescendantsByLocal(root, local).find((candidate) => {
        const actual = poGetAttr(candidate, attr);
        return Boolean(actual) && actual === value;
    });
}
/**
 * Locate a "sec" node by identifier.
 *
 * @param {*} domPO Node or array of nodes to search from.
 * @param {string} id Identifier to look for.
 * @returns {*} The first "sec" node whose "id" attribute (or, when that is
 *   missing or empty, its "hwp:id" attribute) equals `id`; undefined if none.
 */
export function poFindSectionById(domPO, id) {
    return poFindDescendantsByLocal(domPO, "sec").find((section) => {
        const sectionId = poGetAttr(section, "id") || poGetAttr(section, "hwp:id");
        return Boolean(sectionId) && sectionId === id;
    });
}
/**
 * Return the direct child "sec" nodes of a node, in their original order.
 *
 * @param {*} root Parent node.
 * @returns {Array} A new array holding the immediate children whose tag
 *   matches the local name "sec"; nested sections are not included.
 */
export function poChildSections(root) {
    return poChildren(root).filter((child) => {
        const tag = (poTagName(child) || "").toLowerCase();
        return poMatchesLocal(tag, "sec");
    });
}
/**
 * Find the body node of the article.
 *
 * @param {*} domPO Node or array of nodes to search from.
 * @returns {*} The first "body" node found under the first "article" node,
 *   falling back to the first "article-body" node; undefined when there is no
 *   article or neither element exists.
 */
export function getBodyNodePO(domPO) {
    const article = poFindFirstByLocal(domPO, "article");
    if (!article)
        return undefined;
    const primary = poFindFirstByLocal(article, "body");
    if (primary)
        return primary;
    return poFindFirstByLocal(article, "article-body");
}
67
+ //# sourceMappingURL=query.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query.js","sourceRoot":"","sources":["../../../../../src/normalization/jats/utils/po/query.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,cAAc,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAE3E,MAAM,UAAU,kBAAkB,CAAC,IAAS,EAAE,KAAa;IACzD,MAAM,KAAK,GAAU,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC9D,OAAO,KAAK,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAC7B,IAAI,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QAC7C,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;QAClC,KAAK,MAAM,EAAE,IAAI,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC5C,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,IAAS,EAAE,KAAa;IAC/D,MAAM,GAAG,GAAU,EAAE,CAAC;IACtB,MAAM,KAAK,GAAU,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC9D,OAAO,KAAK,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAC7B,IAAI,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC;YAAE,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;QAClC,KAAK,MAAM,EAAE,IAAI,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC5C,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,sBAAsB,CACpC,IAAS,EACT,KAAa,EACb,IAAY,EACZ,KAAa;IAEb,MAAM,KAAK,GAAG,wBAAwB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;QAC7B,IAAI,CAAC,IAAI,CAAC,KAAK,KAAK;YAAE,OAAO,CAAC,CAAC;IACjC,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,KAAU,EAAE,EAAU;IACtD,MAAM,IAAI,GAAG,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,SAAS,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;QACzD,IAAI,GAAG,IAAI,GAAG,KAAK,EAAE;YAAE,OAAO,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,kDAAkD;AAClD,MAAM,UAAU,eAAe,CAAC,IAAS;IACvC,MAAM,GAAG,GAAU,EAAE,C
AAC;IACtB,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAClC,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,MAAM,EAAE,GAAG,SAAS,CAAC,EAAE,CAAC,CAAC;QACzB,MAAM,IAAI,GAAG,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;QACtC,IAAI,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC;YAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAU;IACtC,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IACrD,IAAI,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAC/B,MAAM,IAAI,GACR,kBAAkB,CAAC,OAAO,EAAE,MAAM,CAAC;QACnC,kBAAkB,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC;IAC9C,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,4 @@
1
+ export declare function poSerialize(node: any): string | undefined;
2
+ export declare function poSerializeMinimal(node: any): string | undefined;
3
+ export declare function poSerializeCitationMinimal(node: any): string | undefined;
4
+ //# sourceMappingURL=serialize.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"serialize.d.ts","sourceRoot":"","sources":["../../../../../src/normalization/jats/utils/po/serialize.ts"],"names":[],"mappings":"AAyDA,wBAAgB,WAAW,CAAC,IAAI,EAAE,GAAG,GAAG,MAAM,GAAG,SAAS,CA0BzD;AAKD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,GAAG,GAAG,MAAM,GAAG,SAAS,CAyKhE;AAGD,wBAAgB,0BAA0B,CAAC,IAAI,EAAE,GAAG,GAAG,MAAM,GAAG,SAAS,CA+FxE"}
@@ -0,0 +1,329 @@
1
+ import { poTagName, poChildren, poMatchesLocal } from "./nodes";
2
+ import { poGetAttr } from "./nodes";
3
+ import { escapeXml } from "../strings";
4
+ import { poFindFirstByLocal } from "./query";
5
// Matches a chunk that begins with the opening tag of a normalized inline element.
const STARTS_WITH_INLINE = /^<(?:bibliography|figure|table|supplementary|extlink)\b/i;
// Matches a chunk that ends with the closing tag of a normalized inline element
// (trailing whitespace after the tag is tolerated).
const ENDS_WITH_INLINE_CLOSE = /<\/(?:bibliography|figure|table|supplementary|extlink)>\s*$/i;
/**
 * Concatenate serialized child chunks, inserting at most one space at a boundary
 * that touches an inline element (bibliography/figure/table/supplementary/extlink).
 *
 * A space is inserted when:
 *   - the next chunk starts with an inline tag and the current chunk does not
 *     already end in whitespace (this also covers "text," / "text;" / "text:"
 *     directly followed by an inline tag), or
 *   - the current chunk ends with an inline closing tag (no trailing whitespace)
 *     and the next chunk starts with a letter, digit or "(".
 * No space is inserted when:
 *   - the current chunk ends with an opening bracket "(", "[" or "{" before an inline tag, or
 *   - the chunk after an inline closing tag starts with ")", "]", "}", ",", ".", ";" or ":".
 *
 * Empty / nullish chunks are ignored so the rules always look at the real
 * neighbouring text, and a boundary that already has whitespace is never
 * given a second space.
 *
 * @param {Array<string|null|undefined>} parts serialized chunks in document order
 * @returns {string} the joined string
 */
function joinWithBoundarySpaces(parts) {
    // Skip empty chunks up front; otherwise an empty chunk between "(" and an
    // inline tag (or between an inline tag and a word) hides the real neighbour.
    const chunks = [];
    for (const part of parts) {
        if (part)
            chunks.push(part);
    }
    let out = "";
    for (let i = 0; i < chunks.length; i++) {
        const cur = chunks[i];
        out += cur;
        if (i === chunks.length - 1)
            break;
        const next = chunks[i + 1];
        const nextFirst = next.slice(0, 1);
        const curEndsWithSpace = /\s$/.test(cur);
        const nextIsInline = STARTS_WITH_INLINE.test(next);
        const curEndsInline = ENDS_WITH_INLINE_CLOSE.test(cur);
        // Space before an inline chunk unless whitespace is already there.
        const needSpaceBeforeInline = nextIsInline && !curEndsWithSpace;
        // Space after an inline chunk when a word-ish character follows directly.
        const needSpaceAfterInline = curEndsInline && !curEndsWithSpace && /[A-Za-z0-9(]/.test(nextFirst);
        // Never separate an opening bracket from the inline tag it introduces.
        const openingBracketNoSpace = nextIsInline && /[\(\[\{]\s*$/.test(cur);
        // Never separate an inline tag from closing brackets / punctuation.
        const closingPunctNoSpace = curEndsInline && /^[)\]\},.;:]/.test(nextFirst);
        if ((needSpaceBeforeInline || needSpaceAfterInline) &&
            !openingBracketNoSpace &&
            !closingPunctNoSpace) {
            out += " ";
        }
    }
    return out;
}
39
/**
 * Best-effort conversion of a preserveOrder-style node back into an XML string.
 *
 * - Returns undefined when poTagName() yields no name for the node.
 * - A "#text" node is returned as its string value ("" when null/undefined).
 * - Attributes are read from the node's ":@" bag; only keys starting with "@_"
 *   and holding a non-null value are emitted, with &, < and " escaped.
 * - Children are serialized recursively and concatenated without separators;
 *   a childless element is written as an explicit open/close pair.
 *
 * @param {any} node node to serialize
 * @returns {string|undefined}
 */
export function poSerialize(node) {
    const tag = poTagName(node);
    if (!tag) {
        return undefined;
    }
    if (tag === "#text") {
        const text = node["#text"];
        return text == null ? "" : String(text);
    }
    const attributeBag = node[":@"] || {};
    const renderedAttrs = Object.keys(attributeBag)
        .filter((key) => key.startsWith("@_") && attributeBag[key] != null)
        .map((key) => {
            const escaped = String(attributeBag[key])
                .replace(/&/g, "&amp;")
                .replace(/</g, "&lt;")
                .replace(/\"/g, "&quot;");
            // Strip the "@_" marker to recover the attribute name.
            return `${key.slice(2)}="${escaped}"`;
        });
    const attrSuffix = renderedAttrs.length ? " " + renderedAttrs.join(" ") : "";
    let body = "";
    for (const kid of poChildren(node)) {
        body += poSerialize(kid) ?? "";
    }
    return `<${tag}${attrSuffix}>${body}</${tag}>`;
}
71
/**
 * Serialize a node to XML while flattening presentational markup.
 *
 * - Style-only wrappers (bold, italic, em, strong, underline, u, s, strike,
 *   small, big, font) are unwrapped: only their children are emitted.
 * - br / linebreak / line-break become a single space.
 * - <sup> is unwrapped when every child is punctuation/whitespace text, an
 *   xref with ref-type "bibr", or a style wrapper around those; otherwise the
 *   <sup> element is kept.
 * - xref is renamed by ref-type and keeps only its rid as target="…":
 *     bibr -> bibliography; fig/figure -> figure; table/table-wrap -> table;
 *     supplementary-material-like, supplementary, supplement -> supplementary.
 *   An xref with any other ref-type is serialized like a regular element.
 * - ext-link / ext_link / uri become <extlink href="…">.
 * - Any other element keeps its tag name and its "@_" attributes; its children
 *   are joined with joinWithBoundarySpaces and spacing around the inline tags
 *   is then tidied.
 *
 * @param {any} node node to serialize
 * @returns {string} serialized markup ("" when the node has no tag name)
 */
export function poSerializeMinimal(node) {
    const STYLE_WRAPPERS = [
        "bold",
        "italic",
        "em",
        "strong",
        "underline",
        "u",
        "s",
        "strike",
        "small",
        "big",
        "font",
    ];
    const LINE_BREAKS = ["br", "linebreak", "line-break"];
    const LINK_TAGS = ["ext-link", "ext_link", "uri"];
    // "( <tag" -> "(<tag"
    const SPACE_AFTER_OPENER = /([(\[{])\s+(<(?:bibliography|figure|table|supplementary|extlink)\b)/g;
    // "</tag> )" -> "</tag>)"
    const SPACE_BEFORE_CLOSER = /(<\/(?:bibliography|figure|table|supplementary|extlink)>)\s+([)\]},.;:])/g;
    // "</tag>,<tag" / "</tag> ,  <tag" -> "</tag>, <tag"
    const SEPARATOR_BETWEEN_INLINE = new RegExp(String.raw `(</(?:bibliography|figure|table|supplementary|extlink)>)\s*([,;:])\s*(<(?:bibliography|figure|table|supplementary|extlink)\b)`, "g");
    const matchesAny = (tagName, locals) => locals.some((local) => poMatchesLocal(tagName, local));
    const quoteAttr = (value) => String(value)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/\"/g, "&quot;");
    const refTypeOf = (el) => (poGetAttr(el, "ref-type") ||
        poGetAttr(el, "ref_type") ||
        "").toLowerCase();
    // Map an xref ref-type onto the simplified tag name, or undefined when unmapped.
    function xrefTagFor(refType) {
        switch (refType) {
            case "bibr":
                return "bibliography";
            case "fig":
            case "figure":
                return "figure";
            case "table":
            case "table-wrap":
                return "table";
            case "supplementary":
            case "supplement":
                return "supplementary";
            default:
                return /supp(lementary)?-?material/.test(refType)
                    ? "supplementary"
                    : undefined;
        }
    }
    // True when `item` is punctuation/whitespace text, a bibr xref, or a style
    // wrapper whose children all satisfy the same rule.
    function isCitationOnly(item) {
        const itemName = poTagName(item);
        const isPrimitive = typeof item === "string" || typeof item === "number";
        if (!itemName || itemName === "#text" || isPrimitive) {
            let raw = "";
            if (isPrimitive) {
                raw = String(item);
            }
            else if (item && typeof item === "object" && "#text" in item) {
                raw = String(item["#text"] ?? "");
            }
            // Whitespace plus , . ; : ( ) [ ] and the hyphen/dash/minus variants only.
            return /^[\s,.;:()\[\]\-–—−‑]+$/.test(raw);
        }
        if (matchesAny(itemName, STYLE_WRAPPERS)) {
            return poChildren(item).every((kid) => isCitationOnly(kid));
        }
        return poMatchesLocal(itemName, "xref") && refTypeOf(item) === "bibr";
    }
    function renderChildren(el) {
        return poChildren(el)
            .map((kid) => render(kid))
            .join("");
    }
    function render(n) {
        const tagName = poTagName(n);
        if (!tagName) {
            return "";
        }
        if (tagName === "#text") {
            const text = n["#text"];
            return text == null ? "" : String(text);
        }
        if (matchesAny(tagName, LINE_BREAKS)) {
            return " ";
        }
        if (poMatchesLocal(tagName, "sup")) {
            const supKids = poChildren(n);
            if (supKids.length && supKids.every((kid) => isCitationOnly(kid))) {
                // Citation superscript: emit the contents without the <sup> wrapper.
                return supKids.map((kid) => render(kid)).join("");
            }
            // Any other <sup> is handled by the generic element path below.
        }
        if (matchesAny(tagName, STYLE_WRAPPERS)) {
            return renderChildren(n);
        }
        if (poMatchesLocal(tagName, "xref")) {
            const mapped = xrefTagFor(refTypeOf(n));
            if (mapped) {
                const rid = poGetAttr(n, "rid");
                const target = rid ? ` target="${quoteAttr(rid)}"` : "";
                return `<${mapped}${target}>${renderChildren(n)}</${mapped}>`;
            }
            // Unmapped ref-type: fall through to the generic element path.
        }
        if (matchesAny(tagName, LINK_TAGS)) {
            const href = poGetAttr(n, "href");
            const hrefAttr = href ? ` href="${quoteAttr(href)}"` : "";
            return `<extlink${hrefAttr}>${renderChildren(n)}</extlink>`;
        }
        // Generic element: keep the tag name and every non-null "@_" attribute.
        const bag = n[":@"] || {};
        const attrText = Object.keys(bag)
            .filter((key) => key.startsWith("@_") && bag[key] != null)
            .map((key) => ` ${key.slice(2)}="${quoteAttr(bag[key])}"`)
            .join("");
        const kids = poChildren(n);
        if (!kids.length) {
            return `<${tagName}${attrText}></${tagName}>`;
        }
        const inner = joinWithBoundarySpaces(kids.map((kid) => render(kid) ?? ""))
            .replace(SPACE_AFTER_OPENER, "$1$2")
            .replace(SPACE_BEFORE_CLOSER, "$1$2")
            .replace(SEPARATOR_BETWEEN_INLINE, "$1$2 $3");
        return `<${tagName}${attrText}>${inner}</${tagName}>`;
    }
    return render(node);
}
228
/**
 * Serialize a citation down to a whitelist of tags, dropping all attributes.
 *
 * - Elements whose lower-cased tag name is in the whitelist are emitted as
 *   bare <tag>…</tag>; any other element is replaced by its serialized children.
 * - Text is passed through escapeXml(). Text consisting only of punctuation
 *   (after trimming) is dropped unless it sits somewhere inside a kept element.
 * - ext-link / extlink is emitted as <extlink>; when it has no inner text its
 *   href attribute (if any) becomes the text.
 * - Serialization starts at `node` when its own tag is whitelisted, otherwise
 *   at the first "citation" found by poFindFirstByLocal, falling back to `node`.
 *
 * @param {any} node citation node or a container of one
 * @returns {string|undefined} the markup, or undefined when it would be empty
 */
export function poSerializeCitationMinimal(node) {
    const KEPT_TAGS = new Set([
        // Core structure
        "citation",
        // People
        "person-group",
        "string-name",
        "name",
        "surname",
        "given-names",
        "prefix",
        "suffix",
        "collab",
        "etal",
        // Titles
        "article-title",
        "chapter-title",
        "trans-title",
        "trans-source",
        // Container / source
        "source",
        "series",
        "series-title",
        "series-text",
        "series-number",
        // Publication ids and links
        "pub-id",
        "object-id",
        "ext-link",
        "extlink",
        "uri",
        // Publication info
        "volume",
        "issue",
        "elocation-id",
        "fpage",
        "lpage",
        "page-range",
        "supplement",
        // Date
        "pub-date",
        "year",
        "month",
        "day",
        "season",
        // Books / chapters / reports
        "publisher-name",
        "publisher-loc",
        "edition",
        // Conference proceedings
        "conf-name",
        "conf-loc",
        "conf-date",
        // Dataset / software
        "data-title",
        "version",
        // Identifiers
        "isbn",
        "issn",
        // Misc academic
        "institution",
    ]);
    const emit = (n, keepPunctuation) => {
        const rawName = poTagName(n);
        if (!rawName) {
            return "";
        }
        if (rawName === "#text") {
            const text = String(n["#text"] ?? "");
            // Separator punctuation between kept elements carries no information.
            if (!keepPunctuation && /^[,.;:()\[\]{}–-]+$/.test(text.trim())) {
                return "";
            }
            return escapeXml(text);
        }
        const lowered = rawName.toLowerCase();
        const kids = poChildren(n);
        if (lowered === "ext-link" || lowered === "extlink") {
            let label = kids.map((kid) => emit(kid, true)).join("");
            if (!label.trim()) {
                // No visible link text: fall back to the link target.
                const href = poGetAttr(n, "href");
                if (href) {
                    label = escapeXml(href);
                }
            }
            return `<extlink>${label}</extlink>`;
        }
        const kept = KEPT_TAGS.has(lowered);
        const inner = kids
            .map((kid) => emit(kid, kept || keepPunctuation))
            .join("");
        // Non-whitelisted elements dissolve into their children.
        return kept ? `<${lowered}>${inner}</${lowered}>` : inner;
    };
    const rootName = (poTagName(node) || "").toLowerCase();
    const start = KEPT_TAGS.has(rootName)
        ? node
        : poFindFirstByLocal(node, "citation") || node;
    return emit(start, false) || undefined;
}
329
+ //# sourceMappingURL=serialize.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"serialize.js","sourceRoot":"","sources":["../../../../../src/normalization/jats/utils/po/serialize.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAChE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAE7C,MAAM,kBAAkB,GACtB,0DAA0D,CAAC;AAC7D,MAAM,sBAAsB,GAC1B,8DAA8D,CAAC;AAEjE,SAAS,sBAAsB,CAAC,KAAe;IAC7C,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAChC,GAAG,IAAI,GAAG,CAAC;QACX,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAEnC,qEAAqE;QACrE,MAAM,qBAAqB,GACzB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEvD,iEAAiE;QACjE,MAAM,oBAAoB,GACxB,sBAAsB,CAAC,IAAI,CAAC,GAAG,CAAC;YAChC,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC;YACrB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEjC,iEAAiE;QACjE,MAAM,+BAA+B,GACnC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEzD,iEAAiE;QACjE,MAAM,qBAAqB,GACzB,cAAc,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE5D,kEAAkE;QAClE,MAAM,mBAAmB,GACvB,sBAAsB,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAErE,IACE,CAAC,qBAAqB;YACpB,oBAAoB;YACpB,+BAA+B,CAAC;YAClC,CAAC,qBAAqB;YACtB,CAAC,mBAAmB,EACpB,CAAC;YACD,GAAG,IAAI,GAAG,CAAC;QACb,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,6DAA6D;AAC7D,MAAM,UAAU,WAAW,CAAC,IAAS;IACnC,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC7B,IAAI,CAAC,IAAI;QAAE,OAAO,SAAS,CAAC;IAC5B,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;QACrB,MAAM,CAAC,GAAI,IAAY,CAAC,OAAO,CAAC,CAAC;QACjC,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IACpC,CAAC;IACD,+BAA+B;IAC/B,MAAM,GAAG,GAAI,IAAY,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC
;IACtC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACjC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,SAAS;QAClC,MAAM,OAAO,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,GAAG,IAAI,IAAI;YAAE,SAAS;QAC1B,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC;aACpB,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;aACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;aACrB,OAAO,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QAC5B,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,KAAK,GAAG,GAAG,CAAC,CAAC;IACpC,CAAC;IACD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1D,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAClC,IAAI,CAAC,QAAQ,CAAC,MAAM;QAAE,OAAO,IAAI,IAAI,GAAG,OAAO,MAAM,IAAI,GAAG,CAAC;IAC7D,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACnE,OAAO,IAAI,IAAI,GAAG,OAAO,IAAI,KAAK,KAAK,IAAI,GAAG,CAAC;AACjD,CAAC;AAED,yDAAyD;AACzD,kFAAkF;AAClF,mCAAmC;AACnC,MAAM,UAAU,kBAAkB,CAAC,IAAS;IAC1C,MAAM,WAAW,GAAG;QAClB,MAAM;QACN,QAAQ;QACR,IAAI;QACJ,QAAQ;QACR,WAAW;QACX,GAAG;QACH,GAAG;QACH,QAAQ;QACR,OAAO;QACP,KAAK;QACL,MAAM;KACP,CAAC;IACF,SAAS,YAAY,CAAC,CAAM;QAC1B,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,CAAC,GAAI,CAAS,CAAC,OAAO,CAAC,CAAC;YAC9B,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACpC,CAAC;QACD,cAAc;QACd,IACE,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC;YAC1B,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC;YACjC,cAAc,CAAC,IAAI,EAAE,YAAY,CAAC,EAClC,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;QACD,kGAAkG;QAClG,IAAI,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YAChC,MAAM,kBAAkB,GAAG,CAAC,IAAS,EAAW,EAAE;gBAChD,MAAM,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;gBAC3B,gFAAgF;gBAChF,IACE,CAAC,EAAE;oBACH,EAAE,KAAK,OAAO;oBACd,OAAO,IAAI,KAAK,QAAQ;oBACxB,OAAO,IAAI,KAAK,QAAQ,EACxB,CAAC;oBACD,IAAI,GAAG,GAAG,EAAE,CAAC;oBACb,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,KAAK,QAAQ;wBACtD,GA
AG,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;yBAChB,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,IAAI;wBAC1D,GAAG,GAAG,MAAM,CAAE,IAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;oBAC7C,yCAAyC;oBACzC,MAAM,CAAC,GAAG,GAAG,CAAC;oBACd,2GAA2G;oBAC3G,OAAO,yBAAyB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjD,CAAC;gBACD,oDAAoD;gBACpD,IAAI,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnD,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;oBAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,kBAAkB,CAAC,EAAE,CAAC,CAAC,CAAC;gBACpD,CAAC;gBACD,+BAA+B;gBAC/B,IAAI,cAAc,CAAC,EAAE,EAAE,MAAM,CAAC,EAAE,CAAC;oBAC/B,MAAM,GAAG,GAAG,CACV,SAAS,CAAC,IAAI,EAAE,UAAU,CAAC;wBAC3B,SAAS,CAAC,IAAI,EAAE,UAAU,CAAC;wBAC3B,EAAE,CACH,CAAC,WAAW,EAAE,CAAC;oBAChB,OAAO,GAAG,KAAK,MAAM,CAAC;gBACxB,CAAC;gBACD,OAAO,KAAK,CAAC;YACf,CAAC,CAAC;YACF,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC3B,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,kBAAkB,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAC9D,8EAA8E;gBAC9E,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACrD,CAAC;YACD,8BAA8B;QAChC,CAAC;QACD,6BAA6B;QAC7B,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;YAChC,IAAI,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;gBAChC,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;gBAC/B,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACzD,CAAC;QACH,CAAC;QACD,2EAA2E;QAC3E,wEAAwE;QACxE,4DAA4D;QAC5D,sEAAsE;QACtE,4FAA4F;QAC5F,iDAAiD;QACjD,MAAM,OAAO,GAAG,CAAC,GAAQ,EAAE,EAAE,CAC3B,MAAM,CAAC,GAAG,CAAC;aACR,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;aACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;aACrB,OAAO,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QAC9B,IAAI,cAAc,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,CACV,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC;gBACxB,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC;gBACxB,EAAE,CACH,CAAC,WAAW,EAAE,CAAC;YAChB,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;YAChC,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC;iBACxB,GAAG,CAAC,CAAC,EAAE,EAAE,E
AAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;iBAC7B,IAAI,CAAC,EAAE,CAAC,CAAC;YACZ,IAAI,GAAuB,CAAC;YAC5B,IAAI,GAAG,KAAK,MAAM;gBAAE,GAAG,GAAG,cAAc,CAAC;iBACpC,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,QAAQ;gBAAE,GAAG,GAAG,QAAQ,CAAC;iBACtD,IAAI,GAAG,KAAK,OAAO,IAAI,GAAG,KAAK,YAAY;gBAAE,GAAG,GAAG,OAAO,CAAC;iBAC3D,IAAI,4BAA4B,CAAC,IAAI,CAAC,GAAG,CAAC;gBAAE,GAAG,GAAG,eAAe,CAAC;iBAClE,IAAI,GAAG,KAAK,eAAe,IAAI,GAAG,KAAK,YAAY;gBACtD,GAAG,GAAG,eAAe,CAAC;YACxB,IAAI,GAAG,EAAE,CAAC;gBACR,MAAM,KAAK,GAAa,EAAE,CAAC;gBAC3B,IAAI,GAAG;oBAAE,KAAK,CAAC,IAAI,CAAC,WAAW,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAChD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC1D,OAAO,IAAI,GAAG,GAAG,OAAO,IAAI,KAAK,KAAK,GAAG,GAAG,CAAC;YAC/C,CAAC;YACD,iEAAiE;QACnE,CAAC;QACD,IACE,cAAc,CAAC,IAAI,EAAE,UAAU,CAAC;YAChC,cAAc,CAAC,IAAI,EAAE,UAAU,CAAC;YAChC,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC,EAC3B,CAAC;YACD,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;YAClC,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC;iBACxB,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;iBAC7B,IAAI,CAAC,EAAE,CAAC,CAAC;YACZ,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,UAAU,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YACvD,OAAO,WAAW,OAAO,IAAI,KAAK,YAAY,CAAC;QACjD,CAAC;QACD,0DAA0D;QAC1D,MAAM,GAAG,GAAI,CAAS,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC;gBAAE,SAAS;YAClC,MAAM,OAAO,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACnB,IAAI,GAAG,IAAI,IAAI;gBAAE,SAAS;YAC1B,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC;iBACpB,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;iBACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;iBACrB,OAAO,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,KAAK,GAAG,GAAG,CAAC,CAAC;QACpC,CAAC;QACD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC1D,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QA
C/B,IAAI,CAAC,QAAQ,CAAC,MAAM;YAAE,OAAO,IAAI,IAAI,GAAG,OAAO,MAAM,IAAI,GAAG,CAAC;QAC7D,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9D,IAAI,KAAK,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;QAE7C,qFAAqF;QACrF,KAAK,GAAG,KAAK,CAAC,OAAO,CACnB,sEAAsE,EACtE,MAAM,CACP,CAAC;QACF,+FAA+F;QAC/F,KAAK,GAAG,KAAK,CAAC,OAAO,CACnB,2EAA2E,EAC3E,MAAM,CACP,CAAC;QACF,KAAK,GAAG,KAAK,CAAC,OAAO,CACnB,IAAI,MAAM,CACR,MAAM,CAAC,GAAG,CAAA,+HAA+H,EACzI,GAAG,CACJ,EACD,SAAS,CACV,CAAC;QAEF,OAAO,IAAI,IAAI,GAAG,OAAO,IAAI,KAAK,KAAK,IAAI,GAAG,CAAC;IACjD,CAAC;IACD,OAAO,YAAY,CAAC,IAAI,CAAC,CAAC;AAC5B,CAAC;AAED,yGAAyG;AACzG,MAAM,UAAU,0BAA0B,CAAC,IAAS;IAClD,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC;QACtB,iBAAiB;QACjB,UAAU;QACV,SAAS;QACT,cAAc;QACd,aAAa;QACb,MAAM;QACN,SAAS;QACT,aAAa;QACb,QAAQ;QACR,QAAQ;QACR,QAAQ;QACR,MAAM;QACN,SAAS;QACT,eAAe;QACf,eAAe;QACf,aAAa;QACb,cAAc;QACd,qBAAqB;QACrB,QAAQ;QACR,QAAQ;QACR,cAAc;QACd,aAAa;QACb,eAAe;QACf,4BAA4B;QAC5B,QAAQ;QACR,WAAW;QACX,UAAU;QACV,SAAS;QACT,KAAK;QACL,mBAAmB;QACnB,QAAQ;QACR,OAAO;QACP,cAAc;QACd,OAAO;QACP,OAAO;QACP,YAAY;QACZ,YAAY;QACZ,OAAO;QACP,UAAU;QACV,MAAM;QACN,OAAO;QACP,KAAK;QACL,QAAQ;QACR,6BAA6B;QAC7B,gBAAgB;QAChB,eAAe;QACf,SAAS;QACT,yBAAyB;QACzB,WAAW;QACX,UAAU;QACV,WAAW;QACX,qBAAqB;QACrB,YAAY;QACZ,SAAS;QACT,cAAc;QACd,MAAM;QACN,MAAM;QACN,gBAAgB;QAChB,aAAa;KACd,CAAC,CAAC;IACH,SAAS,GAAG,CAAC,CAAM,EAAE,YAAqB;QACxC,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,CAAC,GAAG,MAAM,CAAE,CAAS,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5C,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YACzB,IAAI,CAAC,YAAY,IAAI,qBAAqB,CAAC,IAAI,CAAC,OAAO,CAAC;gBAAE,OAAO,EAAE,CAAC;YACpE,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;QACtB,CAAC;QACD,MAAM,KAAK,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;QACzC,0DAA0D;QAC1D,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,oEAAoE;QACpE,IAAI,KAAK,KAAK,UAAU,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YAChD,IAAI,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,C
AAC,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC7D,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC;gBACtB,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;gBAClC,IAAI,IAAI;oBAAE,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;YACxC,CAAC;YACD,OAAO,YAAY,SAAS,YAAY,CAAC;QAC3C,CAAC;QACD,MAAM,KAAK,GAAG,QAAQ;aACnB,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,CAAC;aACxD,IAAI,CAAC,EAAE,CAAC,CAAC;QACZ,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC;YAAE,OAAO,KAAK,CAAC;QACtC,OAAO,IAAI,KAAK,IAAI,KAAK,KAAK,KAAK,GAAG,CAAC;IACzC,CAAC;IACD,MAAM,GAAG,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;QACjD,CAAC,CAAC,IAAI;QACN,CAAC,CAAC,kBAAkB,CAAC,IAAI,EAAE,UAAU,CAAC,IAAI,IAAI,CAAC;IACjD,MAAM,GAAG,GAAG,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IAC7B,OAAO,GAAG,IAAI,SAAS,CAAC;AAC1B,CAAC"}
@@ -0,0 +1,7 @@
1
+ export declare function poText(node: any): string | undefined;
2
+ export declare function poCollectSectionTexts(secPO: any): {
3
+ paragraphs: string[];
4
+ captions: string[];
5
+ };
6
+ export declare function poCollectParagraphNodesUnder(root: any): any[];
7
+ //# sourceMappingURL=text.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../../../src/normalization/jats/utils/po/text.ts"],"names":[],"mappings":"AAGA,wBAAgB,MAAM,CAAC,IAAI,EAAE,GAAG,GAAG,MAAM,GAAG,SAAS,CA+BpD;AAED,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,GAAG,GAAG;IACjD,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB,CA+CA;AAGD,wBAAgB,4BAA4B,CAAC,IAAI,EAAE,GAAG,GAAG,GAAG,EAAE,CAkB7D"}
@@ -0,0 +1,114 @@
1
+ import { poTagName, poChildren, poIsTextNode, poMatchesLocal } from "./nodes";
2
+ import { decodeEntities } from "../strings";
3
/**
 * Collect the text under a node (or array of nodes, or primitive) as one string.
 *
 * Strings and numbers contribute their trimmed value; arrays are visited in
 * order; for objects, a "#text" node contributes its trimmed value and any
 * other node is descended into via poChildren(). Non-empty fragments are joined
 * with single spaces, whitespace runs are collapsed, and the result is passed
 * through decodeEntities().
 *
 * @param {any} node node, array of nodes, string or number
 * @returns {string|undefined} the text, or undefined when no fragment was found
 */
export function poText(node) {
    const fragments = [];
    const keep = (value) => {
        const trimmed = String(value).trim();
        if (trimmed) {
            fragments.push(trimmed);
        }
    };
    (function visit(item) {
        if (item == null) {
            return;
        }
        switch (typeof item) {
            case "string":
            case "number":
                keep(item);
                return;
            case "object":
                break;
            default:
                // booleans, functions, etc. carry no text
                return;
        }
        if (Array.isArray(item)) {
            for (const entry of item) {
                visit(entry);
            }
            return;
        }
        if (poTagName(item) === "#text") {
            const value = item["#text"];
            if (value != null) {
                keep(value);
            }
            return;
        }
        for (const kid of poChildren(item)) {
            visit(kid);
        }
    })(node);
    if (fragments.length === 0) {
        return undefined;
    }
    const joined = fragments.join(" ").replace(/\s+/g, " ").trim();
    return decodeEntities(joined);
}
41
/**
 * Gather the paragraph texts that belong directly to a section node.
 *
 * Walks the children of `secPO` in order:
 *   - each <p> contributes one entry (its text via poText) and is not descended into;
 *   - loose text nodes are kept only when their immediate parent is "boxed-text";
 *   - list, fig, table-wrap, def-list and disp-formula subtrees are skipped;
 *   - nested <sec> elements are not descended into;
 *   - every other element is descended into.
 * Entries are trimmed and empty ones are dropped.
 *
 * @param {any} secPO section node
 * @returns {{paragraphs: string[], captions: string[]}} `captions` is always
 *   empty: nothing in this walker produces caption text; the field is kept so
 *   the result shape stays the same for callers.
 */
export function poCollectSectionTexts(secPO) {
    // Containers whose content is left out of the paragraph list.
    const SKIPPED_CONTAINERS = ["list", "fig", "table-wrap", "def-list", "disp-formula"];
    const paragraphs = [];
    const captions = [];
    const addParagraph = (text) => {
        if (!text)
            return;
        const trimmed = text.trim();
        if (trimmed)
            paragraphs.push(trimmed);
    };
    const walk = (node, ancestors) => {
        for (const ch of poChildren(node)) {
            const name = (poTagName(ch) || "").toLowerCase();
            if (poIsTextNode(ch)) {
                // Only stray text directly under boxed-text is captured; raw text
                // under <sec>, titles, captions or labels is ignored.
                const parent = (ancestors[ancestors.length - 1] || "").toLowerCase();
                if (parent === "boxed-text")
                    addParagraph(poText(ch));
                continue;
            }
            if (poMatchesLocal(name, "p")) {
                addParagraph(poText(poChildren(ch)) || poText(ch));
                // do not recurse into <p> to avoid duplicating inline text
                continue;
            }
            if (SKIPPED_CONTAINERS.some((tag) => poMatchesLocal(name, tag)))
                continue;
            // Nested sections are not descended into here.
            if (!poMatchesLocal(name, "sec"))
                walk(ch, ancestors.concat(name));
        }
    };
    walk(secPO, ["sec"]);
    return { paragraphs, captions };
}
92
/**
 * Return the <p> nodes found beneath `root`, in document order.
 *
 * A <p> is collected but not searched further; nested <sec> elements and
 * children without a tag name are skipped together with their subtrees.
 *
 * @param {any} root container node
 * @returns {any[]} the paragraph nodes
 */
export function poCollectParagraphNodesUnder(root) {
    const found = [];
    // Explicit stack; children are pushed in reverse so they pop in document order.
    const pending = Array.from(poChildren(root)).reverse();
    while (pending.length > 0) {
        const current = pending.pop();
        const tag = poTagName(current);
        if (!tag) {
            continue;
        }
        if (poMatchesLocal(tag, "p")) {
            found.push(current);
            continue;
        }
        if (poMatchesLocal(tag, "sec")) {
            // nested sections are out of scope
            continue;
        }
        const kids = Array.from(poChildren(current));
        for (let i = kids.length - 1; i >= 0; i--) {
            pending.push(kids[i]);
        }
    }
    return found;
}
114
+ //# sourceMappingURL=text.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.js","sourceRoot":"","sources":["../../../../../src/normalization/jats/utils/po/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAC9E,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAE5C,MAAM,UAAU,MAAM,CAAC,IAAS;IAC9B,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,GAAG,GAAG,CAAC,CAAM,EAAE,EAAE;QACrB,IAAI,CAAC,IAAI,IAAI;YAAE,OAAO;QACtB,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;YACnD,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3B,IAAI,CAAC;gBAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACrB,OAAO;QACT,CAAC;QACD,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;YACrB,KAAK,MAAM,EAAE,IAAI,CAAC;gBAAE,GAAG,CAAC,EAAE,CAAC,CAAC;YAC5B,OAAO;QACT,CAAC;QACD,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YAC1B,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;gBACrB,MAAM,CAAC,GAAI,CAAS,CAAC,OAAO,CAAC,CAAC;gBAC9B,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;oBACd,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC3B,IAAI,CAAC;wBAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBACvB,CAAC;gBACD,OAAO;YACT,CAAC;YACD,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC/B,KAAK,MAAM,EAAE,IAAI,QAAQ;gBAAE,GAAG,CAAC,EAAE,CAAC,CAAC;YACnC,OAAO;QACT,CAAC;IACH,CAAC,CAAC;IACF,GAAG,CAAC,IAAI,CAAC,CAAC;IACV,IAAI,CAAC,KAAK,CAAC,MAAM;QAAE,OAAO,SAAS,CAAC;IACpC,OAAO,cAAc,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,KAAU;IAI9C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,MAAM,IAAI,GAAG,CAAC,CAAU,EAAE,MAAM,GAAG,KAAK,EAAE,EAAE;QAC1C,IAAI,CAAC,CAAC;YAAE,OAAO;QACf,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACnB,IAAI,CAAC,CAAC;YAAE,OAAO;QACf,IAAI,MAAM;YAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;YACpB,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACrB,CAAC,CAAC;IACF,MAAM,IAAI,GAAG,CAAC,IAAS,EAAE,SAAmB,EAAE,EAAE;QAC9C,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;QAClC,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1B,MAAM,EAAE,GAAG,SAAS,CAAC,EAAE,CAAC,CAAC;YACzB,MAAM
,IAAI,GAAG,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;YACtC,yFAAyF;YACzF,IAAI,YAAY,CAAC,EAAE,CAAC,EAAE,CAAC;gBACrB,MAAM,MAAM,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBACrE,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;gBACnD,wFAAwF;gBACxF,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,MAAM,KAAK,YAAY,EAAE,CAAC;oBAChD,MAAM,CAAC,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC;oBACrB,IAAI,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC;gBACvB,CAAC;gBACD,SAAS;YACX,CAAC;YACD,IAAI,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC,EAAE,CAAC;gBAC9B,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC3C,2DAA2D;gBAC3D,SAAS;YACX,CAAC;YACD,+EAA+E;YAC/E,IACE,cAAc,CAAC,IAAI,EAAE,MAAM,CAAC;gBAC5B,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC;gBAC3B,cAAc,CAAC,IAAI,EAAE,YAAY,CAAC;gBAClC,cAAc,CAAC,IAAI,EAAE,UAAU,CAAC;gBAChC,cAAc,CAAC,IAAI,EAAE,cAAc,CAAC,EACpC,CAAC;gBACD,SAAS;YACX,CAAC;YACD,mFAAmF;YACnF,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC;gBAAE,IAAI,CAAC,EAAE,EAAE,SAAS,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC;QAC3E,CAAC;IACH,CAAC,CAAC;IACF,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC;IACrB,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;AAC/C,CAAC;AAED,+FAA+F;AAC/F,MAAM,UAAU,4BAA4B,CAAC,IAAS;IACpD,MAAM,GAAG,GAAU,EAAE,CAAC;IACtB,MAAM,IAAI,GAAG,CAAC,IAAS,EAAE,EAAE;QACzB,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;QAClC,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1B,MAAM,EAAE,GAAG,SAAS,CAAC,EAAE,CAAC,CAAC;YACzB,IAAI,CAAC,EAAE;gBAAE,SAAS;YAClB,IAAI,cAAc,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,CAAC;gBAC5B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACb,0BAA0B;gBAC1B,SAAS;YACX,CAAC;YACD,IAAI,cAAc,CAAC,EAAE,EAAE,KAAK,CAAC;gBAAE,SAAS,CAAC,uBAAuB;YAChE,IAAI,CAAC,EAAE,CAAC,CAAC;QACX,CAAC;IACH,CAAC,CAAC;IACF,IAAI,CAAC,IAAI,CAAC,CAAC;IACX,OAAO,GAAG,CAAC;AACb,CAAC"}