@ncukondo/search-hub 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_virtual/_commonjsHelpers.js +30 -0
- package/dist/_virtual/_commonjsHelpers.js.map +1 -0
- package/dist/_virtual/aliases.js +5 -0
- package/dist/_virtual/aliases.js.map +1 -0
- package/dist/_virtual/attributes.js +5 -0
- package/dist/_virtual/attributes.js.map +1 -0
- package/dist/_virtual/back.js +5 -0
- package/dist/_virtual/back.js.map +1 -0
- package/dist/_virtual/comment.js +5 -0
- package/dist/_virtual/comment.js.map +1 -0
- package/dist/_virtual/compile.js +5 -0
- package/dist/_virtual/compile.js.map +1 -0
- package/dist/_virtual/compile2.js +5 -0
- package/dist/_virtual/compile2.js.map +1 -0
- package/dist/_virtual/decode-data-html.js +5 -0
- package/dist/_virtual/decode-data-html.js.map +1 -0
- package/dist/_virtual/decode-data-xml.js +5 -0
- package/dist/_virtual/decode-data-xml.js.map +1 -0
- package/dist/_virtual/decode.js +5 -0
- package/dist/_virtual/decode.js.map +1 -0
- package/dist/_virtual/decode_codepoint.js +5 -0
- package/dist/_virtual/decode_codepoint.js.map +1 -0
- package/dist/_virtual/encode-html.js +5 -0
- package/dist/_virtual/encode-html.js.map +1 -0
- package/dist/_virtual/encode.js +5 -0
- package/dist/_virtual/encode.js.map +1 -0
- package/dist/_virtual/escape.js +5 -0
- package/dist/_virtual/escape.js.map +1 -0
- package/dist/_virtual/feeds.js +5 -0
- package/dist/_virtual/feeds.js.map +1 -0
- package/dist/_virtual/filters.js +5 -0
- package/dist/_virtual/filters.js.map +1 -0
- package/dist/_virtual/foreignNames.js +5 -0
- package/dist/_virtual/foreignNames.js.map +1 -0
- package/dist/_virtual/general.js +5 -0
- package/dist/_virtual/general.js.map +1 -0
- package/dist/_virtual/he.js +5 -0
- package/dist/_virtual/he.js.map +1 -0
- package/dist/_virtual/helpers.js +5 -0
- package/dist/_virtual/helpers.js.map +1 -0
- package/dist/_virtual/html.js +5 -0
- package/dist/_virtual/html.js.map +1 -0
- package/dist/_virtual/index.js +6 -0
- package/dist/_virtual/index.js.map +1 -0
- package/dist/_virtual/index10.js +5 -0
- package/dist/_virtual/index10.js.map +1 -0
- package/dist/_virtual/index11.js +5 -0
- package/dist/_virtual/index11.js.map +1 -0
- package/dist/_virtual/index2.js +5 -0
- package/dist/_virtual/index2.js.map +1 -0
- package/dist/_virtual/index3.js +5 -0
- package/dist/_virtual/index3.js.map +1 -0
- package/dist/_virtual/index4.js +5 -0
- package/dist/_virtual/index4.js.map +1 -0
- package/dist/_virtual/index5.js +7 -0
- package/dist/_virtual/index5.js.map +1 -0
- package/dist/_virtual/index6.js +5 -0
- package/dist/_virtual/index6.js.map +1 -0
- package/dist/_virtual/index7.js +5 -0
- package/dist/_virtual/index7.js.map +1 -0
- package/dist/_virtual/index8.js +5 -0
- package/dist/_virtual/index8.js.map +1 -0
- package/dist/_virtual/index9.js +5 -0
- package/dist/_virtual/index9.js.map +1 -0
- package/dist/_virtual/legacy.js +5 -0
- package/dist/_virtual/legacy.js.map +1 -0
- package/dist/_virtual/manipulation.js +5 -0
- package/dist/_virtual/manipulation.js.map +1 -0
- package/dist/_virtual/matcher.js +5 -0
- package/dist/_virtual/matcher.js.map +1 -0
- package/dist/_virtual/node.js +5 -0
- package/dist/_virtual/node.js.map +1 -0
- package/dist/_virtual/node2.js +5 -0
- package/dist/_virtual/node2.js.map +1 -0
- package/dist/_virtual/parse.js +5 -0
- package/dist/_virtual/parse.js.map +1 -0
- package/dist/_virtual/parse2.js +5 -0
- package/dist/_virtual/parse2.js.map +1 -0
- package/dist/_virtual/pseudos.js +5 -0
- package/dist/_virtual/pseudos.js.map +1 -0
- package/dist/_virtual/querying.js +5 -0
- package/dist/_virtual/querying.js.map +1 -0
- package/dist/_virtual/sort.js +5 -0
- package/dist/_virtual/sort.js.map +1 -0
- package/dist/_virtual/stringify.js +5 -0
- package/dist/_virtual/stringify.js.map +1 -0
- package/dist/_virtual/subselects.js +5 -0
- package/dist/_virtual/subselects.js.map +1 -0
- package/dist/_virtual/text.js +5 -0
- package/dist/_virtual/text.js.map +1 -0
- package/dist/_virtual/traversal.js +5 -0
- package/dist/_virtual/traversal.js.map +1 -0
- package/dist/_virtual/type.js +5 -0
- package/dist/_virtual/type.js.map +1 -0
- package/dist/_virtual/valid.js +5 -0
- package/dist/_virtual/valid.js.map +1 -0
- package/dist/_virtual/void-tag.js +5 -0
- package/dist/_virtual/void-tag.js.map +1 -0
- package/dist/cli/commands/fulltext/attach.js +1 -1
- package/dist/cli/commands/fulltext/attach.js.map +1 -1
- package/dist/cli/commands/fulltext/check.d.ts +1 -2
- package/dist/cli/commands/fulltext/check.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/check.js +4 -2
- package/dist/cli/commands/fulltext/check.js.map +1 -1
- package/dist/cli/commands/fulltext/convert.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/convert.js +8 -8
- package/dist/cli/commands/fulltext/convert.js.map +1 -1
- package/dist/cli/commands/fulltext/fetch.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/fetch.js +10 -6
- package/dist/cli/commands/fulltext/fetch.js.map +1 -1
- package/dist/cli/commands/fulltext/index.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/index.js +2 -0
- package/dist/cli/commands/fulltext/index.js.map +1 -1
- package/dist/cli/commands/fulltext/init.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/init.js +6 -5
- package/dist/cli/commands/fulltext/init.js.map +1 -1
- package/dist/cli/commands/fulltext/pending.d.ts +1 -1
- package/dist/cli/commands/fulltext/pending.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/pending.js +4 -2
- package/dist/cli/commands/fulltext/pending.js.map +1 -1
- package/dist/cli/commands/fulltext/status.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/status.js +4 -2
- package/dist/cli/commands/fulltext/status.js.map +1 -1
- package/dist/cli/commands/fulltext/sync.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/sync.js +6 -2
- package/dist/cli/commands/fulltext/sync.js.map +1 -1
- package/dist/cli/commands/review/types.d.ts +1 -1
- package/dist/cli/commands/review/types.d.ts.map +1 -1
- package/dist/cli/commands/review/types.js.map +1 -1
- package/dist/config/schema.d.ts +2 -0
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +6 -0
- package/dist/config/schema.js.map +1 -1
- package/dist/{fulltext → integration}/attach-shared.d.ts +2 -2
- package/dist/integration/attach-shared.d.ts.map +1 -0
- package/dist/integration/attach-shared.js.map +1 -0
- package/dist/integration/fulltext-attach.js +1 -1
- package/dist/integration/fulltext-attach.js.map +1 -1
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/citation-key.js +1 -1
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/citation-key.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/arxiv-html-parser.js +434 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/arxiv-html-parser.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/index.js +93 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/index.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/jats-parser.js +1060 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/jats-parser.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/convert/markdown-writer.js +146 -117
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/markdown-writer.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/arxiv.js +8 -1
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/arxiv.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/core.js +6 -3
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/core.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/index.js +139 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/index.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/ncbi-id-converter.js +46 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/ncbi-id-converter.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/pmc.js +8 -4
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/pmc.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/unpaywall.js +43 -9
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/unpaywall.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/arxiv-html.js +48 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/arxiv-html.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/downloader.js +64 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/downloader.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/orchestrator.js +236 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/orchestrator.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/download/pmc-xml.js +2 -1
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/pmc-xml.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/meta.js +15 -10
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/meta.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/paths.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/readme.js +8 -4
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/readme.js.map +1 -0
- package/dist/node_modules/boolbase/index.js +19 -0
- package/dist/node_modules/boolbase/index.js.map +1 -0
- package/dist/node_modules/css-select/lib/attributes.js +203 -0
- package/dist/node_modules/css-select/lib/attributes.js.map +1 -0
- package/dist/node_modules/css-select/lib/compile.js +141 -0
- package/dist/node_modules/css-select/lib/compile.js.map +1 -0
- package/dist/node_modules/css-select/lib/general.js +154 -0
- package/dist/node_modules/css-select/lib/general.js.map +1 -0
- package/dist/node_modules/css-select/lib/index.js +128 -0
- package/dist/node_modules/css-select/lib/index.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/aliases.js +40 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/aliases.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/filters.js +163 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/filters.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/index.js +71 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/index.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/pseudos.js +93 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/pseudos.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/subselects.js +111 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/subselects.js.map +1 -0
- package/dist/node_modules/css-select/lib/sort.js +78 -0
- package/dist/node_modules/css-select/lib/sort.js.map +1 -0
- package/dist/node_modules/css-what/lib/es/index.js +12 -0
- package/dist/node_modules/css-what/lib/es/index.js.map +1 -0
- package/dist/node_modules/css-what/lib/es/parse.js +349 -0
- package/dist/node_modules/css-what/lib/es/parse.js.map +1 -0
- package/dist/node_modules/css-what/lib/es/stringify.js +102 -0
- package/dist/node_modules/css-what/lib/es/stringify.js.map +1 -0
- package/dist/node_modules/css-what/lib/es/types.js +37 -0
- package/dist/node_modules/css-what/lib/es/types.js.map +1 -0
- package/dist/node_modules/dom-serializer/lib/foreignNames.js +117 -0
- package/dist/node_modules/dom-serializer/lib/foreignNames.js.map +1 -0
- package/dist/node_modules/dom-serializer/lib/index.js +207 -0
- package/dist/node_modules/dom-serializer/lib/index.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode.js +368 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode_codepoint.js +70 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode_codepoint.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/encode.js +61 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/encode.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/escape.js +79 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/escape.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-html.js +18 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-html.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-xml.js +18 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-xml.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/encode-html.js +19 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/encode-html.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/index.js +139 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/index.js.map +1 -0
- package/dist/node_modules/domelementtype/lib/index.js +40 -0
- package/dist/node_modules/domelementtype/lib/index.js.map +1 -0
- package/dist/node_modules/domhandler/lib/index.js +167 -0
- package/dist/node_modules/domhandler/lib/index.js.map +1 -0
- package/dist/node_modules/domhandler/lib/node.js +439 -0
- package/dist/node_modules/domhandler/lib/node.js.map +1 -0
- package/dist/node_modules/domutils/lib/feeds.js +146 -0
- package/dist/node_modules/domutils/lib/feeds.js.map +1 -0
- package/dist/node_modules/domutils/lib/helpers.js +97 -0
- package/dist/node_modules/domutils/lib/helpers.js.map +1 -0
- package/dist/node_modules/domutils/lib/index.js +65 -0
- package/dist/node_modules/domutils/lib/index.js.map +1 -0
- package/dist/node_modules/domutils/lib/legacy.js +124 -0
- package/dist/node_modules/domutils/lib/legacy.js.map +1 -0
- package/dist/node_modules/domutils/lib/manipulation.js +107 -0
- package/dist/node_modules/domutils/lib/manipulation.js.map +1 -0
- package/dist/node_modules/domutils/lib/querying.js +102 -0
- package/dist/node_modules/domutils/lib/querying.js.map +1 -0
- package/dist/node_modules/domutils/lib/stringify.js +65 -0
- package/dist/node_modules/domutils/lib/stringify.js.map +1 -0
- package/dist/node_modules/domutils/lib/traversal.js +69 -0
- package/dist/node_modules/domutils/lib/traversal.js.map +1 -0
- package/dist/node_modules/he/he.js +256 -0
- package/dist/node_modules/he/he.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/back.js +16 -0
- package/dist/node_modules/node-html-parser/dist/back.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/index.js +48 -0
- package/dist/node_modules/node-html-parser/dist/index.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/matcher.js +112 -0
- package/dist/node_modules/node-html-parser/dist/matcher.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/comment.js +41 -0
- package/dist/node_modules/node-html-parser/dist/nodes/comment.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/html.js +1048 -0
- package/dist/node_modules/node-html-parser/dist/nodes/html.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/node.js +49 -0
- package/dist/node_modules/node-html-parser/dist/nodes/node.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/text.js +106 -0
- package/dist/node_modules/node-html-parser/dist/nodes/text.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/type.js +19 -0
- package/dist/node_modules/node-html-parser/dist/nodes/type.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/parse.js +20 -0
- package/dist/node_modules/node-html-parser/dist/parse.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/valid.js +19 -0
- package/dist/node_modules/node-html-parser/dist/valid.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/void-tag.js +36 -0
- package/dist/node_modules/node-html-parser/dist/void-tag.js.map +1 -0
- package/dist/node_modules/nth-check/lib/compile.js +76 -0
- package/dist/node_modules/nth-check/lib/compile.js.map +1 -0
- package/dist/node_modules/nth-check/lib/index.js +36 -0
- package/dist/node_modules/nth-check/lib/index.js.map +1 -0
- package/dist/node_modules/nth-check/lib/parse.js +69 -0
- package/dist/node_modules/nth-check/lib/parse.js.map +1 -0
- package/package.json +2 -2
- package/dist/fulltext/attach-shared.d.ts.map +0 -1
- package/dist/fulltext/attach-shared.js.map +0 -1
- package/dist/fulltext/citation-key.d.ts +0 -15
- package/dist/fulltext/citation-key.d.ts.map +0 -1
- package/dist/fulltext/citation-key.js.map +0 -1
- package/dist/fulltext/convert/index.d.ts +0 -20
- package/dist/fulltext/convert/index.d.ts.map +0 -1
- package/dist/fulltext/convert/index.js +0 -50
- package/dist/fulltext/convert/index.js.map +0 -1
- package/dist/fulltext/convert/jats-parser.d.ts +0 -36
- package/dist/fulltext/convert/jats-parser.d.ts.map +0 -1
- package/dist/fulltext/convert/jats-parser.js +0 -887
- package/dist/fulltext/convert/jats-parser.js.map +0 -1
- package/dist/fulltext/convert/markdown-writer.d.ts +0 -6
- package/dist/fulltext/convert/markdown-writer.d.ts.map +0 -1
- package/dist/fulltext/convert/markdown-writer.js.map +0 -1
- package/dist/fulltext/convert/types.d.ts +0 -141
- package/dist/fulltext/convert/types.d.ts.map +0 -1
- package/dist/fulltext/discovery/arxiv.d.ts +0 -11
- package/dist/fulltext/discovery/arxiv.d.ts.map +0 -1
- package/dist/fulltext/discovery/arxiv.js.map +0 -1
- package/dist/fulltext/discovery/core.d.ts +0 -11
- package/dist/fulltext/discovery/core.d.ts.map +0 -1
- package/dist/fulltext/discovery/core.js.map +0 -1
- package/dist/fulltext/discovery/index.d.ts +0 -28
- package/dist/fulltext/discovery/index.d.ts.map +0 -1
- package/dist/fulltext/discovery/index.js +0 -75
- package/dist/fulltext/discovery/index.js.map +0 -1
- package/dist/fulltext/discovery/pmc.d.ts +0 -19
- package/dist/fulltext/discovery/pmc.d.ts.map +0 -1
- package/dist/fulltext/discovery/pmc.js.map +0 -1
- package/dist/fulltext/discovery/unpaywall.d.ts +0 -11
- package/dist/fulltext/discovery/unpaywall.d.ts.map +0 -1
- package/dist/fulltext/discovery/unpaywall.js.map +0 -1
- package/dist/fulltext/download/downloader.d.ts +0 -21
- package/dist/fulltext/download/downloader.d.ts.map +0 -1
- package/dist/fulltext/download/downloader.js +0 -59
- package/dist/fulltext/download/downloader.js.map +0 -1
- package/dist/fulltext/download/orchestrator.d.ts +0 -33
- package/dist/fulltext/download/orchestrator.d.ts.map +0 -1
- package/dist/fulltext/download/orchestrator.js +0 -125
- package/dist/fulltext/download/orchestrator.js.map +0 -1
- package/dist/fulltext/download/pmc-xml.d.ts +0 -13
- package/dist/fulltext/download/pmc-xml.d.ts.map +0 -1
- package/dist/fulltext/download/pmc-xml.js.map +0 -1
- package/dist/fulltext/meta.d.ts +0 -25
- package/dist/fulltext/meta.d.ts.map +0 -1
- package/dist/fulltext/meta.js.map +0 -1
- package/dist/fulltext/paths.d.ts +0 -12
- package/dist/fulltext/paths.d.ts.map +0 -1
- package/dist/fulltext/paths.js.map +0 -1
- package/dist/fulltext/readme.d.ts +0 -4
- package/dist/fulltext/readme.d.ts.map +0 -1
- package/dist/fulltext/readme.js.map +0 -1
- package/dist/fulltext/types.d.ts +0 -90
- package/dist/fulltext/types.d.ts.map +0 -1
- /package/dist/{fulltext → integration}/attach-shared.js +0 -0
- /package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/paths.js +0 -0
|
@@ -0,0 +1,1060 @@
|
|
|
1
|
+
import { XMLParser } from "fast-xml-parser";
|
|
2
|
+
// Shared XML parser instance for this module.
// NOTE(review): the per-option remarks describe how the helpers in this file
// consume the parser output; confirm exact option semantics against the
// installed fast-xml-parser version.
const parser = new XMLParser({
  // Output shape: helpers in this file walk arrays of single-tag node objects.
  preserveOrder: true,
  trimValues: false,
  // Text nodes are read back through the "#text" key.
  textNodeName: "#text",
  // Attributes are kept and read back through the "@_" prefix.
  ignoreAttributes: false,
  attributeNamePrefix: "@_",
  // Entity handling.
  processEntities: true,
  htmlEntities: true
});
|
|
11
|
+
/**
 * Return the element name stored on a parsed node.
 *
 * The name is the first own enumerable key that is neither the attribute
 * bag (":@") nor the text key ("#text").
 *
 * @param {object} node - Parsed node object.
 * @returns {string | undefined} The tag key, or undefined when the node has none.
 */
function getTagName(node) {
  return Object.keys(node).find((name) => name !== ":@" && name !== "#text");
}
|
|
18
|
+
/**
 * Return the child list stored under a node's tag key.
 *
 * @param {object} node - Parsed node object.
 * @returns {Array} The array stored under the tag key, or an empty array when
 *   the node has no tag key or the stored value is not an array.
 */
function getChildren(node) {
  const tagKey = getTagName(node);
  const stored = tagKey ? node[tagKey] : void 0;
  return Array.isArray(stored) ? stored : [];
}
|
|
25
|
+
/**
 * Read a single attribute from a parsed node.
 *
 * Attributes live in the node's ":@" bag under keys prefixed with "@_".
 *
 * @param {object} node - Parsed node object.
 * @param {string} attrName - Attribute name without the "@_" prefix.
 * @returns {string | undefined} The attribute value as a string, or undefined
 *   when the bag or the attribute is missing (or the value is null).
 */
function getAttr(node, attrName) {
  const bag = node[":@"];
  const raw = bag ? bag[`@_${attrName}`] : null;
  return raw == null ? void 0 : String(raw);
}
|
|
32
|
+
/**
 * Collect all attributes of a parsed node into a plain object.
 *
 * Only keys of the ":@" bag that start with "@_" are kept; the prefix is
 * removed and every value is converted with String().
 *
 * @param {object} node - Parsed node object.
 * @returns {Object<string, string>} Attribute name to string value; empty when
 *   the node has no attribute bag.
 */
function getAttrs(node) {
  const bag = node[":@"];
  if (!bag) {
    return {};
  }
  const collected = {};
  Object.entries(bag)
    .filter(([name]) => name.startsWith("@_"))
    .forEach(([name, raw]) => {
      collected[name.slice(2)] = String(raw);
    });
  return collected;
}
|
|
44
|
+
/**
 * Find the first entry in a child list that carries the given tag key.
 *
 * @param {Array<object>} children - Child node objects to search.
 * @param {string} tagName - Tag key to look for (tested with the `in` operator).
 * @returns {{node: object, children: Array, attrs: Object<string, string>} | undefined}
 *   The matching node, its own child array (empty when the stored value is
 *   not an array) and its attributes; undefined when nothing matches.
 */
function findChild(children, tagName) {
  const match = children.find((candidate) => tagName in candidate);
  if (match === undefined) {
    return void 0;
  }
  const nested = match[tagName];
  return {
    node: match,
    children: Array.isArray(nested) ? nested : [],
    attrs: getAttrs(match)
  };
}
|
|
57
|
+
/**
 * Find every entry in a child list that carries the given tag key.
 *
 * @param {Array<object>} children - Child node objects to search.
 * @param {string} tagName - Tag key to look for (tested with the `in` operator).
 * @returns {Array<{node: object, children: Array, attrs: Object<string, string>}>}
 *   One record per match, in input order; empty when nothing matches.
 */
function findChildren(children, tagName) {
  return children
    .filter((candidate) => tagName in candidate)
    .map((match) => {
      const nested = match[tagName];
      return {
        node: match,
        children: Array.isArray(nested) ? nested : [],
        attrs: getAttrs(match)
      };
    });
}
|
|
71
|
+
/**
 * Read the direct text payload of a parsed node.
 *
 * @param {object} child - Parsed node object.
 * @returns {string | undefined} The "#text" value as a string, or undefined
 *   when the key is absent or its value is null/undefined.
 */
function getTextContent(child) {
  if (!("#text" in child)) {
    return void 0;
  }
  const raw = child["#text"];
  return raw == null ? void 0 : String(raw);
}
|
|
78
|
+
/**
 * Locate the <article> element in a parsed document.
 *
 * Looks for "article" at the top level first, then inside a top-level
 * "pmc-articleset" wrapper.
 *
 * @param {Array<object>} parsed - Top-level node list from the parser.
 * @returns {{node: object, children: Array, attrs: Object<string, string>} | undefined}
 *   The article record, or undefined when neither location has one.
 */
function findArticle(parsed) {
  const topLevel = findChild(parsed, "article");
  if (topLevel) {
    return topLevel;
  }
  const articleSet = findChild(parsed, "pmc-articleset");
  return articleSet ? findChild(articleSet.children, "article") : void 0;
}
|
|
87
|
+
// Name-related tags whose text joinChildTexts separates from preceding text
// with a space when no whitespace/punctuation is already there.
const SPACE_AFTER_TAGS = /* @__PURE__ */ new Set([
  "surname",
  "given-names",
  "name",
  "string-name"
]);
|
|
88
|
+
/**
 * Recursively flatten a parsed value into its text.
 *
 * - null/undefined yield "".
 * - strings are returned as-is; numbers are stringified.
 * - arrays are joined through joinChildTexts.
 * - objects return their "#text" value when present, otherwise the joined
 *   text of the array stored under their tag key.
 * - anything else yields "".
 *
 * @param {*} node - Parsed node, node list, or primitive.
 * @returns {string} Concatenated text content.
 */
function extractAllText(node) {
  if (node == null) {
    return "";
  }
  if (Array.isArray(node)) {
    return joinChildTexts(node);
  }
  switch (typeof node) {
    case "string":
      return node;
    case "number":
      return String(node);
    case "object": {
      const ownText = getTextContent(node);
      if (ownText != null) {
        return ownText;
      }
      const tagKey = getTagName(node);
      const nested = tagKey ? node[tagKey] : void 0;
      return Array.isArray(nested) ? joinChildTexts(nested) : "";
    }
    default:
      return "";
  }
}
|
|
113
|
+
/**
 * Concatenate the text of a list of child nodes.
 *
 * Children with empty text are skipped. Before appending text that comes
 * from a tag listed in SPACE_AFTER_TAGS, a single space is inserted when
 * the text collected so far is non-empty and does not already end in
 * whitespace or one of , ; . : ( ) - /
 *
 * @param {Array} children - Child nodes to flatten.
 * @returns {string} The joined text.
 */
function joinChildTexts(children) {
  let joined = "";
  for (const child of children) {
    const piece = extractAllText(child);
    if (!piece) {
      continue;
    }
    const tagKey = getTagName(child);
    // Every appended piece is non-empty, so testing the tail of the running
    // string is the same as testing the previously appended piece.
    const needsSeparator =
      tagKey &&
      SPACE_AFTER_TAGS.has(tagKey) &&
      joined.length > 0 &&
      !/[\s,;.:()\-/]$/.test(joined);
    if (needsSeparator) {
      joined += " ";
    }
    joined += piece;
  }
  return joined;
}
|
|
130
|
+
/**
 * Extract DOI, PMCID and PMID from the <article-id> children of article-meta.
 *
 * Identifier text is trimmed before use: the parser is configured with
 * `trimValues: false`, so whitespace around the id would otherwise be kept
 * and would also stop the leading "PMC" from being stripped. Entries that
 * are empty after trimming are ignored so they cannot overwrite a real id.
 *
 * @param {Array<object>} metaChildren - Children of the article-meta node.
 * @returns {{doi?: string, pmcid?: string, pmid?: string}} Identifiers found;
 *   `pmcid` has a leading "PMC" removed. When a type repeats, the last
 *   non-empty entry wins.
 */
function parseArticleIds(metaChildren) {
  const result = {};
  for (const idEntry of findChildren(metaChildren, "article-id")) {
    const idType = idEntry.attrs["pub-id-type"];
    const idText = extractAllText(idEntry.children).trim();
    if (!idText) {
      continue;
    }
    if (idType === "doi") {
      result.doi = idText;
    } else if (idType === "pmc" || idType === "pmcid") {
      result.pmcid = idText.replace(/^PMC/, "");
    } else if (idType === "pmid") {
      result.pmid = idText;
    }
  }
  return result;
}
|
|
146
|
+
/**
 * Extract the author list from article-meta.
 *
 * Every <contrib-group> is scanned (not only the first one), so authors
 * placed in a second or later group are no longer dropped. Within each
 * group only <contrib contrib-type="author"> entries that have a <name>
 * child are used.
 *
 * @param {Array<object>} metaChildren - Children of the article-meta node.
 * @returns {Array<{surname: string, givenNames?: string}>} Authors in
 *   document order; `surname` is "" when <surname> is missing and
 *   `givenNames` is only set when non-empty.
 */
function parseAuthors(metaChildren) {
  const authors = [];
  for (const contribGroup of findChildren(metaChildren, "contrib-group")) {
    for (const contrib of findChildren(contribGroup.children, "contrib")) {
      if (contrib.attrs["contrib-type"] !== "author") {
        continue;
      }
      const nameNode = findChild(contrib.children, "name");
      if (!nameNode) {
        continue;
      }
      const surnameNode = findChild(nameNode.children, "surname");
      const givenNamesNode = findChild(nameNode.children, "given-names");
      const author = {
        surname: surnameNode ? extractAllText(surnameNode.children) : ""
      };
      const givenNames = givenNamesNode ? extractAllText(givenNamesNode.children) : "";
      if (givenNames) {
        author.givenNames = givenNames;
      }
      authors.push(author);
    }
  }
  return authors;
}
|
|
171
|
+
/**
 * Extract the abstract text from article-meta.
 *
 * Uses the first <abstract> child. If it contains <sec> children it is
 * handled as a structured abstract; otherwise its <p> children are joined
 * with blank lines; otherwise all of its text is flattened.
 *
 * @param {Array<object>} metaChildren - Children of the article-meta node.
 * @returns {string | undefined} Abstract text, or undefined when there is no
 *   <abstract> or the flattened fallback text is empty.
 */
function parseAbstract(metaChildren) {
  const abstractNode = findChild(metaChildren, "abstract");
  if (!abstractNode) {
    return void 0;
  }
  const body = abstractNode.children;

  const sections = findChildren(body, "sec");
  if (sections.length > 0) {
    return parseStructuredAbstract(sections);
  }

  const paragraphs = findChildren(body, "p");
  if (paragraphs.length === 0) {
    return extractAllText(body) || void 0;
  }
  return paragraphs.map((para) => extractAllText(para.children)).join("\n\n");
}
|
|
186
|
+
/**
 * Render the <sec> blocks of a structured abstract as text.
 *
 * Each section becomes "Title: text" (or just the text when it has no
 * title); paragraphs inside a section are joined with a single space and
 * sections are separated by a blank line.
 *
 * @param {Array<{children: Array}>} sections - Section records as returned by findChildren.
 * @returns {string} The rendered abstract.
 */
function parseStructuredAbstract(sections) {
  const rendered = sections.map((section) => {
    const titleNode = findChild(section.children, "title");
    const heading = titleNode ? extractAllText(titleNode.children) : "";
    const bodyText = findChildren(section.children, "p")
      .map((para) => extractAllText(para.children))
      .join(" ");
    return heading ? `${heading}: ${bodyText}` : bodyText;
  });
  return rendered.join("\n\n");
}
|
|
197
|
+
/**
 * Build a date record from a date-like node's <year>/<month>/<day> children.
 *
 * @param {{children: Array}} pd - Record as returned by findChild/findChildren.
 * @returns {{year: string, month?: string, day?: string} | undefined}
 *   undefined when there is no <year> child; `month` and `day` are set only
 *   when the corresponding child element exists.
 */
function extractDateFromNode(pd) {
  const yearNode = findChild(pd.children, "year");
  if (!yearNode) {
    return void 0;
  }
  const date = { year: extractAllText(yearNode.children) };
  for (const part of ["month", "day"]) {
    const partNode = findChild(pd.children, part);
    if (partNode) {
      date[part] = extractAllText(partNode.children);
    }
  }
  return date;
}
|
|
211
|
+
/**
 * Pick the publication date from the <pub-date> children of article-meta.
 *
 * Each <pub-date> is ranked by its `pub-type` (or, failing that,
 * `date-type`) attribute: epub < ppub < collection < anything else. The
 * best-ranked entry that actually yields a date wins; ties keep the first
 * one in document order.
 *
 * A candidate only replaces the current choice when extractDateFromNode
 * returns a date for it. Previously a better-ranked <pub-date> without a
 * <year> overwrote a valid date with undefined and blocked later entries.
 *
 * @param {Array<object>} metaChildren - Children of the article-meta node.
 * @returns {{year: string, month?: string, day?: string} | undefined}
 *   The chosen date, or undefined when no <pub-date> yields one.
 */
function parsePublicationDate(metaChildren) {
  // A Map avoids accidental hits on Object.prototype keys such as "constructor".
  const datePriority = new Map([
    ["epub", 0],
    ["ppub", 1],
    ["collection", 2]
  ]);
  const fallbackPriority = 3;

  let bestPriority = Number.POSITIVE_INFINITY;
  let publicationDate;
  for (const pd of findChildren(metaChildren, "pub-date")) {
    const dateType = pd.attrs["pub-type"] ?? pd.attrs["date-type"] ?? "";
    const priority = datePriority.get(dateType) ?? fallbackPriority;
    if (priority >= bestPriority) {
      continue;
    }
    const candidate = extractDateFromNode(pd);
    if (!candidate) {
      // Unusable entry: keep whatever was already selected.
      continue;
    }
    bestPriority = priority;
    publicationDate = candidate;
  }
  return publicationDate;
}
|
|
232
|
+
/**
 * Extract license information from <permissions>/<license>.
 *
 * Prefers the license element's `xlink:href` attribute; otherwise falls
 * back to the trimmed text of its first <license-p> child.
 *
 * @param {Array<object>} metaChildren - Children of the article-meta node.
 * @returns {string | undefined} License URL or text, or undefined when
 *   <permissions>, <license>, or both href and <license-p> are missing.
 */
function parseLicense(metaChildren) {
  const permissions = findChild(metaChildren, "permissions");
  const licenseNode = permissions ? findChild(permissions.children, "license") : void 0;
  if (!licenseNode) {
    return void 0;
  }
  const href = licenseNode.attrs["xlink:href"];
  if (href) {
    return href;
  }
  const paragraph = findChild(licenseNode.children, "license-p");
  return paragraph ? extractAllText(paragraph.children).trim() : void 0;
}
|
|
247
|
+
/**
 * Collect keywords from every <kwd-group> in article-meta.
 *
 * @param {Array<object>} metaChildren - Children of the article-meta node.
 * @returns {string[]} Trimmed, non-empty <kwd> texts in document order.
 */
function parseKeywords(metaChildren) {
  return findChildren(metaChildren, "kwd-group")
    .flatMap((group) => findChildren(group.children, "kwd"))
    .map((kwd) => extractAllText(kwd.children).trim())
    .filter((text) => Boolean(text));
}
|
|
260
|
+
/**
 * Extract volume, issue and page information from article-meta.
 *
 * Pages are "<fpage>-<lpage>" when both exist and <lpage> is non-empty,
 * just <fpage> when only it exists, and otherwise the <elocation-id> text
 * if that element is present.
 *
 * @param {Array<object>} metaChildren - Children of the article-meta node.
 * @returns {{volume?: string, issue?: string, pages?: string}} Only the
 *   fields whose source element exists are set.
 */
function parseVolumeAndPages(metaChildren) {
  // Text of the first child with this tag, or undefined when it is absent.
  const textOf = (tagName) => {
    const hit = findChild(metaChildren, tagName);
    return hit ? extractAllText(hit.children) : void 0;
  };

  const volume = textOf("volume");
  const issue = textOf("issue");

  let pages;
  const firstPage = textOf("fpage");
  if (firstPage !== void 0) {
    const lastPage = textOf("lpage") ?? "";
    pages = lastPage ? `${firstPage}-${lastPage}` : firstPage;
  } else {
    pages = textOf("elocation-id");
  }

  const result = {};
  for (const [key, value] of [["volume", volume], ["issue", issue], ["pages", pages]]) {
    if (value !== void 0) {
      result[key] = value;
    }
  }
  return result;
}
|
|
286
|
+
/**
 * Extract the journal title from <front>/<journal-meta>.
 *
 * Looks for <journal-title> inside <journal-title-group> first and then
 * directly under <journal-meta>.
 *
 * @param {Array<object>} frontChildren - Children of the front node.
 * @returns {string | undefined} Journal title text, or undefined when
 *   <journal-meta> or <journal-title> cannot be found.
 */
function parseJournalName(frontChildren) {
  const journalMeta = findChild(frontChildren, "journal-meta");
  if (!journalMeta) {
    return void 0;
  }
  const titleGroup = findChild(journalMeta.children, "journal-title-group");
  const groupedTitle = titleGroup ? findChild(titleGroup.children, "journal-title") : void 0;
  const titleNode = groupedTitle ?? findChild(journalMeta.children, "journal-title");
  return titleNode ? extractAllText(titleNode.children) : void 0;
}
|
|
301
|
+
/**
 * Reads the text of `title-group` / `article-title`.
 *
 * @param {Array} metaChildren - Nodes searched for `title-group`.
 * @returns {string} The title text, or "" when either element is missing.
 */
function parseArticleTitle(metaChildren) {
  const group = findChild(metaChildren, "title-group");
  if (!group) {
    return "";
  }
  const titleNode = findChild(group.children, "article-title");
  if (!titleNode) {
    return "";
  }
  return extractAllText(titleNode.children);
}
|
|
306
|
+
/**
 * Builds a metadata object from required base fields plus optional fields.
 *
 * Optional fields are copied only when truthy; `keywords` is copied only when
 * the array is non-empty. `base` is shallow-copied, not mutated.
 *
 * @param {object} base - Fields that are always present on the result.
 * @param {object} fields - Optional fields; `fields.keywords` must be an array.
 * @returns {object} A new object combining `base` and the populated fields.
 */
function assembleMetadata(base, fields) {
  const beforeKeywords = [
    "doi",
    "pmcid",
    "pmid",
    "journal",
    "publicationDate",
    "volume",
    "issue",
    "pages"
  ];
  const afterKeywords = ["articleType", "license", "abstract"];

  const metadata = { ...base };
  const copyIfTruthy = (key) => {
    if (fields[key]) {
      metadata[key] = fields[key];
    }
  };

  beforeKeywords.forEach(copyIfTruthy);
  if (fields.keywords.length > 0) {
    metadata.keywords = fields.keywords;
  }
  afterKeywords.forEach(copyIfTruthy);
  return metadata;
}
|
|
334
|
+
/**
 * Parses an XML string and extracts article metadata from
 * `front` / `article-meta`.
 *
 * @param {string} xml - XML source handed to `parser.parse`.
 * @returns {object} Metadata with at least `title` and `authors`. When the
 *   article, `front` or `article-meta` is missing, `{ title: "", authors: [] }`.
 */
function parseJatsMetadata(xml) {
  const emptyMetadata = () => ({ title: "", authors: [] });
  // Produces `{ [key]: value }` only when a value was found.
  const optional = (key, value) => (value === undefined ? {} : { [key]: value });

  const article = findArticle(parser.parse(xml));
  if (!article) {
    return emptyMetadata();
  }
  const front = findChild(article.children, "front");
  if (!front) {
    return emptyMetadata();
  }
  const articleMeta = findChild(front.children, "article-meta");
  if (!articleMeta) {
    return emptyMetadata();
  }

  const { children: metaChildren } = articleMeta;
  const journal = parseJournalName(front.children);
  const publicationDate = parsePublicationDate(metaChildren);
  const articleType = article.attrs["article-type"] || undefined;
  const license = parseLicense(metaChildren);
  const abstract = parseAbstract(metaChildren);

  const base = {
    title: parseArticleTitle(metaChildren),
    authors: parseAuthors(metaChildren)
  };
  const fields = {
    ...parseArticleIds(metaChildren),
    ...optional("journal", journal),
    ...optional("publicationDate", publicationDate),
    ...parseVolumeAndPages(metaChildren),
    keywords: parseKeywords(metaChildren),
    ...optional("articleType", articleType),
    ...optional("license", license),
    ...optional("abstract", abstract)
  };
  return assembleMetadata(base, fields);
}
|
|
362
|
+
/**
 * Converts the children of an `inline-formula` element to an inline entry.
 *
 * `tex-math` is looked up directly and then inside `alternatives`. When it has
 * text, that text is used for both `text` and `tex`; otherwise `text` is the
 * plain text of all children and `tex` is omitted.
 *
 * @param {Array} innerChildren - Children of the formula element.
 * @returns {{type: "inline-formula", text: string, tex?: string}}
 */
function handleInlineFormula(innerChildren) {
  const locateTexMath = () => {
    const direct = findChild(innerChildren, "tex-math");
    if (direct) {
      return direct;
    }
    const wrapper = findChild(innerChildren, "alternatives");
    return wrapper ? findChild(wrapper.children, "tex-math") : direct;
  };

  const texNode = locateTexMath();
  const tex = texNode ? extractAllText(texNode.children) : undefined;
  if (tex) {
    return { type: "inline-formula", text: tex, tex };
  }
  return { type: "inline-formula", text: extractAllText(innerChildren) };
}
|
|
380
|
+
/**
 * Converts an `ext-link` element to inline content.
 *
 * @param {object} child - The element node; its `xlink:href` attribute is read.
 * @param {Array} innerChildren - Children of the element.
 * @returns {object|null} A link entry when an href exists, a text entry when
 *   only text exists, otherwise null.
 */
function handleExtLink(child, innerChildren) {
  const url = getAttr(child, "xlink:href");
  if (!url) {
    // No target: degrade to plain text, or drop the element if it is empty.
    const plain = extractAllText(innerChildren);
    return plain ? { type: "text", text: plain } : null;
  }
  return { type: "link", url, children: parseInlineContent(innerChildren) };
}
|
|
390
|
+
/**
 * Converts a `uri` element to a link entry.
 *
 * The target is the `xlink:href` attribute, falling back to the element text.
 *
 * @param {object} child - The element node.
 * @param {Array} innerChildren - Children of the element.
 * @returns {object|null} A link entry, or null when no target can be found.
 */
function handleUri(child, innerChildren) {
  const href = getAttr(child, "xlink:href");
  const fallback = extractAllText(innerChildren);
  const target = href || fallback;
  if (!target) {
    return null;
  }
  return { type: "link", url: target, children: parseInlineContent(innerChildren) };
}
|
|
399
|
+
/**
 * Converts an `xref` element to inline content.
 *
 * References with `ref-type="bibr"` become citation entries; any other xref is
 * reduced to its text.
 *
 * @param {object} child - The element node.
 * @param {Array} innerChildren - Children of the element.
 * @returns {object|null} A citation entry, a text entry, or null when a
 *   non-citation xref has no text.
 */
function handleXref(child, innerChildren) {
  const isBibliographic = getAttr(child, "ref-type") === "bibr";
  if (isBibliographic) {
    const refId = getAttr(child, "rid") ?? "";
    return { type: "citation", refId, text: extractAllText(innerChildren) };
  }
  const label = extractAllText(innerChildren);
  return label ? { type: "text", text: label } : null;
}
|
|
413
|
+
/**
 * Maps inline tag names to handlers. Every handler is called as
 * `(child, innerChildren)` and returns an inline entry or null.
 */
const inlineTagHandlers = (() => {
  // Handler that keeps nested inline structure under the given entry type.
  const nested = (type) => (_child, innerChildren) => ({
    type,
    children: parseInlineContent(innerChildren)
  });
  // Handler that flattens the element to its text under the given entry type.
  const flattened = (type) => (_child, innerChildren) => ({
    type,
    text: extractAllText(innerChildren)
  });
  // Handler that discards the markup and keeps only non-empty text.
  const plainText = (_child, innerChildren) => {
    const passText = extractAllText(innerChildren);
    return passText ? { type: "text", text: passText } : null;
  };

  return {
    bold: nested("bold"),
    italic: nested("italic"),
    sup: flattened("superscript"),
    sub: flattened("subscript"),
    "inline-formula": (_child, innerChildren) => handleInlineFormula(innerChildren),
    monospace: flattened("code"),
    "ext-link": (child, innerChildren) => handleExtLink(child, innerChildren),
    uri: (child, innerChildren) => handleUri(child, innerChildren),
    underline: plainText,
    sc: plainText,
    xref: (child, innerChildren) => handleXref(child, innerChildren)
  };
})();
|
|
447
|
+
/**
 * Converts one inline node to an inline entry.
 *
 * Text nodes become text entries (empty text yields null). Elements are
 * dispatched through `inlineTagHandlers`; elements without a handler are
 * reduced to their text.
 *
 * @param {object} child - A node from an inline children list.
 * @returns {object|null} The inline entry, or null when the node has no content.
 */
function processInlineChild(child) {
  const text = getTextContent(child);
  if (text != null) {
    return text ? { type: "text", text } : null;
  }
  const tag = getTagName(child);
  if (!tag) {
    return null;
  }
  const innerChildren = getChildren(child);
  // Tag names come from the document. Only own properties count, so a tag named
  // "constructor" or "toString" cannot pick up a member of Object.prototype.
  const handler = Object.prototype.hasOwnProperty.call(inlineTagHandlers, tag)
    ? inlineTagHandlers[tag]
    : undefined;
  if (handler) {
    return handler(child, innerChildren);
  }
  const unknownText = extractAllText(innerChildren);
  return unknownText ? { type: "text", text: unknownText } : null;
}
|
|
462
|
+
/**
 * Converts a list of inline nodes to inline entries, dropping nodes that
 * produce no entry.
 *
 * @param {Iterable} children - Inline nodes, processed in order.
 * @returns {Array<object>} The entries returned by `processInlineChild`.
 */
function parseInlineContent(children) {
  return Array.from(children, (node) => processInlineChild(node)).filter((entry) => Boolean(entry));
}
|
|
471
|
+
// `list-type` values that denote a numbered or lettered (ordered) list.
const ORDERED_LIST_TYPES = /* @__PURE__ */ new Set([
  "order",
  "alpha-lower",
  "alpha-upper",
  "roman-lower",
  "roman-upper"
]);

/**
 * Converts a `list` element to a list block.
 *
 * Each `list-item` contributes one entry made of the inline content of all its
 * `p` children. The list is ordered when `list-type` is "order" or one of the
 * alphabetic/roman variants; any other value, or a missing attribute, is
 * treated as unordered.
 *
 * @param {object} listNode - The list element node.
 * @returns {{type: "list", ordered: boolean, items: Array<Array<object>>}}
 */
function parseList(listNode) {
  const listType = getAttr(listNode, "list-type");
  const ordered = ORDERED_LIST_TYPES.has(listType);
  const listItems = findChildren(getChildren(listNode), "list-item");
  const items = listItems.map((item) =>
    findChildren(item.children, "p").flatMap((p) => parseInlineContent(p.children))
  );
  return { type: "list", ordered, items };
}
|
|
484
|
+
/**
 * Extracts the text of every `th`/`td` cell in a table row.
 *
 * A cell with more than one `p` child has its paragraphs joined with "<br>";
 * otherwise the whole cell is flattened to text.
 *
 * @param {Iterable} trChildren - Children of a `tr` element.
 * @returns {string[]} One string per cell, in document order.
 */
function parseTableRow(trChildren) {
  const isCell = (node) => {
    const tagName = getTagName(node);
    return tagName === "th" || tagName === "td";
  };
  const cellText = (node) => {
    const inner = getChildren(node);
    const paras = findChildren(inner, "p");
    return paras.length > 1
      ? paras.map((p) => extractAllText(p.children)).join("<br>")
      : extractAllText(inner);
  };
  return Array.from(trChildren).flatMap((node) => (isCell(node) ? [cellText(node)] : []));
}
|
|
500
|
+
/**
 * Extracts caption, header cells and body rows from a `table-wrap` element.
 *
 * The caption is the label and caption text joined with ". ". Headers come
 * from the first `tr` of `thead`. Body rows are read from every `tbody`; when
 * the table has no `tbody`, `tr` elements placed directly under `table` are
 * used instead, so such tables no longer come back empty.
 *
 * @param {object} tableWrapNode - The `table-wrap` element node.
 * @returns {{headers: string[], rows: string[][], caption?: string}}
 */
function parseTableWrap(tableWrapNode) {
  const children = getChildren(tableWrapNode);
  const labelNode = findChild(children, "label");
  const label = labelNode ? extractAllText(labelNode.children) : "";
  const captionNode = findChild(children, "caption");
  const captionText = captionNode ? extractAllText(captionNode.children) : "";
  const captionStr = [label, captionText].filter(Boolean).join(". ");
  const tableNode = findChild(children, "table");

  const result = {
    headers: [],
    rows: []
  };
  if (captionStr) {
    result.caption = captionStr;
  }
  if (!tableNode) {
    return result;
  }

  const thead = findChild(tableNode.children, "thead");
  if (thead) {
    const headRows = findChildren(thead.children, "tr");
    if (headRows.length > 0) {
      result.headers.push(...parseTableRow(headRows[0]?.children ?? []));
    }
  }

  // Rows live in one or more tbody elements, or directly under table.
  const bodies = findChildren(tableNode.children, "tbody");
  const rowContainers = bodies.length > 0 ? bodies : [tableNode];
  for (const container of rowContainers) {
    for (const row of findChildren(container.children, "tr")) {
      result.rows.push(parseTableRow(row.children));
    }
  }
  return result;
}
|
|
532
|
+
/**
 * Converts a `boxed-text` element to a block.
 *
 * @param {object} node - The element node.
 * @returns {{type: "boxed-text", content: Array<object>, title?: string}}
 *   `title` is present only when the element has a non-empty `title` child.
 */
function parseBoxedText(node) {
  const inner = getChildren(node);
  const heading = findChild(inner, "title");
  const headingText = heading ? extractAllText(heading.children) : undefined;
  const boxed = { type: "boxed-text", content: parseBlockContent(inner) };
  return headingText ? Object.assign(boxed, { title: headingText }) : boxed;
}
|
|
542
|
+
/**
 * Converts a `def-list` element to a block of term/definition pairs.
 *
 * @param {object} node - The element node.
 * @returns {{type: "def-list", items: Array<{term: string, definition: string}>, title?: string}}
 *   Missing `term`/`def` children yield empty strings; `title` is present only
 *   when non-empty.
 */
function parseDefList(node) {
  const textOf = (found) => (found ? extractAllText(found.children) : "");

  const inner = getChildren(node);
  const heading = findChild(inner, "title");
  const headingText = heading ? extractAllText(heading.children) : undefined;

  const items = findChildren(inner, "def-item").map((entry) => {
    const termNode = findChild(entry.children, "term");
    const defNode = findChild(entry.children, "def");
    return { term: textOf(termNode), definition: textOf(defNode) };
  });

  const defList = { type: "def-list", items };
  if (headingText) {
    defList.title = headingText;
  }
  return defList;
}
|
|
560
|
+
/**
 * Converts a `disp-formula` element to a formula block.
 *
 * `tex-math` is searched inside `alternatives` when that wrapper exists,
 * otherwise among the direct children. Without `tex-math`, the trimmed text of
 * all children except `label` is stored as `text`.
 *
 * @param {object} node - The element node.
 * @returns {{type: "formula", id?: string, label?: string, tex?: string, text?: string}}
 */
function parseDispFormula(node) {
  const inner = getChildren(node);
  const formulaId = getAttr(node, "id");
  const labelEl = findChild(inner, "label");
  const labelText = labelEl ? extractAllText(labelEl.children) : undefined;
  const wrapper = findChild(inner, "alternatives");
  const texEl = findChild(wrapper ? wrapper.children : inner, "tex-math");

  const formula = { type: "formula" };
  if (formulaId) {
    formula.id = formulaId;
  }
  if (labelText) {
    formula.label = labelText;
  }
  if (texEl) {
    formula.tex = extractAllText(texEl.children);
    return formula;
  }

  // Keep the label out of the fallback text; it is already stored separately.
  const fallback = extractAllText(inner.filter((c) => !("label" in c))).trim();
  if (fallback) {
    formula.text = fallback;
  }
  return formula;
}
|
|
583
|
+
// Tags that parseParagraph lifts out of a paragraph and emits as separate blocks.
const BLOCK_TAGS = /* @__PURE__ */ new Set([
  "table-wrap",
  "fig",
  "disp-quote",
  "boxed-text"
]);
|
|
584
|
+
/**
 * Converts a `disp-quote` element to a blockquote block.
 *
 * Paragraph contents are concatenated with a "\n\n" text entry between
 * paragraphs. A quote without `p` children is parsed as inline content.
 *
 * @param {object} node - The element node.
 * @returns {{type: "blockquote", content: Array<object>}}
 */
function parseDispQuote(node) {
  const inner = getChildren(node);
  const paras = findChildren(inner, "p");
  if (paras.length === 0) {
    return { type: "blockquote", content: [...parseInlineContent(inner)] };
  }

  const content = [];
  for (const [index, para] of paras.entries()) {
    if (index > 0) {
      content.push({ type: "text", text: "\n\n" });
    }
    if (para) {
      content.push(...parseInlineContent(para.children));
    }
  }
  return { type: "blockquote", content };
}
|
|
600
|
+
/**
 * Wraps the result of `parseTableWrap` in a table block.
 *
 * @param {object} node - The `table-wrap` element node.
 * @returns {{type: "table", headers: Array, rows: Array, caption?: string}}
 *   `caption` is copied only when truthy.
 */
function parseTableBlock(node) {
  const { headers, rows, caption } = parseTableWrap(node);
  const block = { type: "table", headers, rows };
  if (caption) {
    block.caption = caption;
  }
  return block;
}
|
|
611
|
+
/**
 * Converts a `fig` element to a figure block.
 *
 * @param {object} node - The element node.
 * @returns {{type: "figure", id?: string, label?: string, caption?: string}}
 *   Each optional field is present only when it has a non-empty value.
 */
function parseFigBlock(node) {
  const inner = getChildren(node);
  const figure = { type: "figure" };
  const assignIfPresent = (key, value) => {
    if (value) {
      figure[key] = value;
    }
  };

  assignIfPresent("id", getAttr(node, "id"));
  const labelEl = findChild(inner, "label");
  if (labelEl) {
    assignIfPresent("label", extractAllText(labelEl.children));
  }
  const captionEl = findChild(inner, "caption");
  if (captionEl) {
    assignIfPresent("caption", extractAllText(captionEl.children));
  }
  return figure;
}
|
|
631
|
+
/**
 * Converts the children of a `p` element to one or more blocks.
 *
 * Without nested block elements (see BLOCK_TAGS) the result is one paragraph
 * block. Otherwise the paragraph is split: runs of inline nodes become
 * paragraph blocks (whitespace-only runs are dropped) and each nested block
 * element becomes its own block, in document order.
 *
 * @param {Array} pChildren - Children of the paragraph element.
 * @returns {Array<object>} The resulting blocks.
 */
function parseParagraph(pChildren) {
  const isNestedBlock = (child) => {
    const tag = getTagName(child);
    return tag != null && BLOCK_TAGS.has(tag);
  };
  if (!pChildren.some((child) => isNestedBlock(child))) {
    return [{ type: "paragraph", content: parseInlineContent(pChildren) }];
  }

  const nestedParsers = new Map([
    ["table-wrap", parseTableBlock],
    ["fig", parseFigBlock],
    ["disp-quote", parseDispQuote],
    ["boxed-text", parseBoxedText]
  ]);

  const blocks = [];
  let pending = [];
  // Emits the buffered inline nodes as a paragraph unless they are blank.
  const flushPending = () => {
    if (pending.length === 0) {
      return;
    }
    const content = parseInlineContent(pending);
    const meaningful = content.some((part) => part.type !== "text" || part.text.trim() !== "");
    if (content.length > 0 && meaningful) {
      blocks.push({ type: "paragraph", content });
    }
    pending = [];
  };

  for (const child of pChildren) {
    const parseNested = nestedParsers.get(getTagName(child));
    if (parseNested) {
      flushPending();
      blocks.push(parseNested(child));
    } else {
      pending.push(child);
    }
  }
  flushPending();
  return blocks;
}
|
|
672
|
+
/**
 * Summarises a `supplementary-material` element as a paragraph block.
 *
 * The text is the label and caption joined with ": " (empty parts skipped).
 *
 * @param {object} child - The element node.
 * @returns {object|null} A paragraph block, or null when there is no text.
 */
function parseSupplementaryMaterial(child) {
  const textOf = (found) => (found ? extractAllText(found.children) : "");

  const inner = getChildren(child);
  const labelEl = findChild(inner, "label");
  const captionEl = findChild(inner, "caption");
  const summary = [textOf(labelEl), textOf(captionEl)].filter((part) => part).join(": ");
  if (!summary) {
    return null;
  }
  return { type: "paragraph", content: [{ type: "text", text: summary }] };
}
|
|
684
|
+
/**
 * Maps block-level tag names to handlers. Each handler receives the element
 * node and returns a block, an array of blocks, or null.
 */
const blockTagHandlers = {
  p(child) {
    return parseParagraph(getChildren(child));
  },
  list(child) {
    return parseList(child);
  },
  "table-wrap"(child) {
    return parseTableBlock(child);
  },
  fig(child) {
    return parseFigBlock(child);
  },
  "disp-quote"(child) {
    return parseDispQuote(child);
  },
  "boxed-text"(child) {
    return parseBoxedText(child);
  },
  "def-list"(child) {
    return parseDefList(child);
  },
  "disp-formula"(child) {
    return parseDispFormula(child);
  },
  preformat(child) {
    return { type: "preformat", text: extractAllText(getChildren(child)) };
  },
  "supplementary-material"(child) {
    return parseSupplementaryMaterial(child);
  }
};
|
|
696
|
+
/**
 * Converts the children of a section-like element to content blocks.
 *
 * Nodes without a tag name, tags without a handler in `blockTagHandlers`, and
 * handlers that return null/undefined are skipped. A handler that returns an
 * array contributes all of its elements.
 *
 * @param {Iterable} sectionChildren - Nodes to convert, processed in order.
 * @returns {Array<object>} The collected blocks.
 */
function parseBlockContent(sectionChildren) {
  const blocks = [];
  for (const child of sectionChildren) {
    const tag = getTagName(child);
    if (!tag) {
      continue;
    }
    // Tag names come from the document. Only own properties count, so a tag
    // such as "constructor" cannot resolve to a member of Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(blockTagHandlers, tag)) {
      continue;
    }
    const handler = blockTagHandlers[tag];
    if (!handler) {
      continue;
    }
    const result = handler(child);
    if (result == null) {
      continue;
    }
    if (Array.isArray(result)) {
      blocks.push(...result);
    } else {
      blocks.push(result);
    }
  }
  return blocks;
}
|
|
716
|
+
/**
 * Converts the children of a section element to a section object, recursing
 * into nested `sec` elements with `level + 1`.
 *
 * @param {Array} secChildren - Children of the section element.
 * @param {number} level - Heading level assigned to this section.
 * @returns {{title: string, level: number, content: Array<object>, subsections: Array<object>}}
 */
function parseSection(secChildren, level) {
  const heading = findChild(secChildren, "title");
  const title = heading ? extractAllText(heading.children) : "";
  const content = parseBlockContent(secChildren);
  const subsections = findChildren(secChildren, "sec").map((nested) =>
    parseSection(nested.children, level + 1)
  );
  return { title, level, content, subsections };
}
|
|
727
|
+
/**
 * Parses an XML string and returns the sections of the article `body`.
 *
 * Top-level `sec` elements become level-2 sections. A body without `sec`
 * elements yields one untitled level-2 section holding its block content, or
 * nothing when there is no content.
 *
 * @param {string} xml - XML source handed to `parser.parse`.
 * @returns {Array<object>} Sections; empty when article or body is missing.
 */
function parseJatsBody(xml) {
  const article = findArticle(parser.parse(xml));
  if (!article) {
    return [];
  }
  const body = findChild(article.children, "body");
  if (!body) {
    return [];
  }

  const topLevel = findChildren(body.children, "sec");
  if (topLevel.length > 0) {
    return topLevel.map((sec) => parseSection(sec.children, 2));
  }

  const looseContent = parseBlockContent(body.children);
  return looseContent.length > 0
    ? [{ title: "", level: 2, content: looseContent, subsections: [] }]
    : [];
}
|
|
749
|
+
/**
 * Formats the authors of a citation from its `person-group`.
 *
 * Each `name` is rendered as "surname given-names", or the surname alone;
 * names without a surname are skipped. Authors are joined with ", ".
 *
 * @param {Array} children - Children of the citation element.
 * @returns {string|undefined} The author list, or undefined when there is no
 *   `person-group` or no usable name.
 */
function extractCitationAuthors(children) {
  const group = findChild(children, "person-group");
  if (!group) {
    return undefined;
  }
  const textOf = (found) => (found ? extractAllText(found.children) : "");

  const formatted = findChildren(group.children, "name").flatMap((entry) => {
    const surnameEl = findChild(entry.children, "surname");
    const givenEl = findChild(entry.children, "given-names");
    const family = textOf(surnameEl);
    const given = textOf(givenEl);
    if (!family) {
      return [];
    }
    return [given ? `${family} ${given}` : family];
  });
  return formatted.length > 0 ? formatted.join(", ") : undefined;
}
|
|
768
|
+
/**
 * Formats the publication details of a citation as `year;volume:fpage-lpage`.
 *
 * The volume and page segments are appended only when the matching elements
 * exist; "-lpage" is appended only when the last page has text.
 *
 * @param {Array} children - Children of the citation element.
 * @returns {string|undefined} The formatted string, or undefined without `year`.
 */
function formatYearVolumePage(children) {
  const yearEl = findChild(children, "year");
  if (!yearEl) {
    return undefined;
  }
  const segments = [extractAllText(yearEl.children)];

  const volumeEl = findChild(children, "volume");
  if (volumeEl) {
    segments.push(`;${extractAllText(volumeEl.children)}`);
  }

  const firstEl = findChild(children, "fpage");
  if (firstEl) {
    const first = extractAllText(firstEl.children);
    const lastEl = findChild(children, "lpage");
    const last = lastEl ? extractAllText(lastEl.children) : "";
    segments.push(last ? `:${first}-${last}` : `:${first}`);
  }
  return segments.join("");
}
|
|
786
|
+
/**
 * Renders an `element-citation` as "Authors. Title. Source. Year;Vol:Pages.".
 *
 * Parts that are missing or empty are skipped, so they leave no stray
 * separators. When nothing is available the result is "" rather than a lone
 * ".", which lets callers treat the citation as having no text.
 *
 * @param {Array} children - Children of the citation element.
 * @returns {string} The formatted citation, or "" when it has no parts.
 */
function formatElementCitation(children) {
  const parts = [];
  const addPart = (value) => {
    if (value) {
      parts.push(value);
    }
  };

  addPart(extractCitationAuthors(children));
  const articleTitle = findChild(children, "article-title");
  if (articleTitle) {
    addPart(extractAllText(articleTitle.children));
  }
  const source = findChild(children, "source");
  if (source) {
    addPart(extractAllText(source.children));
  }
  addPart(formatYearVolumePage(children));

  return parts.length > 0 ? `${parts.join(". ")}.` : "";
}
|
|
804
|
+
/**
 * Returns the trimmed text of a `mixed-citation`, removing one copy of any
 * `pub-id` value that occurs more than once in that text.
 *
 * @param {Array} children - Children of the citation element.
 * @returns {string} The citation text.
 */
function extractMixedCitationText(children) {
  const idValues = findChildren(children, "pub-id")
    .map((p) => extractAllText(p.children).trim())
    .filter((value) => value);
  const fullText = extractAllText(children).trim();
  if (idValues.length === 0) {
    return fullText;
  }

  return idValues.reduce((text, value) => {
    const pattern = new RegExp(value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g");
    const occurrences = text.match(pattern);
    if (!occurrences || occurrences.length <= 1) {
      return text;
    }
    // Drop the first duplicate, then tidy the whitespace it leaves behind.
    return text.replace(value, "").replace(/\s{2,}/g, " ").trim();
  }, fullText);
}
|
|
822
|
+
/**
 * Collects DOI, PMID and PMCID values from the `pub-id` children of a
 * citation. A leading "PMC" is removed from PMCID values; empty values are
 * ignored; a later `pub-id` of the same type overrides an earlier one.
 *
 * @param {Array} children - Children of the citation element.
 * @returns {{doi?: string, pmid?: string, pmcid?: string}}
 */
function extractPubIds(children) {
  const ids = {};
  for (const { attrs, children: idChildren } of findChildren(children, "pub-id")) {
    const kind = attrs["pub-id-type"];
    const value = extractAllText(idChildren).trim();
    if (!value) {
      continue;
    }
    switch (kind) {
      case "doi":
        ids.doi = value;
        break;
      case "pmid":
        ids.pmid = value;
        break;
      case "pmc":
      case "pmcid":
        ids.pmcid = value.replace(/^PMC/, "");
        break;
      default:
        break;
    }
  }
  return ids;
}
|
|
840
|
+
/**
 * Removes publication identifiers, and the labels in front of them, from
 * citation text.
 *
 * The "PMC"-prefixed form of the PMCID is stripped first. Stripping the bare
 * id first would turn "PMCID: PMC12345" into a dangling "PMCID:" and the
 * prefixed form could never match afterwards. Each known value is then removed
 * with an optional doi/pmid/pmcid/pmc label, whitespace is collapsed, and a
 * doubled trailing period is reduced to one.
 *
 * @param {string} text - Citation text.
 * @param {{doi?: string, pmid?: string, pmcid?: string}} pubIds - Identifiers
 *   to remove; `pmcid` is expected without the "PMC" prefix.
 * @returns {string} The cleaned text.
 */
function stripPubIdValues(text, pubIds) {
  const escapeForRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  let result = text;

  if (pubIds.pmcid) {
    const pmcFull = escapeForRegExp(`PMC${pubIds.pmcid}`);
    result = result.replace(new RegExp(`(?:pmcid|pmc)[:\\s]*${pmcFull}`, "gi"), "");
    result = result.replace(new RegExp(pmcFull, "g"), "");
  }

  const values = [pubIds.doi, pubIds.pmid, pubIds.pmcid].filter(Boolean);
  for (const val of values) {
    const escaped = escapeForRegExp(val);
    result = result.replace(new RegExp(`(?:doi|pmid|pmcid|pmc)[:\\s]*${escaped}`, "gi"), "");
    result = result.replace(new RegExp(escaped, "g"), "");
  }

  result = result.replace(/\s{2,}/g, " ").trim();
  result = result.replace(/\.\s*\.$/, ".");
  return result;
}
|
|
858
|
+
/**
 * Converts one `ref` entry to a reference object.
 *
 * Candidates are looked up inside `citation-alternatives` when present. A
 * `mixed-citation` is preferred, then an `element-citation`; in both cases
 * publication ids are extracted and stripped from the text. Otherwise the text
 * of the ref (excluding `label`) is used.
 *
 * @param {{node: object, children: Array}} ref - A ref entry.
 * @returns {object|null} `{ id, text, doi?, pmid?, pmcid? }`, or null when the
 *   id or the text is empty.
 */
function parseSingleReference(ref) {
  const id = getAttr(ref.node, "id") ?? "";
  const alternatives = findChild(ref.children, "citation-alternatives");
  const candidates = alternatives ? alternatives.children : ref.children;

  // Shared tail for both citation kinds: pull ids out, then clean the text.
  const finish = (citationChildren, rawText) => {
    const pubIds = extractPubIds(citationChildren);
    const cleaned = stripPubIdValues(rawText, pubIds);
    return id && cleaned ? { id, text: cleaned, ...pubIds } : null;
  };

  const mixed = findChild(candidates, "mixed-citation");
  if (mixed) {
    return finish(mixed.children, extractMixedCitationText(mixed.children));
  }
  const element = findChild(candidates, "element-citation");
  if (element) {
    return finish(element.children, formatElementCitation(element.children));
  }

  const unlabeled = ref.children.filter((c) => !("label" in c));
  const text = extractAllText(unlabeled).trim();
  return id && text ? { id, text } : null;
}
|
|
886
|
+
/**
 * Parses an XML string and returns the references found in
 * `back` / `ref-list`.
 *
 * @param {string} xml - XML source handed to `parser.parse`.
 * @returns {Array<object>} Parsed references; refs that cannot be parsed are
 *   omitted. Empty when article, `back` or `ref-list` is missing.
 */
function parseJatsReferences(xml) {
  const article = findArticle(parser.parse(xml));
  if (!article) {
    return [];
  }
  const back = findChild(article.children, "back");
  if (!back) {
    return [];
  }
  const refList = findChild(back.children, "ref-list");
  if (!refList) {
    return [];
  }
  return findChildren(refList.children, "ref")
    .map((ref) => parseSingleReference(ref))
    .filter((reference) => Boolean(reference));
}
|
|
906
|
+
/**
 * Returns the acknowledgment text: the `p` children of `ack` joined with a
 * blank line.
 *
 * @param {Array} backChildren - Nodes searched for `ack`.
 * @returns {string|undefined} The text, or undefined when `ack` is missing or
 *   has no `p` children.
 */
function parseAcknowledgments(backChildren) {
  const ack = findChild(backChildren, "ack");
  if (!ack) {
    return undefined;
  }
  const paras = findChildren(ack.children, "p");
  if (paras.length === 0) {
    return undefined;
  }
  return paras.map((p) => extractAllText(p.children)).join("\n\n");
}
|
|
916
|
+
/**
 * Converts every `app` inside `app-group` to a level-2 section.
 *
 * @param {Array} backChildren - Nodes searched for `app-group`.
 * @returns {Array<object>|undefined} The sections, or undefined when there is
 *   no `app-group` or it holds no `app`.
 */
function parseAppendices(backChildren) {
  const group = findChild(backChildren, "app-group");
  if (!group) {
    return undefined;
  }
  const apps = findChildren(group.children, "app");
  return apps.length === 0 ? undefined : apps.map((app) => parseSection(app.children, 2));
}
|
|
925
|
+
/**
 * Converts one `fn` entry to a footnote.
 *
 * The text is the trimmed title followed by the trimmed `p` texts, joined
 * with single spaces; empty parts are skipped.
 *
 * @param {{node: object, children: Array}} fn - A footnote entry.
 * @returns {{id: string, text: string}} `id` is "" when the attribute is missing.
 */
function parseSingleFootnote(fn) {
  const heading = findChild(fn.children, "title");
  const headingText = heading ? extractAllText(heading.children).trim() : "";
  const paragraphTexts = findChildren(fn.children, "p").map((p) =>
    extractAllText(p.children).trim()
  );
  const text = [headingText, ...paragraphTexts].filter((part) => part).join(" ");
  return { id: getAttr(fn.node, "id") ?? "", text };
}
|
|
944
|
+
/**
 * Converts every `fn` inside `fn-group` to a footnote.
 *
 * @param {Array} backChildren - Nodes searched for `fn-group`.
 * @returns {Array<{id: string, text: string}>|undefined} The footnotes, or
 *   undefined when there is no `fn-group` or it holds no `fn`.
 */
function parseFootnotes(backChildren) {
  const group = findChild(backChildren, "fn-group");
  if (!group) {
    return undefined;
  }
  const footnotes = findChildren(group.children, "fn");
  return footnotes.length === 0 ? undefined : footnotes.map((fn) => parseSingleFootnote(fn));
}
|
|
953
|
+
/**
 * Converts one `notes` entry to a list of `{ title, text }` notes.
 *
 * When the element has `sec` children (or, failing that, nested `notes`),
 * each of them becomes a note and the element's own title and paragraphs are
 * ignored. Otherwise the element itself becomes one note. Notes with neither
 * title nor text are dropped.
 *
 * @param {{children: Array}} note - A notes entry.
 * @returns {Array<{title: string, text: string}>}
 */
function parseSingleNotesElement(note) {
  // Title plus blank-line-joined paragraphs of one container.
  const summarize = (container) => {
    const heading = findChild(container.children, "title");
    const title = heading ? extractAllText(heading.children) : "";
    const text = findChildren(container.children, "p")
      .map((p) => extractAllText(p.children))
      .join("\n\n");
    return { title, text };
  };

  const sections = findChildren(note.children, "sec");
  const nested = findChildren(note.children, "notes");
  const parts = sections.length > 0 ? sections : nested;
  const sources = parts.length > 0 ? parts : [note];
  return sources
    .map((source) => summarize(source))
    .filter(({ title, text }) => Boolean(title || text));
}
|
|
979
|
+
/**
 * Collects the notes of every `notes` element among the given nodes.
 *
 * @param {Array} backChildren - Nodes searched for `notes`.
 * @returns {Array<{title: string, text: string}>|undefined} The notes, or
 *   undefined when none are found.
 */
function parseNotes(backChildren) {
  const elements = findChildren(backChildren, "notes");
  if (elements.length === 0) {
    return undefined;
  }
  const collected = elements.flatMap((element) => parseSingleNotesElement(element));
  return collected.length > 0 ? collected : undefined;
}
|
|
989
|
+
/**
 * Converts every `glossary` element that has a `def-list` to a note.
 *
 * The note title is the glossary title, or "Glossary" when there is none. The
 * text has one "term: definition" line per `def-item`.
 *
 * @param {Array} backChildren - Nodes searched for `glossary`.
 * @returns {Array<{title: string, text: string}>} Possibly empty.
 */
function parseGlossary(backChildren) {
  const textOf = (found) => (found ? extractAllText(found.children) : "");

  return findChildren(backChildren, "glossary").flatMap((glossary) => {
    const heading = findChild(glossary.children, "title");
    const title = heading ? extractAllText(heading.children) : "Glossary";
    const defList = findChild(glossary.children, "def-list");
    if (!defList) {
      return [];
    }
    const lines = findChildren(defList.children, "def-item").map((entry) => {
      const termNode = findChild(entry.children, "term");
      const defNode = findChild(entry.children, "def");
      return `${textOf(termNode)}: ${textOf(defNode)}`;
    });
    return [{ title, text: lines.join("\n") }];
  });
}
|
|
1011
|
+
/**
 * Converts the `fig` and `table-wrap` children of `floats-group` to blocks.
 *
 * @param {Array} articleChildren - Nodes searched for `floats-group`.
 * @returns {Array<object>|undefined} The blocks in document order, or undefined
 *   when there is no `floats-group` or it yields no block.
 */
function parseFloatsGroup(articleChildren) {
  const group = findChild(articleChildren, "floats-group");
  if (!group) {
    return undefined;
  }
  const floats = [];
  for (const child of group.children) {
    switch (getTagName(child)) {
      case "fig":
        floats.push(parseFigBlock(child));
        break;
      case "table-wrap":
        floats.push(parseTableBlock(child));
        break;
      default:
        break;
    }
  }
  return floats.length > 0 ? floats : undefined;
}
|
|
1026
|
+
/**
 * Parses an XML string and collects back-matter content.
 *
 * From `back`: acknowledgments, appendices, footnotes and notes (glossary
 * entries are appended to the notes). From the article itself: the blocks of
 * `floats-group`. Only sections that were found appear on the result.
 *
 * @param {string} xml - XML source handed to `parser.parse`.
 * @returns {{acknowledgments?: string, appendices?: Array, footnotes?: Array, notes?: Array, floats?: Array}}
 */
function parseJatsBackMatter(xml) {
  const article = findArticle(parser.parse(xml));
  if (!article) {
    return {};
  }

  const backMatter = {};
  const back = findChild(article.children, "back");
  if (back) {
    const simpleSections = [
      ["acknowledgments", parseAcknowledgments],
      ["appendices", parseAppendices],
      ["footnotes", parseFootnotes]
    ];
    for (const [key, parse] of simpleSections) {
      const value = parse(back.children);
      if (value) {
        backMatter[key] = value;
      }
    }

    const notes = parseNotes(back.children);
    const glossaryNotes = parseGlossary(back.children);
    if (notes || glossaryNotes.length > 0) {
      backMatter.notes = [...(notes ?? []), ...glossaryNotes];
    }
  }

  const floats = parseFloatsGroup(article.children);
  if (floats) {
    backMatter.floats = floats;
  }
  return backMatter;
}
|
|
1054
|
+
export {
|
|
1055
|
+
parseJatsBackMatter,
|
|
1056
|
+
parseJatsBody,
|
|
1057
|
+
parseJatsMetadata,
|
|
1058
|
+
parseJatsReferences
|
|
1059
|
+
};
|
|
1060
|
+
//# sourceMappingURL=jats-parser.js.map
|