@mantra-ai/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/google/client.d.ts +67 -0
- package/dist/ai/google/client.d.ts.map +1 -0
- package/dist/ai/google/client.js +169 -0
- package/dist/ai/google/client.js.map +1 -0
- package/dist/ai/google/generate.d.ts +10 -0
- package/dist/ai/google/generate.d.ts.map +1 -0
- package/dist/ai/google/generate.js +137 -0
- package/dist/ai/google/generate.js.map +1 -0
- package/dist/ai/google/index.d.ts +4 -0
- package/dist/ai/google/index.d.ts.map +1 -0
- package/dist/ai/google/index.js +4 -0
- package/dist/ai/google/index.js.map +1 -0
- package/dist/ai/google/types.d.ts +88 -0
- package/dist/ai/google/types.d.ts.map +1 -0
- package/dist/ai/google/types.js +55 -0
- package/dist/ai/google/types.js.map +1 -0
- package/dist/ai/index.d.ts +3 -0
- package/dist/ai/index.d.ts.map +1 -0
- package/dist/ai/index.js +3 -0
- package/dist/ai/index.js.map +1 -0
- package/dist/ai/openai/client.d.ts +22 -0
- package/dist/ai/openai/client.d.ts.map +1 -0
- package/dist/ai/openai/client.js +49 -0
- package/dist/ai/openai/client.js.map +1 -0
- package/dist/ai/openai/generate.d.ts +14 -0
- package/dist/ai/openai/generate.d.ts.map +1 -0
- package/dist/ai/openai/generate.js +178 -0
- package/dist/ai/openai/generate.js.map +1 -0
- package/dist/ai/openai/index.d.ts +4 -0
- package/dist/ai/openai/index.d.ts.map +1 -0
- package/dist/ai/openai/index.js +4 -0
- package/dist/ai/openai/index.js.map +1 -0
- package/dist/ai/openai/types.d.ts +86 -0
- package/dist/ai/openai/types.d.ts.map +1 -0
- package/dist/ai/openai/types.js +56 -0
- package/dist/ai/openai/types.js.map +1 -0
- package/dist/ai/prompts/index.d.ts +1 -0
- package/dist/ai/prompts/index.d.ts.map +1 -0
- package/dist/ai/prompts/index.js +2 -0
- package/dist/ai/prompts/index.js.map +1 -0
- package/dist/errors/index.d.ts +3 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +4 -0
- package/dist/errors/index.js.map +1 -0
- package/dist/errors/schemas.d.ts +304 -0
- package/dist/errors/schemas.d.ts.map +1 -0
- package/dist/errors/schemas.js +57 -0
- package/dist/errors/schemas.js.map +1 -0
- package/dist/errors/types.d.ts +30 -0
- package/dist/errors/types.d.ts.map +1 -0
- package/dist/errors/types.js +33 -0
- package/dist/errors/types.js.map +1 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +21 -0
- package/dist/index.js.map +1 -0
- package/dist/normalization/jats/index.d.ts +4 -0
- package/dist/normalization/jats/index.d.ts.map +1 -0
- package/dist/normalization/jats/index.js +3 -0
- package/dist/normalization/jats/index.js.map +1 -0
- package/dist/normalization/jats/normalize.d.ts +7 -0
- package/dist/normalization/jats/normalize.d.ts.map +1 -0
- package/dist/normalization/jats/normalize.js +213 -0
- package/dist/normalization/jats/normalize.js.map +1 -0
- package/dist/normalization/jats/utils/build/finalize.d.ts +3 -0
- package/dist/normalization/jats/utils/build/finalize.d.ts.map +1 -0
- package/dist/normalization/jats/utils/build/finalize.js +462 -0
- package/dist/normalization/jats/utils/build/finalize.js.map +1 -0
- package/dist/normalization/jats/utils/build/flatten.d.ts +20 -0
- package/dist/normalization/jats/utils/build/flatten.d.ts.map +1 -0
- package/dist/normalization/jats/utils/build/flatten.js +502 -0
- package/dist/normalization/jats/utils/build/flatten.js.map +1 -0
- package/dist/normalization/jats/utils/build/meta.d.ts +10 -0
- package/dist/normalization/jats/utils/build/meta.d.ts.map +1 -0
- package/dist/normalization/jats/utils/build/meta.js +32 -0
- package/dist/normalization/jats/utils/build/meta.js.map +1 -0
- package/dist/normalization/jats/utils/build/version.d.ts +3 -0
- package/dist/normalization/jats/utils/build/version.d.ts.map +1 -0
- package/dist/normalization/jats/utils/build/version.js +11 -0
- package/dist/normalization/jats/utils/build/version.js.map +1 -0
- package/dist/normalization/jats/utils/category.d.ts +11 -0
- package/dist/normalization/jats/utils/category.d.ts.map +1 -0
- package/dist/normalization/jats/utils/category.js +431 -0
- package/dist/normalization/jats/utils/category.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/abstracts.d.ts +3 -0
- package/dist/normalization/jats/utils/collectors/abstracts.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/abstracts.js +168 -0
- package/dist/normalization/jats/utils/collectors/abstracts.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/back.d.ts +35 -0
- package/dist/normalization/jats/utils/collectors/back.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/back.js +801 -0
- package/dist/normalization/jats/utils/collectors/back.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/contributors.d.ts +4 -0
- package/dist/normalization/jats/utils/collectors/contributors.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/contributors.js +77 -0
- package/dist/normalization/jats/utils/collectors/contributors.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/keywords.d.ts +2 -0
- package/dist/normalization/jats/utils/collectors/keywords.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/keywords.js +14 -0
- package/dist/normalization/jats/utils/collectors/keywords.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/meta.d.ts +6 -0
- package/dist/normalization/jats/utils/collectors/meta.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/meta.js +103 -0
- package/dist/normalization/jats/utils/collectors/meta.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/sections.d.ts +7 -0
- package/dist/normalization/jats/utils/collectors/sections.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/sections.js +484 -0
- package/dist/normalization/jats/utils/collectors/sections.js.map +1 -0
- package/dist/normalization/jats/utils/licenses.d.ts +5 -0
- package/dist/normalization/jats/utils/licenses.d.ts.map +1 -0
- package/dist/normalization/jats/utils/licenses.js +64 -0
- package/dist/normalization/jats/utils/licenses.js.map +1 -0
- package/dist/normalization/jats/utils/po/nodes.d.ts +6 -0
- package/dist/normalization/jats/utils/po/nodes.d.ts.map +1 -0
- package/dist/normalization/jats/utils/po/nodes.js +60 -0
- package/dist/normalization/jats/utils/po/nodes.js.map +1 -0
- package/dist/normalization/jats/utils/po/query.d.ts +7 -0
- package/dist/normalization/jats/utils/po/query.d.ts.map +1 -0
- package/dist/normalization/jats/utils/po/query.js +67 -0
- package/dist/normalization/jats/utils/po/query.js.map +1 -0
- package/dist/normalization/jats/utils/po/serialize.d.ts +4 -0
- package/dist/normalization/jats/utils/po/serialize.d.ts.map +1 -0
- package/dist/normalization/jats/utils/po/serialize.js +329 -0
- package/dist/normalization/jats/utils/po/serialize.js.map +1 -0
- package/dist/normalization/jats/utils/po/text.d.ts +7 -0
- package/dist/normalization/jats/utils/po/text.d.ts.map +1 -0
- package/dist/normalization/jats/utils/po/text.js +114 -0
- package/dist/normalization/jats/utils/po/text.js.map +1 -0
- package/dist/normalization/jats/utils/references.d.ts +26 -0
- package/dist/normalization/jats/utils/references.d.ts.map +1 -0
- package/dist/normalization/jats/utils/references.js +371 -0
- package/dist/normalization/jats/utils/references.js.map +1 -0
- package/dist/normalization/jats/utils/strings.d.ts +8 -0
- package/dist/normalization/jats/utils/strings.d.ts.map +1 -0
- package/dist/normalization/jats/utils/strings.js +197 -0
- package/dist/normalization/jats/utils/strings.js.map +1 -0
- package/dist/normalization/jats/utils/types.d.ts +233 -0
- package/dist/normalization/jats/utils/types.d.ts.map +1 -0
- package/dist/normalization/jats/utils/types.js +2 -0
- package/dist/normalization/jats/utils/types.js.map +1 -0
- package/dist/normalization/jats/utils/xml.d.ts +5 -0
- package/dist/normalization/jats/utils/xml.d.ts.map +1 -0
- package/dist/normalization/jats/utils/xml.js +69 -0
- package/dist/normalization/jats/utils/xml.js.map +1 -0
- package/dist/normalization/normalized-doc-schema.d.ts +1094 -0
- package/dist/normalization/normalized-doc-schema.d.ts.map +1 -0
- package/dist/normalization/normalized-doc-schema.js +410 -0
- package/dist/normalization/normalized-doc-schema.js.map +1 -0
- package/dist/normalization/pdf/index.d.ts +4 -0
- package/dist/normalization/pdf/index.d.ts.map +1 -0
- package/dist/normalization/pdf/index.js +3 -0
- package/dist/normalization/pdf/index.js.map +1 -0
- package/dist/normalization/pdf/normalize.d.ts +31 -0
- package/dist/normalization/pdf/normalize.d.ts.map +1 -0
- package/dist/normalization/pdf/normalize.js +321 -0
- package/dist/normalization/pdf/normalize.js.map +1 -0
- package/dist/normalization/pdf/prompt.d.ts +3 -0
- package/dist/normalization/pdf/prompt.d.ts.map +1 -0
- package/dist/normalization/pdf/prompt.js +118 -0
- package/dist/normalization/pdf/prompt.js.map +1 -0
- package/dist/sources/arxiv/client.d.ts +4 -0
- package/dist/sources/arxiv/client.d.ts.map +1 -0
- package/dist/sources/arxiv/client.js +13 -0
- package/dist/sources/arxiv/client.js.map +1 -0
- package/dist/sources/biorxiv/client.d.ts +21 -0
- package/dist/sources/biorxiv/client.d.ts.map +1 -0
- package/dist/sources/biorxiv/client.js +173 -0
- package/dist/sources/biorxiv/client.js.map +1 -0
- package/dist/sources/crossref/client.d.ts +3 -0
- package/dist/sources/crossref/client.d.ts.map +1 -0
- package/dist/sources/crossref/client.js +24 -0
- package/dist/sources/crossref/client.js.map +1 -0
- package/dist/sources/europepmc/client.d.ts +3 -0
- package/dist/sources/europepmc/client.d.ts.map +1 -0
- package/dist/sources/europepmc/client.js +29 -0
- package/dist/sources/europepmc/client.js.map +1 -0
- package/dist/sources/medrxiv/browser.d.ts +16 -0
- package/dist/sources/medrxiv/browser.d.ts.map +1 -0
- package/dist/sources/medrxiv/browser.js +210 -0
- package/dist/sources/medrxiv/browser.js.map +1 -0
- package/dist/sources/medrxiv/client.d.ts +34 -0
- package/dist/sources/medrxiv/client.d.ts.map +1 -0
- package/dist/sources/medrxiv/client.js +673 -0
- package/dist/sources/medrxiv/client.js.map +1 -0
- package/dist/sources/medrxiv/shared.d.ts +7 -0
- package/dist/sources/medrxiv/shared.d.ts.map +1 -0
- package/dist/sources/medrxiv/shared.js +18 -0
- package/dist/sources/medrxiv/shared.js.map +1 -0
- package/dist/sources/plos/client.d.ts +13 -0
- package/dist/sources/plos/client.d.ts.map +1 -0
- package/dist/sources/plos/client.js +147 -0
- package/dist/sources/plos/client.js.map +1 -0
- package/dist/sources/preprint-discovery.d.ts +55 -0
- package/dist/sources/preprint-discovery.d.ts.map +1 -0
- package/dist/sources/preprint-discovery.js +115 -0
- package/dist/sources/preprint-discovery.js.map +1 -0
- package/dist/types/expand.d.ts +5 -0
- package/dist/types/expand.d.ts.map +1 -0
- package/dist/types/expand.js +20 -0
- package/dist/types/expand.js.map +1 -0
- package/dist/types/methods-types.d.ts +37 -0
- package/dist/types/methods-types.d.ts.map +1 -0
- package/dist/types/methods-types.js +2 -0
- package/dist/types/methods-types.js.map +1 -0
- package/dist/types/multi-input-types.d.ts +57 -0
- package/dist/types/multi-input-types.d.ts.map +1 -0
- package/dist/types/multi-input-types.js +2 -0
- package/dist/types/multi-input-types.js.map +1 -0
- package/dist/types/paper/types.d.ts +41 -0
- package/dist/types/paper/types.d.ts.map +1 -0
- package/dist/types/paper/types.js +2 -0
- package/dist/types/paper/types.js.map +1 -0
- package/dist/types/results-types.d.ts +122 -0
- package/dist/types/results-types.d.ts.map +1 -0
- package/dist/types/results-types.js +17 -0
- package/dist/types/results-types.js.map +1 -0
- package/dist/types/supp-types.d.ts +6 -0
- package/dist/types/supp-types.d.ts.map +1 -0
- package/dist/types/supp-types.js +2 -0
- package/dist/types/supp-types.js.map +1 -0
- package/dist/types/version.d.ts +1828 -0
- package/dist/types/version.d.ts.map +1 -0
- package/dist/types/version.js +311 -0
- package/dist/types/version.js.map +1 -0
- package/dist/types/work.d.ts +4455 -0
- package/dist/types/work.d.ts.map +1 -0
- package/dist/types/work.js +330 -0
- package/dist/types/work.js.map +1 -0
- package/dist/works/adapters/crossref.d.ts +28 -0
- package/dist/works/adapters/crossref.d.ts.map +1 -0
- package/dist/works/adapters/crossref.js +43 -0
- package/dist/works/adapters/crossref.js.map +1 -0
- package/dist/works/adapters/europepmc.d.ts +14 -0
- package/dist/works/adapters/europepmc.d.ts.map +1 -0
- package/dist/works/adapters/europepmc.js +46 -0
- package/dist/works/adapters/europepmc.js.map +1 -0
- package/dist/works/adapters/openalex.d.ts +5 -0
- package/dist/works/adapters/openalex.d.ts.map +1 -0
- package/dist/works/adapters/openalex.js +75 -0
- package/dist/works/adapters/openalex.js.map +1 -0
- package/dist/works/errors.d.ts +23 -0
- package/dist/works/errors.d.ts.map +1 -0
- package/dist/works/errors.js +37 -0
- package/dist/works/errors.js.map +1 -0
- package/dist/works/id/detect-identifier.d.ts +15 -0
- package/dist/works/id/detect-identifier.d.ts.map +1 -0
- package/dist/works/id/detect-identifier.js +50 -0
- package/dist/works/id/detect-identifier.js.map +1 -0
- package/dist/works/id/normalize-external-id.d.ts +3 -0
- package/dist/works/id/normalize-external-id.d.ts.map +1 -0
- package/dist/works/id/normalize-external-id.js +44 -0
- package/dist/works/id/normalize-external-id.js.map +1 -0
- package/dist/works/id/normalize-ids.d.ts +66 -0
- package/dist/works/id/normalize-ids.d.ts.map +1 -0
- package/dist/works/id/normalize-ids.js +112 -0
- package/dist/works/id/normalize-ids.js.map +1 -0
- package/dist/works/id/normalize-internals.d.ts +7 -0
- package/dist/works/id/normalize-internals.d.ts.map +1 -0
- package/dist/works/id/normalize-internals.js +65 -0
- package/dist/works/id/normalize-internals.js.map +1 -0
- package/dist/works/id/resolve.d.ts +31 -0
- package/dist/works/id/resolve.d.ts.map +1 -0
- package/dist/works/id/resolve.js +123 -0
- package/dist/works/id/resolve.js.map +1 -0
- package/dist/works/id/resolveIds/assign.d.ts +4 -0
- package/dist/works/id/resolveIds/assign.d.ts.map +1 -0
- package/dist/works/id/resolveIds/assign.js +15 -0
- package/dist/works/id/resolveIds/assign.js.map +1 -0
- package/dist/works/id/resolveIds/flags.d.ts +11 -0
- package/dist/works/id/resolveIds/flags.d.ts.map +1 -0
- package/dist/works/id/resolveIds/flags.js +27 -0
- package/dist/works/id/resolveIds/flags.js.map +1 -0
- package/dist/works/id/resolveIds/idctx.d.ts +4 -0
- package/dist/works/id/resolveIds/idctx.d.ts.map +1 -0
- package/dist/works/id/resolveIds/idctx.js +25 -0
- package/dist/works/id/resolveIds/idctx.js.map +1 -0
- package/dist/works/id/resolveIds/index.d.ts +13 -0
- package/dist/works/id/resolveIds/index.d.ts.map +1 -0
- package/dist/works/id/resolveIds/index.js +498 -0
- package/dist/works/id/resolveIds/index.js.map +1 -0
- package/dist/works/id/resolveIds/versioning.d.ts +27 -0
- package/dist/works/id/resolveIds/versioning.d.ts.map +1 -0
- package/dist/works/id/resolveIds/versioning.js +156 -0
- package/dist/works/id/resolveIds/versioning.js.map +1 -0
- package/dist/works/id/resolveIds/workWhere.d.ts +3 -0
- package/dist/works/id/resolveIds/workWhere.d.ts.map +1 -0
- package/dist/works/id/resolveIds/workWhere.js +35 -0
- package/dist/works/id/resolveIds/workWhere.js.map +1 -0
- package/dist/works/id/types.d.ts +6 -0
- package/dist/works/id/types.d.ts.map +1 -0
- package/dist/works/id/types.js +2 -0
- package/dist/works/id/types.js.map +1 -0
- package/dist/works/pdf-fallback/candidates.d.ts +12 -0
- package/dist/works/pdf-fallback/candidates.d.ts.map +1 -0
- package/dist/works/pdf-fallback/candidates.js +51 -0
- package/dist/works/pdf-fallback/candidates.js.map +1 -0
- package/dist/works/pdf-fallback/fetch.d.ts +21 -0
- package/dist/works/pdf-fallback/fetch.d.ts.map +1 -0
- package/dist/works/pdf-fallback/fetch.js +89 -0
- package/dist/works/pdf-fallback/fetch.js.map +1 -0
- package/dist/works/pdf-fallback/index.d.ts +28 -0
- package/dist/works/pdf-fallback/index.d.ts.map +1 -0
- package/dist/works/pdf-fallback/index.js +35 -0
- package/dist/works/pdf-fallback/index.js.map +1 -0
- package/dist/works/plan.d.ts +8 -0
- package/dist/works/plan.d.ts.map +1 -0
- package/dist/works/plan.js +62 -0
- package/dist/works/plan.js.map +1 -0
- package/dist/works/strategies/arxiv.d.ts +3 -0
- package/dist/works/strategies/arxiv.d.ts.map +1 -0
- package/dist/works/strategies/arxiv.js +56 -0
- package/dist/works/strategies/arxiv.js.map +1 -0
- package/dist/works/strategies/biorxiv.d.ts +3 -0
- package/dist/works/strategies/biorxiv.d.ts.map +1 -0
- package/dist/works/strategies/biorxiv.js +63 -0
- package/dist/works/strategies/biorxiv.js.map +1 -0
- package/dist/works/strategies/europepmc.d.ts +3 -0
- package/dist/works/strategies/europepmc.d.ts.map +1 -0
- package/dist/works/strategies/europepmc.js +15 -0
- package/dist/works/strategies/europepmc.js.map +1 -0
- package/dist/works/strategies/index.d.ts +12 -0
- package/dist/works/strategies/index.d.ts.map +1 -0
- package/dist/works/strategies/index.js +19 -0
- package/dist/works/strategies/index.js.map +1 -0
- package/dist/works/strategies/landing-url.d.ts +3 -0
- package/dist/works/strategies/landing-url.d.ts.map +1 -0
- package/dist/works/strategies/landing-url.js +10 -0
- package/dist/works/strategies/landing-url.js.map +1 -0
- package/dist/works/strategies/medrxiv.d.ts +3 -0
- package/dist/works/strategies/medrxiv.d.ts.map +1 -0
- package/dist/works/strategies/medrxiv.js +47 -0
- package/dist/works/strategies/medrxiv.js.map +1 -0
- package/dist/works/strategies/plos.d.ts +3 -0
- package/dist/works/strategies/plos.d.ts.map +1 -0
- package/dist/works/strategies/plos.js +15 -0
- package/dist/works/strategies/plos.js.map +1 -0
- package/dist/works/strategies/shared.d.ts +11 -0
- package/dist/works/strategies/shared.d.ts.map +1 -0
- package/dist/works/strategies/shared.js +97 -0
- package/dist/works/strategies/shared.js.map +1 -0
- package/dist/works/strategies/ten1101.d.ts +3 -0
- package/dist/works/strategies/ten1101.d.ts.map +1 -0
- package/dist/works/strategies/ten1101.js +84 -0
- package/dist/works/strategies/ten1101.js.map +1 -0
- package/dist/works/text/acquire-fulltext.d.ts +7 -0
- package/dist/works/text/acquire-fulltext.d.ts.map +1 -0
- package/dist/works/text/acquire-fulltext.js +62 -0
- package/dist/works/text/acquire-fulltext.js.map +1 -0
- package/dist/works/text/normalize.d.ts +40 -0
- package/dist/works/text/normalize.d.ts.map +1 -0
- package/dist/works/text/normalize.js +188 -0
- package/dist/works/text/normalize.js.map +1 -0
- package/dist/works/types.d.ts +215 -0
- package/dist/works/types.d.ts.map +1 -0
- package/dist/works/types.js +6 -0
- package/dist/works/types.js.map +1 -0
- package/dist/works/util/debug.d.ts +7 -0
- package/dist/works/util/debug.d.ts.map +1 -0
- package/dist/works/util/debug.js +9 -0
- package/dist/works/util/debug.js.map +1 -0
- package/dist/works/util/license.d.ts +9 -0
- package/dist/works/util/license.d.ts.map +1 -0
- package/dist/works/util/license.js +39 -0
- package/dist/works/util/license.js.map +1 -0
- package/dist/works/util/normalize.d.ts +2 -0
- package/dist/works/util/normalize.d.ts.map +1 -0
- package/dist/works/util/normalize.js +76 -0
- package/dist/works/util/normalize.js.map +1 -0
- package/dist/works/util/parse.d.ts +8 -0
- package/dist/works/util/parse.d.ts.map +1 -0
- package/dist/works/util/parse.js +32 -0
- package/dist/works/util/parse.js.map +1 -0
- package/dist/works/util/source.d.ts +10 -0
- package/dist/works/util/source.d.ts.map +1 -0
- package/dist/works/util/source.js +48 -0
- package/dist/works/util/source.js.map +1 -0
- package/dist/works/util/version-label.d.ts +2 -0
- package/dist/works/util/version-label.d.ts.map +1 -0
- package/dist/works/util/version-label.js +8 -0
- package/dist/works/util/version-label.js.map +1 -0
- package/dist/works/util/work-id.d.ts +2 -0
- package/dist/works/util/work-id.d.ts.map +1 -0
- package/dist/works/util/work-id.js +27 -0
- package/dist/works/util/work-id.js.map +1 -0
- package/package.json +208 -0
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
// services/sources/biorxiv/client.ts
|
|
2
|
+
import { normPreprintCore, discoverPreprintRecord, } from "../preprint-discovery";
|
|
3
|
+
// Debug logging for this module is on only when the DEBUG_BIORXIV_FETCH environment variable is exactly "1".
const DEBUG = process.env.DEBUG_BIORXIV_FETCH === "1";
|
|
4
|
+
/**
 * Heuristically decide whether a response body is JATS XML.
 *
 * Only the first 4096 characters are inspected, case-insensitively. The text
 * must contain "<article" together with at least one of "<front",
 * "<article-meta" or "xmlns:xlink". Empty or missing input is never JATS.
 */
function looksLikeJatsXml(s) {
    if (!s) {
        return false;
    }
    const prefix = s.slice(0, 4096).toLowerCase();
    if (!prefix.includes("<article")) {
        return false;
    }
    const companionMarkers = ["<front", "<article-meta", "xmlns:xlink"];
    return companionMarkers.some((marker) => prefix.includes(marker));
}
|
|
11
|
+
/**
 * Look up the bioRxiv preprint record for an id or DOI.
 *
 * Delegates to discoverPreprintRecord with the "biorxiv" server key, passing
 * along the caller's optional `UA` and `preferVersion` together with this
 * module's DEBUG flag. Per the original note, the result is the versioned
 * .source.xml record for the best-matching version, or null.
 */
export async function buildBiorxivLatestSourceXmlUrlViaApi(idOrDoi, opts) {
    const discoveryOptions = {
        UA: opts?.UA,
        preferVersion: opts?.preferVersion,
        debug: DEBUG,
    };
    return discoverPreprintRecord(idOrDoi, "biorxiv", discoveryOptions);
}
|
|
19
|
+
/** Collect the raw Set-Cookie header values from a fetch response's headers (empty array when none are readable). */
function readSetCookieValues(headers) {
    if (!headers)
        return [];
    // Standard fetch Headers expose individual cookies through getSetCookie(), where available.
    if (typeof headers.getSetCookie === "function")
        return headers.getSetCookie();
    // node-fetch style Headers expose them through the non-standard raw().
    if (typeof headers.raw === "function")
        return headers.raw()?.["set-cookie"] || [];
    // Fallback: one combined header value; only its first name=value pair survives the caller's split on ";".
    const combined = typeof headers.get === "function" ? headers.get("set-cookie") : null;
    return combined ? [combined] : [];
}
/** Last-resort fetch of `url` through the r.jina.ai proxy; resolves to `{ url, xml }` or null. */
async function fetchJatsViaJinaProxy(url, userAgent) {
    try {
        const proxyUrl = `https://r.jina.ai/http://${url.replace(/^https?:\/\//i, "")}`;
        DEBUG && console.log("[biorxiv] proxy fetch:", proxyUrl);
        const pres = await fetch(proxyUrl, {
            headers: {
                Accept: "text/plain, text/xml;q=0.9, */*;q=0.1",
                "User-Agent": userAgent,
            },
        });
        DEBUG && console.log("[biorxiv] proxy status:", pres.status);
        if (!pres.ok)
            return null;
        const ptxt = await pres.text();
        const head = ptxt.slice(0, 512);
        DEBUG && console.log("[biorxiv] proxy head:", head.replace(/\s+/g, " ").slice(0, 200));
        // An HTML page (e.g. a block/error page) also starts with "<!doctype"; do not mistake it for XML.
        const isHtmlPage = /<!doctype\s+html/i.test(head) || /<html[\s>]/i.test(head);
        const seemsXml = looksLikeJatsXml(ptxt) ||
            (!isHtmlPage &&
                (/<\?xml/i.test(head) || /<!doctype/i.test(head) || /<article[\s>]/i.test(head)));
        if (seemsXml && ptxt.trim().length > 100) {
            DEBUG && console.log("[biorxiv] proxy ok, xml length=", ptxt.length);
            return { url, xml: ptxt };
        }
    }
    catch (err) {
        // Best effort: a proxy failure just means this fallback yields nothing.
        DEBUG && console.log("[biorxiv] proxy fetch failed:", err);
    }
    return null;
}
/**
 * Fetch the JATS XML content via the API-discovered URL.
 *
 * Each step below runs only when the previous response was not ok:
 *   1. plain request with an XML Accept header;
 *   2. retry with browser-like headers and the landing page as Referer;
 *   3. load the landing page to collect cookies, then retry with them;
 *   4. last resort: fetch through the r.jina.ai proxy.
 *
 * @param idOrDoi bioRxiv id or DOI, handed to the record discovery helper.
 * @param opts    optional settings; `UA` overrides the User-Agent and
 *                `preferVersion` is forwarded to discovery and used as the
 *                fallback `version` in the result.
 * @returns `{ url, xml, version }` on success, or null when no record URL is
 *          found, every attempt fails, or the body does not look like JATS.
 *          Errors thrown by the direct attempts 1 and 2 (and by reading the
 *          final body) are not caught here and reject the returned promise.
 */
export async function fetchBiorxivSourceXmlViaApi(idOrDoi, opts) {
    const record = await buildBiorxivLatestSourceXmlUrlViaApi(idOrDoi, opts);
    const jatsUrl = record?.url;
    if (!jatsUrl)
        return null;
    const core = normPreprintCore(idOrDoi, "biorxiv");
    const landingUrl = `https://www.biorxiv.org/content/10.1101/${core}`;
    DEBUG && console.log("[biorxiv] trying jats url:", jatsUrl);
    const userAgent = opts?.UA || "drylab-reveng-api/1.0";
    // Keep a caller-supplied browser UA; otherwise impersonate a desktop Chrome for the retries.
    const browserUA = userAgent && /^Mozilla\//.test(userAgent)
        ? userAgent
        : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36";
    const version = record?.version ?? opts?.preferVersion;
    // Try 1: explicit XML accept
    let res = await fetch(jatsUrl, {
        headers: {
            Accept: "text/xml,application/xml;q=0.9,*/*;q=0.1",
            "User-Agent": userAgent,
        },
    });
    DEBUG &&
        console.log("[biorxiv] attempt#1 status=", res.status, "ct=", res.headers.get("content-type") || "?");
    // If blocked or content-negotiation fails, retry once with browsery headers and a plausible Referer
    if (!res.ok) {
        res = await fetch(jatsUrl, {
            headers: {
                Accept: "text/xml,application/xml;q=0.9,*/*;q=0.1",
                "Accept-Language": "en-US,en;q=0.9",
                "User-Agent": browserUA,
                Referer: landingUrl,
            },
        });
        DEBUG &&
            console.log("[biorxiv] attempt#2 (browser UA) status=", res.status, "ct=", res.headers.get("content-type") || "?");
    }
    // If still blocked, try to prime cookies from the landing page, then retry with cookies
    if (!res.ok) {
        try {
            const priming = await fetch(landingUrl, {
                headers: {
                    Accept: "text/html,application/xhtml+xml",
                    "Accept-Language": "en-US,en;q=0.9",
                    "User-Agent": browserUA,
                },
            });
            DEBUG && console.log("[biorxiv] priming status:", priming.status);
            const setCookies = readSetCookieValues(priming?.headers);
            // Keep only the leading "name=value" pair of each Set-Cookie value.
            const cookieHeader = setCookies
                .map((c) => String(c).split(";")[0])
                .filter(Boolean)
                .join("; ");
            DEBUG && console.log("[biorxiv] cookies primed:", setCookies.length);
            if (cookieHeader) {
                res = await fetch(jatsUrl, {
                    headers: {
                        Accept: "text/xml,application/xml;q=0.9,*/*;q=0.1",
                        "Accept-Language": "en-US,en;q=0.9",
                        "User-Agent": browserUA,
                        Referer: landingUrl,
                        Cookie: cookieHeader,
                    },
                });
                DEBUG &&
                    console.log("[biorxiv] attempt#3 (with cookies) status=", res.status, "ct=", res.headers.get("content-type") || "?");
            }
        }
        catch (err) {
            // Best effort: on failure `res` still holds the attempt #2 response and the proxy fallback runs next.
            DEBUG && console.log("[biorxiv] cookie priming failed:", err);
        }
    }
    // No further direct-origin attempts; origin blocks server fetches with 403 consistently.
    if (!res.ok) {
        // Last-resort proxy fetch via r.jina.ai to bypass CDN checks
        const viaJina = await fetchJatsViaJinaProxy(jatsUrl, userAgent);
        if (viaJina) {
            return { url: viaJina.url, xml: viaJina.xml, version };
        }
        DEBUG && console.log("[biorxiv] all attempts failed, returning null");
        return null;
    }
    const xml = await res.text();
    if (looksLikeJatsXml(xml)) {
        DEBUG && console.log("[biorxiv] success, xml length=", xml.length);
        return { url: res.url || jatsUrl, xml, version };
    }
    DEBUG && console.log("[biorxiv] got non-JATS response, returning null");
    return null;
}
|
|
139
|
+
/**
 * Derive the versioned bioRxiv ".full" HTML URL from an id or DOI.
 *
 * The input is stringified and trimmed, then searched for a core id shaped
 * like YYYY.MM.DD.NNNNNN immediately followed by a version suffix (v1, V2, ...);
 * a leading "10.1101/" is tolerated, e.g. "2025.09.03.673748v1" or
 * "10.1101/2025.09.03.673748v1". The version suffix is lower-cased in the
 * result. Returns null when no versioned id is present.
 */
export function buildBiorxivFullHtmlUrlFromId(idOrDoi) {
    const versionedId = /(?:10\.1101\/)?([0-9]{4}\.[0-9]{2}\.[0-9]{2}\.[0-9]+)(v\d+)/i;
    const match = versionedId.exec(String(idOrDoi).trim());
    if (match === null) {
        return null;
    }
    const [, coreId, rawVersion] = match;
    const versionTag = rawVersion.toLowerCase();
    return `https://www.biorxiv.org/content/10.1101/${coreId}${versionTag}.full`;
}
|
|
150
|
+
/**
 * Retrieve a preprint's full-text HTML through the r.jina.ai proxy
 * (used to bypass origin 403s).
 *
 * The versioned ".full" URL is built from `idOrDoiWithVersion`; its scheme is
 * stripped and the remainder is appended to "https://r.jina.ai/http://".
 * Resolves to `{ url, html }`, where `url` is the un-proxied full URL, when
 * the response is ok, its trimmed body is longer than 100 characters and it
 * contains "<html". Resolves to null otherwise, including when the id cannot
 * be parsed or the request throws.
 *
 * @param idOrDoiWithVersion bioRxiv id or DOI that includes a version suffix.
 * @param UA User-Agent header value; defaults to "drylab-reveng-api/1.0".
 */
export async function fetchBiorxivFullHtmlViaProxy(idOrDoiWithVersion, UA = "drylab-reveng-api/1.0") {
    const fullUrl = buildBiorxivFullHtmlUrlFromId(idOrDoiWithVersion);
    if (!fullUrl) {
        return null;
    }
    const hostAndPath = fullUrl.replace(/^https?:\/\//i, "");
    const proxy = `https://r.jina.ai/http://${hostAndPath}`;
    const requestHeaders = {
        Accept: "text/plain, text/html;q=0.9, */*;q=0.1",
        "User-Agent": UA,
    };
    try {
        const response = await fetch(proxy, { headers: requestHeaders });
        if (response.ok) {
            const body = await response.text();
            const usable = body && body.trim().length > 100 && /<html/i.test(body);
            if (usable) {
                return { url: fullUrl, html: body };
            }
        }
        return null;
    }
    catch {
        return null;
    }
}
|
|
173
|
+
//# sourceMappingURL=client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../../src/sources/biorxiv/client.ts"],"names":[],"mappings":"AAAA,qCAAqC;AACrC,OAAO,EACL,gBAAgB,EAChB,sBAAsB,GAEvB,MAAM,uBAAuB,CAAC;AAE/B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,GAAG,CAAC;AAOtD,SAAS,gBAAgB,CAAC,CAA4B;IACpD,IAAI,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC;IACrB,MAAM,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,OAAO,CACL,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;QACzB,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAC5F,CAAC;AACJ,CAAC;AAED,sFAAsF;AACtF,MAAM,CAAC,KAAK,UAAU,oCAAoC,CACxD,OAAe,EACf,IAAyB;IAEzB,OAAO,sBAAsB,CAAC,OAAO,EAAE,SAAS,EAAE;QAChD,EAAE,EAAE,IAAI,EAAE,EAAE;QACZ,aAAa,EAAE,IAAI,EAAE,aAAa;QAClC,KAAK,EAAE,KAAK;KACb,CAAC,CAAC;AACL,CAAC;AAED,4DAA4D;AAC5D,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAC/C,OAAe,EACf,IAAyB;IAEzB,MAAM,MAAM,GAAG,MAAM,oCAAoC,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IACzE,MAAM,OAAO,GAAG,MAAM,EAAE,GAAG,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,MAAM,IAAI,GAAG,gBAAgB,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAClD,MAAM,UAAU,GAAG,2CAA2C,IAAI,EAAE,CAAC;IACrE,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,4BAA4B,EAAE,OAAO,CAAC,CAAC;IAC5D,MAAM,SAAS,GAAG,IAAI,EAAE,EAAE,IAAI,uBAAuB,CAAC;IAEtD,6BAA6B;IAC7B,IAAI,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE;QAC7B,OAAO,EAAE;YACP,MAAM,EAAE,0CAA0C;YAClD,YAAY,EAAE,SAAS;SACjB;KACF,CAAC,CAAC;IACV,KAAK;QACH,OAAO,CAAC,GAAG,CACT,6BAA6B,EAC7B,GAAG,CAAC,MAAM,EACV,KAAK,EACL,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,GAAG,CACvC,CAAC;IAEJ,oGAAoG;IACpG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,OAAO,GAAG,UAAU,CAAC;QAC3B,MAAM,SAAS,GACb,SAAS,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;YACvC,CAAC,CAAC,SAAS;YACX,CAAC,CAAC,mGAAmG,CAAC;QAE1G,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE;YACzB,OAAO,EAAE;gBACP,MAAM,EAAE,0CAA0C;gBAClD,iBAAiB,EAAE,gBAAgB;gBACnC,YAAY,EAAE,SAAS;gBACvB,OAAO,EAAE,OAAO;aACV;SACF,CAAC,CAAC;QACV,KAAK;YACH,OAAO,CAAC,GAAG,CACT,0CAA0C,EAC1C,GAAG,CAAC,MAAM,EACV,KAAK,EACL,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI
,GAAG,CACvC,CAAC;IACN,CAAC;IAED,wFAAwF;IACxF,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,UAAU,EAAE;gBACtC,OAAO,EAAE;oBACP,MAAM,EAAE,iCAAiC;oBACzC,iBAAiB,EAAE,gBAAgB;oBACnC,YAAY,EACV,SAAS,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;wBACvC,CAAC,CAAC,SAAS;wBACX,CAAC,CAAC,mGAAmG;iBACnG;aACF,CAAC,CAAC;YACV,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,2BAA2B,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YAClE,MAAM,GAAG,GAAI,OAAe,EAAE,OAAO,EAAE,GAAG,EAAE,EAAE,CAAC;YAC/C,MAAM,UAAU,GAAa,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;YACvD,MAAM,YAAY,GAAG,UAAU;iBAC5B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;iBACnC,MAAM,CAAC,OAAO,CAAC;iBACf,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,2BAA2B,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;YACrE,IAAI,YAAY,EAAE,CAAC;gBACjB,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE;oBACzB,OAAO,EAAE;wBACP,MAAM,EAAE,0CAA0C;wBAClD,iBAAiB,EAAE,gBAAgB;wBACnC,YAAY,EACV,SAAS,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;4BACvC,CAAC,CAAC,SAAS;4BACX,CAAC,CAAC,mGAAmG;wBACzG,OAAO,EAAE,UAAU;wBACnB,MAAM,EAAE,YAAY;qBACd;iBACF,CAAC,CAAC;gBACV,KAAK;oBACH,OAAO,CAAC,GAAG,CACT,4CAA4C,EAC5C,GAAG,CAAC,MAAM,EACV,KAAK,EACL,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,GAAG,CACvC,CAAC;YACN,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC;IAED,yFAAyF;IAEzF,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,6DAA6D;QAC7D,MAAM,QAAQ,GAAG,KAAK,EAAE,GAAW,EAAE,EAAE;YACrC,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,4BAA4B,GAAG,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,EAAE,CAAC;gBAChF,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,wBAAwB,EAAE,QAAQ,CAAC,CAAC;gBACzD,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,QAAQ,EAAE;oBACjC,OAAO,EAAE;wBACP,MAAM,EAAE,uCAAuC;wBAC/C,YAAY,EAAE,SAAS;qBACjB;iBACF,CAAC,CAAC;gBACV,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,yBAAyB,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC7D,IAAI,IAAI,CAAC,EAAE,EAAE,CAAC;oBACZ,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;oBAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;oBAChC,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CA
AC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;oBACvF,MAAM,QAAQ,GACZ,gBAAgB,CAAC,IAAI,CAAC;wBACtB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;wBACpB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;wBACvB,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAC9B,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;wBACzC,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,iCAAiC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;wBACrE,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAW,CAAC;oBACrC,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;YACV,OAAO,IAAI,CAAC;QACd,CAAC,CAAC;QACF,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC;QACxC,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,IAAI,IAAI,EAAE,aAAa,EAAE,CAAC;QACjG,CAAC;QACD,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;IAC7B,IAAI,gBAAgB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC1B,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,gCAAgC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;QACnE,OAAO,EAAE,GAAG,EAAG,GAAW,CAAC,GAAG,IAAI,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,IAAI,IAAI,EAAE,aAAa,EAAE,CAAC;IACpG,CAAC;IACD,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;IACxE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,0EAA0E;AAC1E,MAAM,UAAU,6BAA6B,CAAC,OAAe;IAC3D,MAAM,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IACjC,mEAAmE;IACnE,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,8DAA8D,CAAC,CAAC;IAClF,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;IACnB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,WAAW,EAAE,CAAC;IAC9B,OAAO,2CAA2C,IAAI,GAAG,CAAC,OAAO,CAAC;AACpE,CAAC;AAED,oEAAoE;AACpE,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAChD,kBAA0B,EAC1B,EAAE,GAAG,uBAAuB;IAE5B,MAAM,OAAO,GAAG,6BAA6B,CAAC,kBAAkB,CAAC,CAAC;IAClE,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,MAAM,KAAK,GAAG,4BAA4B,OAAO,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,EAAE,CAAC;IACjF,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,KAAK,EAAE;YAC7B,OAAO,EAAE;gBACP,MAAM,EAAE,wCAAwC;gBAChD,YAAY,EAAE,EAAE;aACV;SACF,CAAC,CAAC;QACV,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,OAAO,IAAI,CA
AC;QACzB,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC7B,MAAM,EAAE,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,IAAI,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAChE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../../src/sources/crossref/client.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAC,MAAM,mBAAmB,CAAC;AAGtD,wBAAsB,sBAAsB,CAC1C,GAAG,EAAE,MAAM,GACV,OAAO,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAiB7B"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// Crossref returns deeply nested, untyped JSON — keep as `any` at the adapter boundary
|
|
2
|
+
/**
 * Look up a single work in the Crossref REST API by DOI.
 *
 * Never rejects for HTTP or transport problems; the outcome is always reported
 * through the returned object:
 *   - `{ ok: true, data }`             the `message` member of the JSON response
 *   - `{ ok: false, status, reason }`  non-2xx response; `reason` is "not_found"
 *                                      (404), "rate_limited" (429) or
 *                                      "upstream_error" (any other status)
 *   - `{ ok: false, reason: "not_found" }`      2xx response without a `message`
 *   - `{ ok: false, reason: "network_error" }`  fetch or JSON parsing threw
 *
 * @param doi DOI to look up; it is URI-component-encoded into the request path.
 */
export async function fetchCrossrefWorkByDoi(doi) {
    const endpoint = `https://api.crossref.org/works/${encodeURIComponent(doi)}`;
    try {
        const response = await fetch(endpoint, { headers: { Accept: "application/json" } });
        if (response.ok) {
            const body = await response.json();
            const work = body?.message ?? null;
            return work ? { ok: true, data: work } : { ok: false, reason: "not_found" };
        }
        // Map the HTTP status onto the coarse failure reason reported to callers.
        const { status } = response;
        let reason;
        switch (status) {
            case 404:
                reason = "not_found";
                break;
            case 429:
                reason = "rate_limited";
                break;
            default:
                reason = "upstream_error";
        }
        return { ok: false, status, reason };
    }
    catch {
        return { ok: false, reason: "network_error" };
    }
}
|
|
24
|
+
//# sourceMappingURL=client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../../src/sources/crossref/client.ts"],"names":[],"mappings":"AAEA,uFAAuF;AACvF,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,GAAW;IAEX,MAAM,GAAG,GAAG,kCAAkC,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC;IACxE,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC;QAC1E,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;YAC1B,IAAI,MAAM,KAAK,GAAG;gBAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;YACtE,IAAI,MAAM,KAAK,GAAG;gBAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,EAAE,CAAC;YACzE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;QACzD,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAI,IAAY,EAAE,OAAO,IAAI,IAAI,CAAC;QAC/C,IAAI,CAAC,OAAO;YAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;QACxD,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IACrC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC;IAChD,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../../src/sources/europepmc/client.ts"],"names":[],"mappings":"AAEA,wBAAsB,oBAAoB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAczE;AAED,wBAAsB,mBAAmB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAa7E"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/** User-Agent sent with Europe PMC requests; overridable through the `UA` environment variable. */
const UA = process.env.UA || "drylab-reveng-api/1.0";
/**
 * Download the JATS full-text XML of an article from the Europe PMC REST
 * `fullTextXML` endpoint.
 *
 * @param pmcid PubMed Central id, with or without the `PMC` prefix. Surrounding
 *   whitespace is ignored and the prefix is matched case-insensitively, so
 *   "123", "PMC123", "pmc123" and " PMC123 " all address the same article.
 * @returns The response body as text.
 * @throws Error when the response status is not OK, or when the body is empty or
 *   does not contain an `<article` element.
 */
export async function fetchEpmcJatsByPmcid(pmcid) {
    // Normalise the identifier first: a case-sensitive prefix test would turn
    // "pmc123" into "PMCpmc123" and " PMC123" into "PMC PMC123".
    const trimmed = pmcid.trim();
    const bare = /^pmc/i.test(trimmed) ? trimmed.slice(3) : trimmed;
    const id = `PMC${bare}`;
    const url = `https://www.ebi.ac.uk/europepmc/webservices/rest/${encodeURIComponent(id)}/fullTextXML`;
    const res = await fetch(url, {
        // UA always holds a non-empty string (see its definition), so no second fallback is needed.
        headers: { Accept: "application/xml", "User-Agent": UA },
    });
    if (!res.ok)
        throw new Error(`Europe PMC fetch failed: ${res.status}`);
    const text = await res.text();
    if (!text || !text.includes("<article")) {
        throw new Error("Europe PMC: no JATS article in response");
    }
    return text;
}
|
|
16
|
+
/**
 * Resolve a DOI to a PMCID through the Europe PMC search endpoint.
 *
 * Sends a `DOI:<doi>` query limited to one result and reads the first hit.
 *
 * @param doi DOI to search for.
 * @returns The hit's `pmcid`, else the first entry of its `pmcidList`, else
 *   `null`. Also `null` when the response status is not OK. Errors thrown by
 *   `fetch` or by JSON parsing are not caught here.
 */
export async function fetchEpmcPmcidByDoi(doi) {
    const query = `DOI:${doi}`;
    const searchUrl = `https://www.ebi.ac.uk/europepmc/webservices/rest/search?format=json&pageSize=1&query=${encodeURIComponent(query)}`;
    const requestHeaders = { Accept: "application/json", "User-Agent": UA || "drylab/1.0" };
    const response = await fetch(searchUrl, { headers: requestHeaders });
    if (!response.ok) {
        return null;
    }
    const payload = await response.json();
    const firstHit = payload?.resultList?.result?.[0];
    // Prefer the scalar field; fall back to the list form when it is absent or empty.
    const direct = firstHit?.pmcid;
    if (direct) {
        return direct;
    }
    return firstHit?.pmcidList?.[0] ?? null;
}
|
|
29
|
+
//# sourceMappingURL=client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../../src/sources/europepmc/client.ts"],"names":[],"mappings":"AAAA,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,IAAI,uBAAuB,CAAC;AAErD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,KAAa;IACtD,MAAM,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,KAAK,EAAE,CAAC;IAC3D,MAAM,GAAG,GAAG,oDAAoD,kBAAkB,CAChF,EAAE,CACH,cAAc,CAAC;IAChB,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAC3B,OAAO,EAAE,EAAE,MAAM,EAAE,iBAAiB,EAAE,YAAY,EAAE,EAAE,IAAI,YAAY,EAAS;KACzE,CAAC,CAAC;IACV,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC;IACvE,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;IAC9B,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;IAC7D,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,GAAW;IACnD,MAAM,CAAC,GAAG,OAAO,GAAG,EAAE,CAAC;IACvB,MAAM,GAAG,GAAG,wFAAwF,kBAAkB,CACpH,CAAC,CACF,EAAE,CAAC;IACJ,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAC3B,OAAO,EAAE,EAAE,MAAM,EAAE,kBAAkB,EAAE,YAAY,EAAE,EAAE,IAAI,YAAY,EAAS;KAC1E,CAAC,CAAC;IACV,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC;IACzB,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAQ,CAAC;IACvC,MAAM,IAAI,GAAG,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC;IAC3C,MAAM,KAAK,GAAuB,IAAI,EAAE,KAAK,IAAI,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC;IACtE,OAAO,KAAK,IAAI,IAAI,CAAC;AACvB,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { SourceFetchOptions } from "./client";
|
|
2
|
+
/**
 * Input for `fetchMedrxivSourceXmlViaBrowser`.
 *
 * - `coreId`: interpolated into the landing URL
 *   `https://www.medrxiv.org/content/10.1101/<coreId><version>` that is opened first.
 * - `jatsUrl`: URL fetched from inside the page once the landing page has loaded.
 * - `inferredVersion`: when truthy and finite it is appended to the landing URL as
 *   `v<n>`; it is also the first choice for the `version` field of the result.
 * - `refererUrl`: value of the `Referer` header for the JATS request; the landing
 *   URL is used when it is omitted.
 * - `sourceOptions`: `UA` is used as the browser user agent only when it starts
 *   with `Mozilla/`; `preferVersion` is the fallback for the result's `version`.
 */
export type BrowserFetchParams = {
|
|
3
|
+
coreId: string;
|
|
4
|
+
jatsUrl: string;
|
|
5
|
+
inferredVersion?: number | null;
|
|
6
|
+
refererUrl?: string;
|
|
7
|
+
sourceOptions?: SourceFetchOptions;
|
|
8
|
+
};
|
|
9
|
+
/**
 * Fetch a medRxiv JATS XML document through a headless browser page.
 *
 * Resolves to `null` when the in-page fetch fails or returns a non-OK status, when
 * the body is empty or does not look like JATS XML, or when an error is caught
 * while driving the page. Acquiring the browser and opening the page happen before
 * the error handling, so failures there reject instead.
 *
 * On success: `url` is the final response URL (or the requested JATS URL), `xml`
 * is the body with a leading BOM removed, `version` is
 * `inferredVersion ?? sourceOptions.preferVersion ?? null`, and `cookies` is the
 * most recently captured `name=value; ...` cookie header, if any.
 */
export declare const fetchMedrxivSourceXmlViaBrowser: (params: BrowserFetchParams) => Promise<{
|
|
10
|
+
url: string;
|
|
11
|
+
xml: string;
|
|
12
|
+
version?: number | null;
|
|
13
|
+
cookies?: string;
|
|
14
|
+
} | null>;
|
|
15
|
+
/** Returns the cookie header most recently captured from a browser page, or `null` if none has been captured. */
export declare const peekBrowserCookies: () => string | null;
|
|
16
|
+
//# sourceMappingURL=browser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../../../src/sources/medrxiv/browser.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAC;AAuGnD,MAAM,MAAM,kBAAkB,GAAG;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACpC,CAAC;AAEF,eAAO,MAAM,+BAA+B,GAC1C,QAAQ,kBAAkB,KACzB,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CA6IxF,CAAC;AAEF,eAAO,MAAM,kBAAkB,QAAO,MAAM,GAAG,IAAwB,CAAC"}
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import puppeteer, { Browser } from "puppeteer-core";
|
|
2
|
+
import chromium from "@sparticuz/chromium";
|
|
3
|
+
import { MEDRXIV_ACCEPT_LANGUAGE, MEDRXIV_ACCEPT_XML, MEDRXIV_BROWSER_UA, MEDRXIV_HTML_ACCEPT, looksLikeJatsXml, safePreview, } from "./shared";
|
|
4
|
+
// Verbose "[medrxiv]" console logging is on only when DEBUG_MEDRXIV_FETCH is exactly "1".
const DEBUG = "1" === process.env.DEBUG_MEDRXIV_FETCH;
// Timeout for the landing-page navigation and for the in-page JATS fetch: 45 seconds.
const PAGE_TIMEOUT_MS = 45000;
// The shared browser is closed after this long without use: 5 minutes.
const BROWSER_IDLE_TIMEOUT_MS = 300000;
// Cookie header ("name=value; ...") captured most recently from a page, or null.
let lastCookieHeader = null;
// Date.now() timestamp of the last time the shared browser was handed out or used successfully.
let browserLastUsed = 0;
// Handle of the pending idle-shutdown timer, or null when none is armed.
let browserIdleTimer = null;
// Promise for the shared browser instance, or null when no browser is running.
let browserPromise = null;
|
|
11
|
+
/**
 * (Re)arm the idle-shutdown timer for the shared browser.
 *
 * Any pending timer is cancelled. If a browser is currently published in
 * `browserPromise`, a new timer is scheduled that closes it once
 * BROWSER_IDLE_TIMEOUT_MS has passed since `browserLastUsed`. The timer is
 * unref'ed where supported, so it does not keep the process alive.
 */
const resetBrowserIdleTimer = () => {
    if (browserIdleTimer) {
        clearTimeout(browserIdleTimer);
        browserIdleTimer = null;
    }
    if (!browserPromise)
        return;
    browserIdleTimer = setTimeout(async () => {
        // This timer has fired; drop the stale handle before deciding what to do.
        browserIdleTimer = null;
        if (!browserPromise)
            return;
        if (Date.now() - browserLastUsed < BROWSER_IDLE_TIMEOUT_MS) {
            // Used again since the timer was armed: wait another full period.
            resetBrowserIdleTimer();
            return;
        }
        // Unpublish the browser BEFORE awaiting its shutdown. Otherwise a request
        // arriving while close() is in flight would be handed the closing browser,
        // and nulling the shared reference afterwards could discard a newer one.
        const idleBrowserPromise = browserPromise;
        browserPromise = null;
        try {
            const browser = await idleBrowserPromise;
            if (browser && browser.isConnected()) {
                DEBUG && console.log("[medrxiv] browser idle timeout -> closing");
                await browser.close();
            }
        }
        catch (err) {
            DEBUG && console.log("[medrxiv] browser close error", err);
        }
    }, BROWSER_IDLE_TIMEOUT_MS);
    browserIdleTimer.unref?.();
};
|
|
41
|
+
// True once the process "exit" hook below has been registered. The browser is
// relaunched after every idle shutdown, and registering a hook per launch would
// accumulate listeners on `process`.
let exitCleanupRegistered = false;
// Most recently launched browser. "exit" listeners must complete synchronously,
// so the hook cannot await `browserPromise` and needs a direct reference.
let lastLaunchedBrowser = null;
/**
 * Return the shared Puppeteer browser, launching it on first use.
 *
 * The executable comes from PUPPETEER_EXECUTABLE_PATH when set, otherwise from
 * `@sparticuz/chromium`. A cached browser that has disconnected is discarded and
 * replaced. Each successful call refreshes `browserLastUsed` and re-arms the idle
 * timer.
 *
 * @returns The connected browser instance.
 * @throws The launch error when Chromium cannot be started (the cached promise is
 *   cleared so a later call can retry), or Error("puppeteer browser unavailable")
 *   when the launch resolves without a browser.
 */
const ensureBrowser = async () => {
    const cachedPromise = browserPromise;
    if (cachedPromise) {
        const cached = await cachedPromise;
        // Only clear the slot if nobody replaced it while we were awaiting.
        if (cached && !cached.isConnected() && browserPromise === cachedPromise) {
            DEBUG && console.log("[medrxiv] cached browser disconnected -> relaunching");
            browserPromise = null;
        }
    }
    if (!browserPromise) {
        browserPromise = (async () => {
            const envExecutable = process.env.PUPPETEER_EXECUTABLE_PATH;
            let resolvedExecutable = envExecutable;
            if (!envExecutable) {
                console.warn("[medrxiv] PUPPETEER_EXECUTABLE_PATH not set; using @sparticuz/chromium executable");
                resolvedExecutable = await chromium.executablePath();
            }
            // The Set removes flags that chromium.args already contains.
            const launchArgs = Array.from(new Set([
                ...chromium.args,
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--single-process",
                "--no-zygote",
                "--disable-background-networking",
                "--disable-sync",
                "--disable-extensions",
                "--disable-component-extensions-with-background-pages",
                "--mute-audio",
            ]));
            const launched = await puppeteer.launch({
                headless: chromium.headless ?? "new",
                executablePath: resolvedExecutable || undefined,
                args: launchArgs,
                defaultViewport: chromium.defaultViewport ?? { width: 1024, height: 768 },
            });
            lastLaunchedBrowser = launched;
            return launched;
        })()
            .catch((err) => {
            DEBUG && console.log("[medrxiv] puppeteer launch failed", err);
            browserPromise = null;
            throw err;
        });
        if (!exitCleanupRegistered) {
            exitCleanupRegistered = true;
            // Work scheduled asynchronously from an "exit" listener never runs, so an
            // awaited browser.close() here would be a no-op. Kill the child process
            // synchronously instead.
            process.once("exit", () => {
                try {
                    lastLaunchedBrowser?.process()?.kill("SIGKILL");
                }
                catch { }
            });
        }
    }
    const browser = await browserPromise;
    if (!browser)
        throw new Error("puppeteer browser unavailable");
    browserLastUsed = Date.now();
    resetBrowserIdleTimer();
    return browser;
};
|
|
92
|
+
/**
 * Fetch a medRxiv JATS XML document from inside a headless browser page.
 *
 * The page first opens the article landing page, then issues the JATS request
 * from within the page with credentials included and a `Referer` header set.
 * Cookies visible to the page are recorded in `lastCookieHeader` after each step.
 *
 * @param params See `BrowserFetchParams`: `coreId`, `jatsUrl`, and optional
 *   `inferredVersion`, `refererUrl`, `sourceOptions`.
 * @returns `{ url, xml, version, cookies }` on success; `null` when the in-page
 *   fetch fails, the payload is empty or is not JATS XML, or an error is caught
 *   while driving the page. Failures of `ensureBrowser()` / `newPage()` happen
 *   before the error handling and therefore reject.
 */
export const fetchMedrxivSourceXmlViaBrowser = async (params) => {
    const { coreId, jatsUrl, inferredVersion, refererUrl, sourceOptions } = params;
    const sharedBrowser = await ensureBrowser();
    const page = await sharedBrowser.newPage();
    // A caller-supplied user agent is honoured only if it looks like a browser UA.
    let userAgent = MEDRXIV_BROWSER_UA;
    if (sourceOptions?.UA && /^Mozilla\//.test(sourceOptions.UA)) {
        userAgent = sourceOptions.UA;
    }
    let versionSuffix = "";
    if (inferredVersion && Number.isFinite(inferredVersion)) {
        versionSuffix = `v${Number(inferredVersion)}`;
    }
    const landingUrl = `https://www.medrxiv.org/content/10.1101/${coreId}${versionSuffix}`;
    const refererHeader = refererUrl || landingUrl;
    // Best-effort: store the page's cookies as a "name=value; ..." header string.
    const rememberCookies = async () => {
        try {
            const jar = await page.cookies();
            if (!jar || !jar.length) {
                return;
            }
            const pairs = [];
            for (const entry of jar) {
                if (entry.name && entry.value) {
                    pairs.push(`${entry.name}=${entry.value}`);
                }
            }
            const header = pairs.join("; ");
            if (header) {
                lastCookieHeader = header;
            }
        }
        catch (err) {
            if (DEBUG) {
                console.log("[medrxiv] browser cookie capture error", err?.message ?? err);
            }
        }
    };
    // Handed to page.evaluate, so it must be self-contained: everything it needs
    // arrives through its single argument.
    const fetchInsidePage = async ({ url, timeoutMs, acceptHeader }) => {
        const aborter = new AbortController();
        const abortTimer = setTimeout(() => aborter.abort(), timeoutMs);
        try {
            const reply = await fetch(url, {
                method: "GET",
                credentials: "include",
                headers: {
                    Accept: acceptHeader,
                },
                signal: aborter.signal,
            });
            const contentType = reply.headers.get("content-type") || "";
            const text = await reply.text();
            return {
                ok: reply.ok,
                status: reply.status,
                url: reply.url,
                contentType,
                text,
            };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            return { ok: false, error: message };
        }
        finally {
            clearTimeout(abortTimer);
        }
    };
    try {
        if (DEBUG) {
            console.log("[medrxiv] browser fetch landing", landingUrl);
        }
        await page.setUserAgent(userAgent);
        await page.setExtraHTTPHeaders({
            "Accept-Language": MEDRXIV_ACCEPT_LANGUAGE,
            Accept: MEDRXIV_HTML_ACCEPT,
        });
        await page.goto(landingUrl, { waitUntil: "domcontentloaded", timeout: PAGE_TIMEOUT_MS });
        await rememberCookies();
        // Landing page loaded; switch to XML headers for the JATS request.
        await page.setExtraHTTPHeaders({
            "Accept-Language": MEDRXIV_ACCEPT_LANGUAGE,
            Accept: MEDRXIV_ACCEPT_XML,
            Referer: refererHeader,
        });
        if (DEBUG) {
            console.log("[medrxiv] browser fetch jats", jatsUrl);
        }
        const outcome = (await page.evaluate(fetchInsidePage, {
            url: jatsUrl,
            timeoutMs: PAGE_TIMEOUT_MS,
            acceptHeader: MEDRXIV_ACCEPT_XML,
        }));
        await rememberCookies();
        if (!outcome) {
            if (DEBUG) {
                console.log("[medrxiv] browser missing fetch result");
            }
            return null;
        }
        if (!outcome.ok) {
            if (DEBUG) {
                console.log("[medrxiv] browser fetch failed", outcome.status ?? "?", outcome.error || "unknown");
            }
            return null;
        }
        if (DEBUG) {
            console.log("[medrxiv] browser status", outcome.status ?? "?", outcome.contentType || "?");
        }
        // Drop a leading byte-order mark before inspecting the payload.
        const xml = (outcome.text || "").replace(/^\uFEFF/, "");
        if (!xml) {
            if (DEBUG) {
                console.log("[medrxiv] browser empty payload");
            }
            return null;
        }
        if (!looksLikeJatsXml(xml)) {
            if (DEBUG) {
                console.log("[medrxiv] browser non-jats", safePreview(xml));
            }
            return null;
        }
        browserLastUsed = Date.now();
        resetBrowserIdleTimer();
        return {
            url: outcome.url || jatsUrl,
            xml,
            version: inferredVersion ?? sourceOptions?.preferVersion ?? null,
            cookies: lastCookieHeader ?? undefined,
        };
    }
    catch (err) {
        if (DEBUG) {
            console.log("[medrxiv] browser fetch error", err?.message ?? err);
        }
        return null;
    }
    finally {
        // Always take a final cookie snapshot and release the page.
        await rememberCookies();
        try {
            await page.close();
        }
        catch { }
    }
};
|
|
209
|
+
/**
 * Read, without modifying, the cookie header most recently captured from a
 * browser page.
 *
 * @returns The stored "name=value; ..." string, or `null` if none was captured.
 */
export const peekBrowserCookies = () => {
    return lastCookieHeader;
};
|
|
210
|
+
//# sourceMappingURL=browser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"browser.js","sourceRoot":"","sources":["../../../src/sources/medrxiv/browser.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,EAAE,EAAE,OAAO,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,QAAQ,MAAM,qBAAqB,CAAC;AAE3C,OAAO,EACL,uBAAuB,EACvB,kBAAkB,EAClB,kBAAkB,EAClB,mBAAmB,EACnB,gBAAgB,EAChB,WAAW,GACZ,MAAM,UAAU,CAAC;AAElB,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,GAAG,CAAC;AAEtD,MAAM,uBAAuB,GAAG,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,YAAY;AAC3D,MAAM,eAAe,GAAG,EAAE,GAAG,IAAI,CAAC;AAElC,IAAI,cAAc,GAAmC,IAAI,CAAC;AAC1D,IAAI,gBAAgB,GAA0B,IAAI,CAAC;AACnD,IAAI,eAAe,GAAG,CAAC,CAAC;AACxB,IAAI,gBAAgB,GAAkB,IAAI,CAAC;AAE3C,MAAM,qBAAqB,GAAG,GAAG,EAAE;IACjC,IAAI,gBAAgB,EAAE,CAAC;QACrB,YAAY,CAAC,gBAAgB,CAAC,CAAC;QAC/B,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;IACD,IAAI,CAAC,cAAc;QAAE,OAAO;IAC5B,gBAAgB,GAAG,UAAU,CAAC,KAAK,IAAI,EAAE;QACvC,IAAI,CAAC,cAAc;YAAE,OAAO;QAC5B,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,eAAe,GAAG,uBAAuB,EAAE,CAAC;YAC3D,qBAAqB,EAAE,CAAC;YACxB,OAAO;QACT,CAAC;QACD,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC;YACrC,IAAI,OAAO,IAAI,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC;gBACrC,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;gBAClE,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;YACxB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,+BAA+B,EAAE,GAAG,CAAC,CAAC;QAC7D,CAAC;gBAAS,CAAC;YACT,cAAc,GAAG,IAAI,CAAC;QACxB,CAAC;IACH,CAAC,EAAE,uBAAuB,CAAC,CAAC;IAC5B,gBAAgB,CAAC,KAAK,EAAE,EAAE,CAAC;AAC7B,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,KAAK,IAAsB,EAAE;IACjD,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,cAAc,GAAG,CAAC,KAAK,IAAI,EAAE;YAC3B,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC;YAC5D,IAAI,kBAAkB,GAAG,aAAa,CAAC;YACvC,IAAI,CAAC,aAAa,EAAE,CAAC;gBACnB,OAAO,CAAC,IAAI,CACV,mFAAmF,CACpF,CAAC;gBACF,kBAAkB,GAAG,MAAM,QAAQ,CAAC,cAAc,EAAE,CAAC;YACvD,CAAC;YAED,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAC3B,IAAI,GAAG,CAAC;gBACN,GAAG,QAAQ,CAAC,IAAI;gBAChB,cAAc;gBACd,0BAA0B;gBAC1B,yBAAyB;gBACzB,eAAe;gBACf,kBAAkB;gBAClB,aAAa;gBACb,iCAAiC;gBACjC,gBAAgB;gBAChB,sBAAsB;gBACtB,sDAAsD;gBACtD,cAAc;aACf,CAAC,CACH,CAAC;YAEF,OAAO,SAAS,CAAC,MAAM,CAA
C;gBACtB,QAAQ,EAAG,QAAgB,CAAC,QAAQ,IAAI,KAAK;gBAC7C,cAAc,EAAE,kBAAkB,IAAI,SAAS;gBAC/C,IAAI,EAAE,UAAU;gBAChB,eAAe,EAAG,QAAgB,CAAC,eAAe,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;aACnF,CAAC,CAAC;QACL,CAAC,CAAC,EAAE;aACD,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;YACb,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,mCAAmC,EAAE,GAAG,CAAC,CAAC;YAC/D,cAAc,GAAG,IAAI,CAAC;YACtB,MAAM,GAAG,CAAC;QACZ,CAAC,CAAC,CAAC;QAEL,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,IAAI,EAAE;YAC9B,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC;gBACrC,IAAI,OAAO,IAAI,OAAO,CAAC,WAAW,EAAE;oBAAE,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;YAC9D,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;QACZ,CAAC,CAAC,CAAC;IACL,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC;IACrC,IAAI,CAAC,OAAO;QAAE,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;IAC/D,eAAe,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,qBAAqB,EAAE,CAAC;IACxB,OAAO,OAAO,CAAC;AACjB,CAAC,CAAC;AAUF,MAAM,CAAC,MAAM,+BAA+B,GAAG,KAAK,EAClD,MAA0B,EAC+D,EAAE;IAC3F,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,aAAa,EAAE,GAAG,MAAM,CAAC;IAC/E,MAAM,OAAO,GAAG,MAAM,aAAa,EAAE,CAAC;IACtC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,SAAS,GACb,aAAa,EAAE,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC;QACtD,CAAC,CAAC,aAAa,CAAC,EAAE;QAClB,CAAC,CAAC,kBAAkB,CAAC;IACzB,MAAM,YAAY,GAAG,eAAe,IAAI,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC;QACtE,CAAC,CAAC,IAAI,MAAM,CAAC,eAAe,CAAC,EAAE;QAC/B,CAAC,CAAC,EAAE,CAAC;IACP,MAAM,UAAU,GAAG,2CAA2C,MAAM,GAAG,YAAY,EAAE,CAAC;IACtF,MAAM,aAAa,GAAG,UAAU,IAAI,UAAU,CAAC;IAC/C,MAAM,cAAc,GAAG,KAAK,IAAI,EAAE;QAChC,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YACrC,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBAC9B,MAAM,MAAM,GAAG,OAAO;qBACnB,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC;qBAC/C,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;qBACjD,IAAI,CAAC,IAAI,CAAC,CAAC;gBACd,IAAI,MAAM,EAAE,CAAC;oBACX,gBAAgB,GAAG,MAAM,CAAC;gBAC5B,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,wCAAwC,EAAG,GAAa,EAAE,OAAO,IAAI,GAAG,CAAC,CAA
C;QACjG,CAAC;IACH,CAAC,CAAC;IACF,IAAI,CAAC;QACH,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,iCAAiC,EAAE,UAAU,CAAC,CAAC;QACpE,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QACnC,MAAM,IAAI,CAAC,mBAAmB,CAAC;YAC7B,iBAAiB,EAAE,uBAAuB;YAC1C,MAAM,EAAE,mBAAmB;SAC5B,CAAC,CAAC;QACH,MAAM,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC,CAAC;QACzF,MAAM,cAAc,EAAE,CAAC;QAEvB,6DAA6D;QAC7D,MAAM,IAAI,CAAC,mBAAmB,CAAC;YAC7B,iBAAiB,EAAE,uBAAuB;YAC1C,MAAM,EAAE,kBAAkB;YAC1B,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;QAEH,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,8BAA8B,EAAE,OAAO,CAAC,CAAC;QAE9D,MAAM,WAAW,GAAG,CAAC,MAAM,IAAI,CAAC,QAAQ,CACtC,KAAK,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,YAAY,EAA4D,EAAE,EAAE;YACnG,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;YACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,SAAS,CAAC,CAAC;YAC9D,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;oBAChC,MAAM,EAAE,KAAK;oBACb,WAAW,EAAE,SAAS;oBACtB,OAAO,EAAE;wBACP,MAAM,EAAE,YAAY;qBACrB;oBACD,MAAM,EAAE,UAAU,CAAC,MAAM;iBAC1B,CAAC,CAAC;gBACH,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAC/D,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACnC,OAAO;oBACL,EAAE,EAAE,QAAQ,CAAC,EAAE;oBACf,MAAM,EAAE,QAAQ,CAAC,MAAM;oBACvB,GAAG,EAAE,QAAQ,CAAC,GAAG;oBACjB,WAAW;oBACX,IAAI;iBACL,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACvE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;YACvC,CAAC;oBAAS,CAAC;gBACT,YAAY,CAAC,KAAK,CAAC,CAAC;YACtB,CAAC;QACH,CAAC,EACD,EAAE,GAAG,EAAE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,YAAY,EAAE,kBAAkB,EAAE,CAC/E,CASO,CAAC;QAET,MAAM,cAAc,EAAE,CAAC;QAEvB,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;YAC/D,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC,WAAW,CAAC,EAAE,EAAE,CAAC;YACpB,KAAK;gBACH,OAAO,CAAC,GAAG,CACT,gCAAgC,EAChC,WAAW,CAAC,MAAM,IAAI,GAAG,EACzB,WAAW,CAAC,KAAK,IAAI,SAAS,CAC/B,CAAC;YACJ,OAAO,IAAI,CAAC;QACd,CAAC;QAED,KAAK;YACH,OAAO,CAAC,GAAG,CACT,0BAA0B,E
AC1B,WAAW,CAAC,MAAM,IAAI,GAAG,EACzB,WAAW,CAAC,WAAW,IAAI,GAAG,CAC/B,CAAC;QAEJ,MAAM,IAAI,GAAG,CAAC,WAAW,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC7D,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,iCAAiC,CAAC,CAAC;YACxD,OAAO,IAAI,CAAC;QACd,CAAC;QACD,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5B,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,4BAA4B,EAAE,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC;YACtE,OAAO,IAAI,CAAC;QACd,CAAC;QAED,eAAe,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,qBAAqB,EAAE,CAAC;QAExB,OAAO;YACL,GAAG,EAAE,WAAW,CAAC,GAAG,IAAI,OAAO;YAC/B,GAAG,EAAE,IAAI;YACT,OAAO,EAAE,eAAe,IAAI,aAAa,EAAE,aAAa,IAAI,IAAI;YAChE,OAAO,EAAE,gBAAgB,IAAI,SAAS;SACvC,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,+BAA+B,EAAG,GAAa,EAAE,OAAO,IAAI,GAAG,CAAC,CAAC;QACtF,OAAO,IAAI,CAAC;IACd,CAAC;YAAS,CAAC;QACT,MAAM,cAAc,EAAE,CAAC;QACvB,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACrB,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,kBAAkB,GAAG,GAAkB,EAAE,CAAC,gBAAgB,CAAC"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { type PreprintRecord } from "../preprint-discovery";
|
|
2
|
+
/**
 * Optional settings for the medRxiv source fetchers.
 *
 * - `UA`: user-agent string. The browser-based fetcher applies it only when it
 *   starts with `Mozilla/` and otherwise uses its built-in browser user agent.
 * - `preferVersion`: version number. The browser-based fetcher reports it as the
 *   result's `version` when no inferred version is available.
 *   NOTE(review): how the API-based fetchers use these fields is not visible
 *   here; confirm against client.ts.
 */
export type SourceFetchOptions = {
|
|
3
|
+
UA?: string;
|
|
4
|
+
preferVersion?: number | null;
|
|
5
|
+
};
|
|
6
|
+
/**
 * Internal (non-exported) context object; it is the parameter type of
 * `__medrxivTestExports.resolveVersion`.
 *
 * NOTE(review): the implementation is not part of this declaration file. The
 * field names suggest a per-request bundle (article id, JATS/landing/referer
 * URLs, inferred version, caller options) plus a `shared` bag carrying cookie
 * strings. Confirm the exact semantics against client.ts.
 */
type StrategyContext = {
|
|
7
|
+
coreId: string;
|
|
8
|
+
jatsUrl: string;
|
|
9
|
+
landingUrl: string;
|
|
10
|
+
refererUrl: string;
|
|
11
|
+
inferredVersion: number | null;
|
|
12
|
+
opts?: SourceFetchOptions;
|
|
13
|
+
shared: {
|
|
14
|
+
cookies?: string;
|
|
15
|
+
browserCookies?: string;
|
|
16
|
+
};
|
|
17
|
+
};
|
|
18
|
+
/**
 * Return the versioned .source.xml record for the best-matching version (or null).
 *
 * @param idOrDoi medRxiv identifier or DOI of the preprint.
 * @param opts Optional fetch settings (user agent, preferred version).
 * @returns A promise resolving to the matching `PreprintRecord`, or `null`.
 */
|
|
19
|
+
export declare function buildMedrxivLatestSourceXmlUrlViaApi(idOrDoi: string, opts?: SourceFetchOptions): Promise<PreprintRecord | null>;
|
|
20
|
+
/**
 * Fetch the JATS XML content via the API-discovered URL.
 *
 * @param idOrDoi medRxiv identifier or DOI of the preprint.
 * @param opts Optional fetch settings (user agent, preferred version).
 * @returns A promise resolving to the fetched `url`, the `xml` text and an
 *   optional `version`, or `null`.
 */
|
|
21
|
+
export declare function fetchMedrxivSourceXmlViaApi(idOrDoi: string, opts?: SourceFetchOptions): Promise<{
|
|
22
|
+
url: string;
|
|
23
|
+
xml: string;
|
|
24
|
+
version?: number | null;
|
|
25
|
+
} | null>;
|
|
26
|
+
/**
 * Non-exported helper, reachable only through `__medrxivTestExports`.
 * Takes a text and a base URL and returns a list of strings.
 * NOTE(review): presumably extracts follow-up URLs from `text` and resolves them
 * against `baseUrl`; confirm against client.ts.
 */
declare function collectFollowupUrlsFromText(text: string, baseUrl: string): string[];
|
|
27
|
+
/**
 * Bundle exposing the otherwise non-exported helpers `resolveVersion` and
 * `collectFollowupUrlsFromText`.
 * NOTE(review): the name indicates this exists for tests only; confirm before
 * relying on it elsewhere.
 */
export declare const __medrxivTestExports: {
|
|
28
|
+
resolveVersion: (ctx: StrategyContext) => number | null;
|
|
29
|
+
collectFollowupUrlsFromText: typeof collectFollowupUrlsFromText;
|
|
30
|
+
};
|
|
31
|
+
/**
 * Get the latest version number for a medRxiv core/DOI (null if unknown).
 *
 * @param idOrDoi medRxiv identifier or DOI of the preprint.
 * @param opts Optional fetch settings (user agent, preferred version).
 * @returns A promise resolving to the version number, or `null`.
 */
|
|
32
|
+
export declare function fetchLatestMedrxivVersionNumber(idOrDoi: string, opts?: SourceFetchOptions): Promise<number | null>;
|
|
33
|
+
export {};
|
|
34
|
+
//# sourceMappingURL=client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../../src/sources/medrxiv/client.ts"],"names":[],"mappings":"AAaA,OAAO,EAIL,KAAK,cAAc,EACpB,MAAM,uBAAuB,CAAC;AAI/B,MAAM,MAAM,kBAAkB,GAAG;IAC/B,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,aAAa,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B,CAAC;AAMF,KAAK,eAAe,GAAG;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,IAAI,CAAC,EAAE,kBAAkB,CAAC;IAC1B,MAAM,EAAE;QACN,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;CACH,CAAC;AAmTF,sFAAsF;AACtF,wBAAsB,oCAAoC,CACxD,OAAO,EAAE,MAAM,EACf,IAAI,CAAC,EAAE,kBAAkB,GACxB,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC,CAMhC;AAED,4DAA4D;AAC5D,wBAAsB,2BAA2B,CAC/C,OAAO,EAAE,MAAM,EACf,IAAI,CAAC,EAAE,kBAAkB,GACxB,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GAAG,IAAI,CAAC,CAoDvE;AA2ND,iBAAS,2BAA2B,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAM5E;AAqID,eAAO,MAAM,oBAAoB;0BA5qBJ,eAAe,KAAG,MAAM,GAAG,IAAI;;CA+qB3D,CAAC;AAEF,8EAA8E;AAC9E,wBAAsB,+BAA+B,CACnD,OAAO,EAAE,MAAM,EACf,IAAI,CAAC,EAAE,kBAAkB,GACxB,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAaxB"}
|