@mantra-ai/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/google/client.d.ts +67 -0
- package/dist/ai/google/client.d.ts.map +1 -0
- package/dist/ai/google/client.js +169 -0
- package/dist/ai/google/client.js.map +1 -0
- package/dist/ai/google/generate.d.ts +10 -0
- package/dist/ai/google/generate.d.ts.map +1 -0
- package/dist/ai/google/generate.js +137 -0
- package/dist/ai/google/generate.js.map +1 -0
- package/dist/ai/google/index.d.ts +4 -0
- package/dist/ai/google/index.d.ts.map +1 -0
- package/dist/ai/google/index.js +4 -0
- package/dist/ai/google/index.js.map +1 -0
- package/dist/ai/google/types.d.ts +88 -0
- package/dist/ai/google/types.d.ts.map +1 -0
- package/dist/ai/google/types.js +55 -0
- package/dist/ai/google/types.js.map +1 -0
- package/dist/ai/index.d.ts +3 -0
- package/dist/ai/index.d.ts.map +1 -0
- package/dist/ai/index.js +3 -0
- package/dist/ai/index.js.map +1 -0
- package/dist/ai/openai/client.d.ts +22 -0
- package/dist/ai/openai/client.d.ts.map +1 -0
- package/dist/ai/openai/client.js +49 -0
- package/dist/ai/openai/client.js.map +1 -0
- package/dist/ai/openai/generate.d.ts +14 -0
- package/dist/ai/openai/generate.d.ts.map +1 -0
- package/dist/ai/openai/generate.js +178 -0
- package/dist/ai/openai/generate.js.map +1 -0
- package/dist/ai/openai/index.d.ts +4 -0
- package/dist/ai/openai/index.d.ts.map +1 -0
- package/dist/ai/openai/index.js +4 -0
- package/dist/ai/openai/index.js.map +1 -0
- package/dist/ai/openai/types.d.ts +86 -0
- package/dist/ai/openai/types.d.ts.map +1 -0
- package/dist/ai/openai/types.js +56 -0
- package/dist/ai/openai/types.js.map +1 -0
- package/dist/ai/prompts/index.d.ts +1 -0
- package/dist/ai/prompts/index.d.ts.map +1 -0
- package/dist/ai/prompts/index.js +2 -0
- package/dist/ai/prompts/index.js.map +1 -0
- package/dist/errors/index.d.ts +3 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +4 -0
- package/dist/errors/index.js.map +1 -0
- package/dist/errors/schemas.d.ts +304 -0
- package/dist/errors/schemas.d.ts.map +1 -0
- package/dist/errors/schemas.js +57 -0
- package/dist/errors/schemas.js.map +1 -0
- package/dist/errors/types.d.ts +30 -0
- package/dist/errors/types.d.ts.map +1 -0
- package/dist/errors/types.js +33 -0
- package/dist/errors/types.js.map +1 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +21 -0
- package/dist/index.js.map +1 -0
- package/dist/normalization/jats/index.d.ts +4 -0
- package/dist/normalization/jats/index.d.ts.map +1 -0
- package/dist/normalization/jats/index.js +3 -0
- package/dist/normalization/jats/index.js.map +1 -0
- package/dist/normalization/jats/normalize.d.ts +7 -0
- package/dist/normalization/jats/normalize.d.ts.map +1 -0
- package/dist/normalization/jats/normalize.js +213 -0
- package/dist/normalization/jats/normalize.js.map +1 -0
- package/dist/normalization/jats/utils/build/finalize.d.ts +3 -0
- package/dist/normalization/jats/utils/build/finalize.d.ts.map +1 -0
- package/dist/normalization/jats/utils/build/finalize.js +462 -0
- package/dist/normalization/jats/utils/build/finalize.js.map +1 -0
- package/dist/normalization/jats/utils/build/flatten.d.ts +20 -0
- package/dist/normalization/jats/utils/build/flatten.d.ts.map +1 -0
- package/dist/normalization/jats/utils/build/flatten.js +502 -0
- package/dist/normalization/jats/utils/build/flatten.js.map +1 -0
- package/dist/normalization/jats/utils/build/meta.d.ts +10 -0
- package/dist/normalization/jats/utils/build/meta.d.ts.map +1 -0
- package/dist/normalization/jats/utils/build/meta.js +32 -0
- package/dist/normalization/jats/utils/build/meta.js.map +1 -0
- package/dist/normalization/jats/utils/build/version.d.ts +3 -0
- package/dist/normalization/jats/utils/build/version.d.ts.map +1 -0
- package/dist/normalization/jats/utils/build/version.js +11 -0
- package/dist/normalization/jats/utils/build/version.js.map +1 -0
- package/dist/normalization/jats/utils/category.d.ts +11 -0
- package/dist/normalization/jats/utils/category.d.ts.map +1 -0
- package/dist/normalization/jats/utils/category.js +431 -0
- package/dist/normalization/jats/utils/category.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/abstracts.d.ts +3 -0
- package/dist/normalization/jats/utils/collectors/abstracts.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/abstracts.js +168 -0
- package/dist/normalization/jats/utils/collectors/abstracts.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/back.d.ts +35 -0
- package/dist/normalization/jats/utils/collectors/back.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/back.js +801 -0
- package/dist/normalization/jats/utils/collectors/back.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/contributors.d.ts +4 -0
- package/dist/normalization/jats/utils/collectors/contributors.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/contributors.js +77 -0
- package/dist/normalization/jats/utils/collectors/contributors.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/keywords.d.ts +2 -0
- package/dist/normalization/jats/utils/collectors/keywords.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/keywords.js +14 -0
- package/dist/normalization/jats/utils/collectors/keywords.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/meta.d.ts +6 -0
- package/dist/normalization/jats/utils/collectors/meta.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/meta.js +103 -0
- package/dist/normalization/jats/utils/collectors/meta.js.map +1 -0
- package/dist/normalization/jats/utils/collectors/sections.d.ts +7 -0
- package/dist/normalization/jats/utils/collectors/sections.d.ts.map +1 -0
- package/dist/normalization/jats/utils/collectors/sections.js +484 -0
- package/dist/normalization/jats/utils/collectors/sections.js.map +1 -0
- package/dist/normalization/jats/utils/licenses.d.ts +5 -0
- package/dist/normalization/jats/utils/licenses.d.ts.map +1 -0
- package/dist/normalization/jats/utils/licenses.js +64 -0
- package/dist/normalization/jats/utils/licenses.js.map +1 -0
- package/dist/normalization/jats/utils/po/nodes.d.ts +6 -0
- package/dist/normalization/jats/utils/po/nodes.d.ts.map +1 -0
- package/dist/normalization/jats/utils/po/nodes.js +60 -0
- package/dist/normalization/jats/utils/po/nodes.js.map +1 -0
- package/dist/normalization/jats/utils/po/query.d.ts +7 -0
- package/dist/normalization/jats/utils/po/query.d.ts.map +1 -0
- package/dist/normalization/jats/utils/po/query.js +67 -0
- package/dist/normalization/jats/utils/po/query.js.map +1 -0
- package/dist/normalization/jats/utils/po/serialize.d.ts +4 -0
- package/dist/normalization/jats/utils/po/serialize.d.ts.map +1 -0
- package/dist/normalization/jats/utils/po/serialize.js +329 -0
- package/dist/normalization/jats/utils/po/serialize.js.map +1 -0
- package/dist/normalization/jats/utils/po/text.d.ts +7 -0
- package/dist/normalization/jats/utils/po/text.d.ts.map +1 -0
- package/dist/normalization/jats/utils/po/text.js +114 -0
- package/dist/normalization/jats/utils/po/text.js.map +1 -0
- package/dist/normalization/jats/utils/references.d.ts +26 -0
- package/dist/normalization/jats/utils/references.d.ts.map +1 -0
- package/dist/normalization/jats/utils/references.js +371 -0
- package/dist/normalization/jats/utils/references.js.map +1 -0
- package/dist/normalization/jats/utils/strings.d.ts +8 -0
- package/dist/normalization/jats/utils/strings.d.ts.map +1 -0
- package/dist/normalization/jats/utils/strings.js +197 -0
- package/dist/normalization/jats/utils/strings.js.map +1 -0
- package/dist/normalization/jats/utils/types.d.ts +233 -0
- package/dist/normalization/jats/utils/types.d.ts.map +1 -0
- package/dist/normalization/jats/utils/types.js +2 -0
- package/dist/normalization/jats/utils/types.js.map +1 -0
- package/dist/normalization/jats/utils/xml.d.ts +5 -0
- package/dist/normalization/jats/utils/xml.d.ts.map +1 -0
- package/dist/normalization/jats/utils/xml.js +69 -0
- package/dist/normalization/jats/utils/xml.js.map +1 -0
- package/dist/normalization/normalized-doc-schema.d.ts +1094 -0
- package/dist/normalization/normalized-doc-schema.d.ts.map +1 -0
- package/dist/normalization/normalized-doc-schema.js +410 -0
- package/dist/normalization/normalized-doc-schema.js.map +1 -0
- package/dist/normalization/pdf/index.d.ts +4 -0
- package/dist/normalization/pdf/index.d.ts.map +1 -0
- package/dist/normalization/pdf/index.js +3 -0
- package/dist/normalization/pdf/index.js.map +1 -0
- package/dist/normalization/pdf/normalize.d.ts +31 -0
- package/dist/normalization/pdf/normalize.d.ts.map +1 -0
- package/dist/normalization/pdf/normalize.js +321 -0
- package/dist/normalization/pdf/normalize.js.map +1 -0
- package/dist/normalization/pdf/prompt.d.ts +3 -0
- package/dist/normalization/pdf/prompt.d.ts.map +1 -0
- package/dist/normalization/pdf/prompt.js +118 -0
- package/dist/normalization/pdf/prompt.js.map +1 -0
- package/dist/sources/arxiv/client.d.ts +4 -0
- package/dist/sources/arxiv/client.d.ts.map +1 -0
- package/dist/sources/arxiv/client.js +13 -0
- package/dist/sources/arxiv/client.js.map +1 -0
- package/dist/sources/biorxiv/client.d.ts +21 -0
- package/dist/sources/biorxiv/client.d.ts.map +1 -0
- package/dist/sources/biorxiv/client.js +173 -0
- package/dist/sources/biorxiv/client.js.map +1 -0
- package/dist/sources/crossref/client.d.ts +3 -0
- package/dist/sources/crossref/client.d.ts.map +1 -0
- package/dist/sources/crossref/client.js +24 -0
- package/dist/sources/crossref/client.js.map +1 -0
- package/dist/sources/europepmc/client.d.ts +3 -0
- package/dist/sources/europepmc/client.d.ts.map +1 -0
- package/dist/sources/europepmc/client.js +29 -0
- package/dist/sources/europepmc/client.js.map +1 -0
- package/dist/sources/medrxiv/browser.d.ts +16 -0
- package/dist/sources/medrxiv/browser.d.ts.map +1 -0
- package/dist/sources/medrxiv/browser.js +210 -0
- package/dist/sources/medrxiv/browser.js.map +1 -0
- package/dist/sources/medrxiv/client.d.ts +34 -0
- package/dist/sources/medrxiv/client.d.ts.map +1 -0
- package/dist/sources/medrxiv/client.js +673 -0
- package/dist/sources/medrxiv/client.js.map +1 -0
- package/dist/sources/medrxiv/shared.d.ts +7 -0
- package/dist/sources/medrxiv/shared.d.ts.map +1 -0
- package/dist/sources/medrxiv/shared.js +18 -0
- package/dist/sources/medrxiv/shared.js.map +1 -0
- package/dist/sources/plos/client.d.ts +13 -0
- package/dist/sources/plos/client.d.ts.map +1 -0
- package/dist/sources/plos/client.js +147 -0
- package/dist/sources/plos/client.js.map +1 -0
- package/dist/sources/preprint-discovery.d.ts +55 -0
- package/dist/sources/preprint-discovery.d.ts.map +1 -0
- package/dist/sources/preprint-discovery.js +115 -0
- package/dist/sources/preprint-discovery.js.map +1 -0
- package/dist/types/expand.d.ts +5 -0
- package/dist/types/expand.d.ts.map +1 -0
- package/dist/types/expand.js +20 -0
- package/dist/types/expand.js.map +1 -0
- package/dist/types/methods-types.d.ts +37 -0
- package/dist/types/methods-types.d.ts.map +1 -0
- package/dist/types/methods-types.js +2 -0
- package/dist/types/methods-types.js.map +1 -0
- package/dist/types/multi-input-types.d.ts +57 -0
- package/dist/types/multi-input-types.d.ts.map +1 -0
- package/dist/types/multi-input-types.js +2 -0
- package/dist/types/multi-input-types.js.map +1 -0
- package/dist/types/paper/types.d.ts +41 -0
- package/dist/types/paper/types.d.ts.map +1 -0
- package/dist/types/paper/types.js +2 -0
- package/dist/types/paper/types.js.map +1 -0
- package/dist/types/results-types.d.ts +122 -0
- package/dist/types/results-types.d.ts.map +1 -0
- package/dist/types/results-types.js +17 -0
- package/dist/types/results-types.js.map +1 -0
- package/dist/types/supp-types.d.ts +6 -0
- package/dist/types/supp-types.d.ts.map +1 -0
- package/dist/types/supp-types.js +2 -0
- package/dist/types/supp-types.js.map +1 -0
- package/dist/types/version.d.ts +1828 -0
- package/dist/types/version.d.ts.map +1 -0
- package/dist/types/version.js +311 -0
- package/dist/types/version.js.map +1 -0
- package/dist/types/work.d.ts +4455 -0
- package/dist/types/work.d.ts.map +1 -0
- package/dist/types/work.js +330 -0
- package/dist/types/work.js.map +1 -0
- package/dist/works/adapters/crossref.d.ts +28 -0
- package/dist/works/adapters/crossref.d.ts.map +1 -0
- package/dist/works/adapters/crossref.js +43 -0
- package/dist/works/adapters/crossref.js.map +1 -0
- package/dist/works/adapters/europepmc.d.ts +14 -0
- package/dist/works/adapters/europepmc.d.ts.map +1 -0
- package/dist/works/adapters/europepmc.js +46 -0
- package/dist/works/adapters/europepmc.js.map +1 -0
- package/dist/works/adapters/openalex.d.ts +5 -0
- package/dist/works/adapters/openalex.d.ts.map +1 -0
- package/dist/works/adapters/openalex.js +75 -0
- package/dist/works/adapters/openalex.js.map +1 -0
- package/dist/works/errors.d.ts +23 -0
- package/dist/works/errors.d.ts.map +1 -0
- package/dist/works/errors.js +37 -0
- package/dist/works/errors.js.map +1 -0
- package/dist/works/id/detect-identifier.d.ts +15 -0
- package/dist/works/id/detect-identifier.d.ts.map +1 -0
- package/dist/works/id/detect-identifier.js +50 -0
- package/dist/works/id/detect-identifier.js.map +1 -0
- package/dist/works/id/normalize-external-id.d.ts +3 -0
- package/dist/works/id/normalize-external-id.d.ts.map +1 -0
- package/dist/works/id/normalize-external-id.js +44 -0
- package/dist/works/id/normalize-external-id.js.map +1 -0
- package/dist/works/id/normalize-ids.d.ts +66 -0
- package/dist/works/id/normalize-ids.d.ts.map +1 -0
- package/dist/works/id/normalize-ids.js +112 -0
- package/dist/works/id/normalize-ids.js.map +1 -0
- package/dist/works/id/normalize-internals.d.ts +7 -0
- package/dist/works/id/normalize-internals.d.ts.map +1 -0
- package/dist/works/id/normalize-internals.js +65 -0
- package/dist/works/id/normalize-internals.js.map +1 -0
- package/dist/works/id/resolve.d.ts +31 -0
- package/dist/works/id/resolve.d.ts.map +1 -0
- package/dist/works/id/resolve.js +123 -0
- package/dist/works/id/resolve.js.map +1 -0
- package/dist/works/id/resolveIds/assign.d.ts +4 -0
- package/dist/works/id/resolveIds/assign.d.ts.map +1 -0
- package/dist/works/id/resolveIds/assign.js +15 -0
- package/dist/works/id/resolveIds/assign.js.map +1 -0
- package/dist/works/id/resolveIds/flags.d.ts +11 -0
- package/dist/works/id/resolveIds/flags.d.ts.map +1 -0
- package/dist/works/id/resolveIds/flags.js +27 -0
- package/dist/works/id/resolveIds/flags.js.map +1 -0
- package/dist/works/id/resolveIds/idctx.d.ts +4 -0
- package/dist/works/id/resolveIds/idctx.d.ts.map +1 -0
- package/dist/works/id/resolveIds/idctx.js +25 -0
- package/dist/works/id/resolveIds/idctx.js.map +1 -0
- package/dist/works/id/resolveIds/index.d.ts +13 -0
- package/dist/works/id/resolveIds/index.d.ts.map +1 -0
- package/dist/works/id/resolveIds/index.js +498 -0
- package/dist/works/id/resolveIds/index.js.map +1 -0
- package/dist/works/id/resolveIds/versioning.d.ts +27 -0
- package/dist/works/id/resolveIds/versioning.d.ts.map +1 -0
- package/dist/works/id/resolveIds/versioning.js +156 -0
- package/dist/works/id/resolveIds/versioning.js.map +1 -0
- package/dist/works/id/resolveIds/workWhere.d.ts +3 -0
- package/dist/works/id/resolveIds/workWhere.d.ts.map +1 -0
- package/dist/works/id/resolveIds/workWhere.js +35 -0
- package/dist/works/id/resolveIds/workWhere.js.map +1 -0
- package/dist/works/id/types.d.ts +6 -0
- package/dist/works/id/types.d.ts.map +1 -0
- package/dist/works/id/types.js +2 -0
- package/dist/works/id/types.js.map +1 -0
- package/dist/works/pdf-fallback/candidates.d.ts +12 -0
- package/dist/works/pdf-fallback/candidates.d.ts.map +1 -0
- package/dist/works/pdf-fallback/candidates.js +51 -0
- package/dist/works/pdf-fallback/candidates.js.map +1 -0
- package/dist/works/pdf-fallback/fetch.d.ts +21 -0
- package/dist/works/pdf-fallback/fetch.d.ts.map +1 -0
- package/dist/works/pdf-fallback/fetch.js +89 -0
- package/dist/works/pdf-fallback/fetch.js.map +1 -0
- package/dist/works/pdf-fallback/index.d.ts +28 -0
- package/dist/works/pdf-fallback/index.d.ts.map +1 -0
- package/dist/works/pdf-fallback/index.js +35 -0
- package/dist/works/pdf-fallback/index.js.map +1 -0
- package/dist/works/plan.d.ts +8 -0
- package/dist/works/plan.d.ts.map +1 -0
- package/dist/works/plan.js +62 -0
- package/dist/works/plan.js.map +1 -0
- package/dist/works/strategies/arxiv.d.ts +3 -0
- package/dist/works/strategies/arxiv.d.ts.map +1 -0
- package/dist/works/strategies/arxiv.js +56 -0
- package/dist/works/strategies/arxiv.js.map +1 -0
- package/dist/works/strategies/biorxiv.d.ts +3 -0
- package/dist/works/strategies/biorxiv.d.ts.map +1 -0
- package/dist/works/strategies/biorxiv.js +63 -0
- package/dist/works/strategies/biorxiv.js.map +1 -0
- package/dist/works/strategies/europepmc.d.ts +3 -0
- package/dist/works/strategies/europepmc.d.ts.map +1 -0
- package/dist/works/strategies/europepmc.js +15 -0
- package/dist/works/strategies/europepmc.js.map +1 -0
- package/dist/works/strategies/index.d.ts +12 -0
- package/dist/works/strategies/index.d.ts.map +1 -0
- package/dist/works/strategies/index.js +19 -0
- package/dist/works/strategies/index.js.map +1 -0
- package/dist/works/strategies/landing-url.d.ts +3 -0
- package/dist/works/strategies/landing-url.d.ts.map +1 -0
- package/dist/works/strategies/landing-url.js +10 -0
- package/dist/works/strategies/landing-url.js.map +1 -0
- package/dist/works/strategies/medrxiv.d.ts +3 -0
- package/dist/works/strategies/medrxiv.d.ts.map +1 -0
- package/dist/works/strategies/medrxiv.js +47 -0
- package/dist/works/strategies/medrxiv.js.map +1 -0
- package/dist/works/strategies/plos.d.ts +3 -0
- package/dist/works/strategies/plos.d.ts.map +1 -0
- package/dist/works/strategies/plos.js +15 -0
- package/dist/works/strategies/plos.js.map +1 -0
- package/dist/works/strategies/shared.d.ts +11 -0
- package/dist/works/strategies/shared.d.ts.map +1 -0
- package/dist/works/strategies/shared.js +97 -0
- package/dist/works/strategies/shared.js.map +1 -0
- package/dist/works/strategies/ten1101.d.ts +3 -0
- package/dist/works/strategies/ten1101.d.ts.map +1 -0
- package/dist/works/strategies/ten1101.js +84 -0
- package/dist/works/strategies/ten1101.js.map +1 -0
- package/dist/works/text/acquire-fulltext.d.ts +7 -0
- package/dist/works/text/acquire-fulltext.d.ts.map +1 -0
- package/dist/works/text/acquire-fulltext.js +62 -0
- package/dist/works/text/acquire-fulltext.js.map +1 -0
- package/dist/works/text/normalize.d.ts +40 -0
- package/dist/works/text/normalize.d.ts.map +1 -0
- package/dist/works/text/normalize.js +188 -0
- package/dist/works/text/normalize.js.map +1 -0
- package/dist/works/types.d.ts +215 -0
- package/dist/works/types.d.ts.map +1 -0
- package/dist/works/types.js +6 -0
- package/dist/works/types.js.map +1 -0
- package/dist/works/util/debug.d.ts +7 -0
- package/dist/works/util/debug.d.ts.map +1 -0
- package/dist/works/util/debug.js +9 -0
- package/dist/works/util/debug.js.map +1 -0
- package/dist/works/util/license.d.ts +9 -0
- package/dist/works/util/license.d.ts.map +1 -0
- package/dist/works/util/license.js +39 -0
- package/dist/works/util/license.js.map +1 -0
- package/dist/works/util/normalize.d.ts +2 -0
- package/dist/works/util/normalize.d.ts.map +1 -0
- package/dist/works/util/normalize.js +76 -0
- package/dist/works/util/normalize.js.map +1 -0
- package/dist/works/util/parse.d.ts +8 -0
- package/dist/works/util/parse.d.ts.map +1 -0
- package/dist/works/util/parse.js +32 -0
- package/dist/works/util/parse.js.map +1 -0
- package/dist/works/util/source.d.ts +10 -0
- package/dist/works/util/source.d.ts.map +1 -0
- package/dist/works/util/source.js +48 -0
- package/dist/works/util/source.js.map +1 -0
- package/dist/works/util/version-label.d.ts +2 -0
- package/dist/works/util/version-label.d.ts.map +1 -0
- package/dist/works/util/version-label.js +8 -0
- package/dist/works/util/version-label.js.map +1 -0
- package/dist/works/util/work-id.d.ts +2 -0
- package/dist/works/util/work-id.d.ts.map +1 -0
- package/dist/works/util/work-id.js +27 -0
- package/dist/works/util/work-id.js.map +1 -0
- package/package.json +208 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { strategiesInOrder, fetchTextWithRetries, } from "../strategies/index";
|
|
2
|
+
// Shared context object handed as the second argument to every strategy's
// getFullText() call; it exposes the imported fetchTextWithRetries as `fetchText`.
const context = { fetchText: fetchTextWithRetries };
|
|
3
|
+
/**
 * Walk `strategiesInOrder` and return the first hit that carries content,
 * together with a trace entry for every strategy that was visited.
 *
 * - A strategy whose `matches(ids)` is falsy is traced as unmatched and skipped.
 * - A hit with truthy `content` ends the search immediately.
 * - A hit without content is traced with `skipReason: "no_content"` and kept
 *   as a fallback while the remaining strategies are still tried.
 * - An exception thrown while running a matched strategy is logged, traced
 *   with its message, and does not stop the loop.
 *
 * @param {object} ids - Identifier bag forwarded to each strategy.
 * @param {object} [logger] - Optional logger; `debug`, `info` and `warn` are
 *   each called only if present.
 * @returns {Promise<{result: object|null, trace: object[]}>} The content hit,
 *   otherwise the fallback hit, otherwise `null`, plus the trace.
 */
async function acquireFullText(ids, logger) {
    const trace = [];
    // Content-less hit kept as a fallback when no strategy yields content.
    let bestUrlOnly = null;
    for (const s of strategiesInOrder) {
        if (!s.matches(ids)) {
            trace.push({ strategy: s.name, matched: false, attempted: false, succeeded: false });
            continue;
        }
        logger?.debug?.({ strategy: s.name }, "fulltext.strategy.try");
        try {
            const hit = await s.getFullText(ids, context);
            if (!hit) {
                trace.push({ strategy: s.name, matched: true, attempted: true, succeeded: false });
                logger?.debug?.({ strategy: s.name }, "fulltext.strategy.miss: matched but no result");
                continue;
            }
            // Only treat as terminal success if there is actual content.
            if (hit.content) {
                logger?.info?.({ strategy: s.name, source: hit.source }, "fulltext.strategy.success");
                trace.push({ strategy: s.name, matched: true, attempted: true, succeeded: true });
                return { result: hit, trace };
            }
            // URL-only or empty-content result: record it and keep looking.
            logger?.debug?.({ strategy: s.name, source: hit.source, url: hit.url }, "fulltext.strategy.url_only");
            trace.push({
                strategy: s.name,
                matched: true,
                attempted: true,
                succeeded: false,
                skipReason: "no_content",
            });
            // The earliest hit wins, except that a hit lacking a URL is replaced
            // by the first later hit that has one. Otherwise an empty hit from an
            // early strategy would shadow a usable URL and the fallback would
            // return nothing a consumer can follow.
            if (!bestUrlOnly || (!bestUrlOnly.url && hit.url)) {
                bestUrlOnly = hit;
            }
        }
        catch (err) {
            const errorMsg = err instanceof Error ? err.message : String(err);
            logger?.warn?.({ strategy: s.name, err }, "fulltext.strategy.error");
            trace.push({ strategy: s.name, matched: true, attempted: true, succeeded: false, error: errorMsg });
        }
    }
    // Return the fallback if we have one (still no content, but it preserves
    // URL/version metadata for downstream consumers).
    if (bestUrlOnly) {
        logger?.info?.({ source: bestUrlOnly.source, url: bestUrlOnly.url }, "fulltext.url_only_fallback");
        return { result: bestUrlOnly, trace };
    }
    // Report only the identifier keys that hold a truthy value.
    const idsPresent = Object.keys(ids).filter((k) => ids[k]);
    logger?.warn?.({ idsPresent }, "fulltext.none: all strategies exhausted");
    return { result: null, trace };
}
|
|
54
|
+
// ---- public API (unchanged name & results) ----
|
|
55
|
+
/**
 * Resolve the best available full text for `ids` and hand back only the
 * result, dropping the per-strategy trace produced by acquireFullText().
 *
 * @param {object} ids - Identifier bag forwarded to acquireFullText().
 * @param {object} [logger] - Optional logger forwarded unchanged.
 * @returns {Promise<object|null>} The `result` field of the acquisition outcome.
 */
export async function getBestFullText(ids, logger) {
    const outcome = await acquireFullText(ids, logger);
    return outcome.result;
}
|
|
59
|
+
/**
 * Same lookup as getBestFullText(), but returns the whole acquisition
 * outcome: the result together with the per-strategy trace.
 *
 * @param {object} ids - Identifier bag forwarded to acquireFullText().
 * @param {object} [logger] - Optional logger forwarded unchanged.
 * @returns {Promise<{result: object|null, trace: object[]}>}
 */
export async function getBestFullTextWithTrace(ids, logger) {
    const outcome = await acquireFullText(ids, logger);
    return outcome;
}
|
|
62
|
+
//# sourceMappingURL=acquire-fulltext.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"acquire-fulltext.js","sourceRoot":"","sources":["../../../src/works/text/acquire-fulltext.ts"],"names":[],"mappings":"AAMA,OAAO,EACL,iBAAiB,EACjB,oBAAoB,GACrB,MAAM,qBAAqB,CAAC;AAE7B,MAAM,OAAO,GAAG,EAAE,SAAS,EAAE,oBAAoB,EAAE,CAAC;AAEpD,KAAK,UAAU,eAAe,CAC5B,GAAgB,EAChB,MAAmB;IAEnB,MAAM,KAAK,GAAsB,EAAE,CAAC;IAEpC,yEAAyE;IACzE,IAAI,WAAW,GAA0B,IAAI,CAAC;IAE9C,KAAK,MAAM,CAAC,IAAI,iBAAiB,EAAE,CAAC;QAClC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YACpB,KAAK,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC;YACrF,SAAS;QACX,CAAC;QAED,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,uBAAuB,CAAC,CAAC;QAC/D,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAC9C,IAAI,GAAG,EAAE,CAAC;gBACR,4DAA4D;gBAC5D,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;oBAChB,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,EAAE,2BAA2B,CAAC,CAAC;oBACtF,KAAK,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;oBAClF,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC;gBAChC,CAAC;gBAED,yDAAyD;gBACzD,MAAM,EAAE,KAAK,EAAE,CACb,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,EACtD,4BAA4B,CAC7B,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC;oBACT,QAAQ,EAAE,CAAC,CAAC,IAAI;oBAChB,OAAO,EAAE,IAAI;oBACb,SAAS,EAAE,IAAI;oBACf,SAAS,EAAE,KAAK;oBAChB,UAAU,EAAE,YAAY;iBACzB,CAAC,CAAC;gBAEH,IAAI,CAAC,WAAW;oBAAE,WAAW,GAAG,GAAG,CAAC;gBACpC,SAAS;YACX,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC;YACnF,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,+CAA+C,CAAC,CAAC;QACzF,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAClE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,
EAAE,GAAG,EAAE,EAAE,yBAAyB,CAAC,CAAC;YACrE,KAAK,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;QACtG,CAAC;IACH,CAAC;IAED,oEAAoE;IACpE,4DAA4D;IAC5D,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,EAAE,IAAI,EAAE,CACZ,EAAE,MAAM,EAAE,WAAW,CAAC,MAAM,EAAE,GAAG,EAAE,WAAW,CAAC,GAAG,EAAE,EACpD,4BAA4B,CAC7B,CAAC;QACF,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;IACxC,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,CACxC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAsB,CAAC,CACnC,CAAC;IACF,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,UAAU,EAAE,EAAE,yCAAyC,CAAC,CAAC;IAC1E,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;AACjC,CAAC;AAED,kDAAkD;AAClD,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,GAAgB,EAChB,MAAmB;IAEnB,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACtD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,GAAgB,EAChB,MAAmB;IAEnB,OAAO,eAAe,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;AACtC,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type { CompleteIds, LoggerLike } from "../types";
|
|
2
|
+
import type { NormalizedDoc } from "../../normalization/normalized-doc-schema";
|
|
3
|
+
/** Raw JATS payload accepted by normalizeJats() through `NormalizeJatsInput.raw`. */
export type FullTextRawJats = {
|
|
4
|
+
/** Literal tag identifying this payload as JATS XML. */
type: "jats-xml";
|
|
5
|
+
/** The JATS document text; normalizeJats() throws when this is empty or not a string. */
content: string;
|
|
6
|
+
/** Name of the source the payload came from; forwarded to the JATS normalizer as `source.name`. */
source: string;
|
|
7
|
+
/** Optional source URL; forwarded as `source.url`, with null turned into undefined. */
url?: string | null;
|
|
8
|
+
/** Optional license string; forwarded as `source.license`, with null turned into undefined. */
license?: string | null;
|
|
9
|
+
/** Optional version label supplied with the payload. NOTE(review): its use is not visible here; confirm against normalize.js. */
versionLabel?: string | null;
|
|
10
|
+
};
|
|
11
|
+
/** Argument object destructured by normalizeJats(). */
export type NormalizeJatsInput = {
|
|
12
|
+
/** The raw JATS payload to normalize. */
raw: FullTextRawJats;
|
|
13
|
+
/** Identifier set for the work; per the function's doc comment, meta IDs are merged into it non-destructively. */
completeIds: CompleteIds;
|
|
14
|
+
/** Optional logger. */
logger?: LoggerLike;
|
|
15
|
+
/** Optional string id. NOTE(review): presumably a correlation id; confirm against callers. */
cid?: string;
|
|
16
|
+
};
|
|
17
|
+
/**
 * Value that normalizeJats() resolves to.
 * NOTE(review): only the title/abstract derivation is visible in this view of
 * normalize.js; confirm the remaining fields against the implementation.
 */
export type NormalizedJats = {
|
|
18
|
+
/** Literal `true` marker on every value of this type. */
isJats: true;
|
|
19
|
+
versionLabel?: string;
|
|
20
|
+
/** The normalized document in the NormalizedDoc shape. */
normalizedJson: NormalizedDoc;
|
|
21
|
+
metadata: NormalizedDoc["metadata"] | null;
|
|
22
|
+
schemaVersion: string | null;
|
|
23
|
+
/** Whitespace-collapsed title; undefined when none is found or when it is empty or the placeholder "untitled". */
title: string | undefined;
|
|
24
|
+
/** Abstract as plain text; when paragraphs exist they are whitespace-collapsed and joined with single spaces. */
abstractText?: string;
|
|
25
|
+
licenseFromMeta?: string | null;
|
|
26
|
+
license?: string | null;
|
|
27
|
+
source: string;
|
|
28
|
+
sourceUrl?: string | null;
|
|
29
|
+
rawXml: string;
|
|
30
|
+
blocks?: NormalizedDoc["blocks"];
|
|
31
|
+
sections?: NormalizedDoc["sections"];
|
|
32
|
+
citations?: NormalizedDoc["citations"];
|
|
33
|
+
assets?: NormalizedDoc["assets"];
|
|
34
|
+
};
|
|
35
|
+
/**
 * Normalize JATS into our canonical JSON structure, merge meta IDs into completeIds (non-destructive),
 * and resolve a version label. No persistence here.
 *
 * @param input - The raw JATS payload, the work's identifier set, and an optional logger and `cid`.
 * @returns A promise resolving to the normalized document plus convenience fields (see NormalizedJats).
 * @throws Error when `input.raw.content` is empty or not a string.
 */
|
|
39
|
+
export declare function normalizeJats(input: NormalizeJatsInput): Promise<NormalizedJats>;
|
|
40
|
+
//# sourceMappingURL=normalize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize.d.ts","sourceRoot":"","sources":["../../../src/works/text/normalize.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACxD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2CAA2C,CAAC;AAG/E,MAAM,MAAM,eAAe,GAAG;IAC5B,IAAI,EAAE,UAAU,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC9B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,EAAE,eAAe,CAAC;IACrB,WAAW,EAAE,WAAW,CAAC;IACzB,MAAM,CAAC,EAAE,UAAU,CAAC;IACpB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG;IAC3B,MAAM,EAAE,IAAI,CAAC;IACb,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,aAAa,CAAC;IAC9B,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC;IAC3C,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAC1B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,aAAa,CAAC,QAAQ,CAAC,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;IACrC,SAAS,CAAC,EAAE,aAAa,CAAC,WAAW,CAAC,CAAC;IACvC,MAAM,CAAC,EAAE,aAAa,CAAC,QAAQ,CAAC,CAAC;CAClC,CAAC;AAEF;;;GAGG;AACH,wBAAsB,aAAa,CACjC,KAAK,EAAE,kBAAkB,GACxB,OAAO,CAAC,cAAc,CAAC,CAqNzB"}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { normalizeJatsToDoc } from "../../normalization/jats";
|
|
2
|
+
import { normalizeDOI, normalizePMID, normalizePMCID, normalizeArXivId, } from "../id/normalize-ids";
|
|
3
|
+
import { applyNormalizedCompleteIds } from "../id/resolveIds/assign";
|
|
4
|
+
/**
|
|
5
|
+
* Normalize JATS into our canonical JSON structure, merge meta IDs into completeIds (non-destructive),
|
|
6
|
+
* and resolve a version label. No persistence here.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Normalize a JATS XML payload into the canonical JSON document, derive
 * convenience fields (title, abstract text, license) from its metadata, and
 * merge identifiers found in the document into `completeIds`.
 *
 * Nothing is persisted here. The only side effect is the non-destructive,
 * in-place merge into `completeIds` (existing values are never overwritten).
 *
 * @param {object} input
 * @param {object} input.raw - Fetched full text. `raw.content` must be a
 *   non-empty string; `raw.source`, `raw.url`, `raw.license` and
 *   `raw.versionLabel` are read as well.
 * @param {object} input.completeIds - Known identifiers/metadata; mutated in
 *   place (`doi`, `pmid`, `pmcid`, `arxivId` are filled when missing).
 * @param {object} [input.logger] - Optional logger; only `warn` is used.
 * @param {string} [input.cid] - Correlation id echoed in the summary log.
 * @returns {Promise<object>} Normalized package: document JSON, metadata,
 *   title/abstract/license fields, raw XML and structured slices.
 * @throws {Error} When `raw.content` is missing, empty or not a string.
 */
export async function normalizeJats(input) {
    const { raw, completeIds, logger, cid } = input;
    if (!raw.content || typeof raw.content !== "string") {
        throw new Error(`normalizeJats: expected raw.content to be a non-empty string, got ${typeof raw.content}`);
    }

    // --- 1) Parse & normalize JATS to our env/data shape
    const env = normalizeJatsToDoc(raw.content, {
        contentType: "application/jats+xml",
        encoding: "utf-8",
        source: {
            name: raw.source,
            url: raw.url ?? undefined,
            license: raw.license ?? undefined,
        },
    });
    const normalizedDoc = env.data;
    const meta = normalizedDoc?.metadata ?? {};

    // --- helpers
    const collapseWs = (s) => s.replace(/\s+/g, " ").trim();
    const stripTags = (s) => s.replace(/<[^>]*>/g, ""); // simple, fast
    const isMeaningful = (s) => typeof s === "string" &&
        collapseWs(s).length > 0 &&
        collapseWs(s).toLowerCase() !== "untitled";
    // Collapse whitespace and reject empty / placeholder values, so that a
    // `??` chain built on top of this keeps falling through to the next
    // candidate instead of stopping at "" or "Untitled".
    const meaningfulOrUndefined = (s) => {
        const cleaned = collapseWs(s);
        return isMeaningful(cleaned) ? cleaned : undefined;
    };

    // --- 2) Extract convenient fields from metadata
    // Title helpers that handle many shapes + a robust JATS fallback
    const pickFirstString = (v) => {
        if (!v)
            return undefined;
        if (typeof v === "string")
            return meaningfulOrUndefined(v);
        if (typeof v?.text === "string")
            return meaningfulOrUndefined(v.text);
        if (Array.isArray(v)) {
            for (const it of v) {
                const s = pickFirstString(it);
                if (s)
                    return s;
            }
        }
        return undefined;
    };
    const extractTitleFromJats = (xml) => {
        // naive but effective: <article-title>…</article-title>
        const m = xml.match(/<article-title\b[^>]*>([\s\S]*?)<\/article-title>/i);
        if (!m)
            return undefined;
        return meaningfulOrUndefined(stripTags(m[1]));
    };
    // Try multiple likely locations. Every helper returns `undefined` for
    // empty or placeholder titles, so the first meaningful candidate wins.
    const title =
        // 1) metadata.title (string, {text}, or array)
        pickFirstString(meta?.title) ??
        // 2) some normalizers expose `titles` as an array
        pickFirstString(meta?.titles) ??
        // 3) common JATS-shaped nests
        pickFirstString(meta?.["title-group"]?.["article-title"]) ??
        pickFirstString(meta?.titleGroup?.articleTitle) ??
        // 4) top-level doc fields some pipelines expose
        pickFirstString(normalizedDoc?.title) ??
        pickFirstString(normalizedDoc?.titles) ??
        // 5) last-resort: parse <article-title> from raw XML
        extractTitleFromJats(raw.content);

    // Abstract: paragraphs → text; otherwise fall back to xml_minimal/xml or plain string
    let abstractText;
    const paragraphToText = (p) => {
        if (typeof p === "string")
            return p;
        if (typeof p?.text === "string")
            return p.text;
        // Objects without a `text` field would stringify to "[object Object]";
        // contribute nothing instead of polluting the abstract.
        if (p === null || p === undefined || typeof p === "object")
            return "";
        return String(p);
    };
    const paraList = Array.isArray(meta?.abstract?.paragraphs)
        ? meta.abstract.paragraphs
        : undefined;
    if (paraList && paraList.length) {
        const asText = paraList
            .map(paragraphToText)
            .map((s) => collapseWs(s))
            .filter((s) => s.length > 0)
            .join(" ");
        abstractText = asText.length ? asText : undefined;
    }
    if (!abstractText) {
        const xmlMin = Array.isArray(meta?.abstract?.xml_minimal)
            ? meta.abstract.xml_minimal.join(" ")
            : typeof meta?.abstract?.xml_minimal === "string"
                ? meta.abstract.xml_minimal
                : undefined;
        const xmlFull = Array.isArray(meta?.abstract?.xml)
            ? meta.abstract.xml.join(" ")
            : typeof meta?.abstract?.xml === "string"
                ? meta.abstract.xml
                : undefined;
        // Named so it does not shadow the `raw` input destructured above.
        const plainAbstract = typeof meta?.abstract === "string" ? meta.abstract : undefined;
        const candidate = (xmlMin && stripTags(xmlMin)) || (xmlFull && stripTags(xmlFull)) || plainAbstract;
        if (candidate) {
            const cleaned = collapseWs(candidate);
            abstractText = cleaned.length ? cleaned : undefined;
        }
    }

    // License: prefer href; fallback to type (e.g., "CC-BY-4.0")
    const metaLicenseHrefRaw = meta?.license?.href;
    const licenseTypeRaw = meta?.license?.type;
    const licenseFromMeta = typeof metaLicenseHrefRaw === "string" && collapseWs(metaLicenseHrefRaw)
        ? collapseWs(metaLicenseHrefRaw)
        : typeof licenseTypeRaw === "string" && collapseWs(licenseTypeRaw)
            ? collapseWs(licenseTypeRaw)
            : null;

    // --- 3) Merge meta IDs (only if missing in completeIds)
    const metaIds = meta?.ids ?? {};
    const mergeIfEmpty = (key, value) => {
        if (!value)
            return;
        if (completeIds[key])
            return; // do not overwrite existing IDs
        completeIds[key] = value;
    };
    const normalizedMetaDoi = metaIds.doi
        ? normalizeDOI(String(metaIds.doi))
        : null;
    const normalizedMetaPmid = metaIds.pmid
        ? normalizePMID(String(metaIds.pmid))
        : null;
    const normalizedMetaPmcid = metaIds.pmcid
        ? normalizePMCID(String(metaIds.pmcid))
        : null;
    // Be liberal in what we accept for arXiv keys coming from various normalizers
    const candidateArxiv = metaIds.arxiv ?? metaIds.arXiv ?? metaIds.arXivId ?? metaIds.arxivId;
    const normalizedMetaArxiv = candidateArxiv
        ? normalizeArXivId(String(candidateArxiv))
        : null;
    mergeIfEmpty("doi", normalizedMetaDoi ?? undefined);
    mergeIfEmpty("pmid", normalizedMetaPmid ?? undefined);
    mergeIfEmpty("pmcid", normalizedMetaPmcid ?? undefined);
    mergeIfEmpty("arxivId", normalizedMetaArxiv ?? undefined);
    // Re-apply normalization after we potentially augmented completeIds
    applyNormalizedCompleteIds(completeIds);

    // --- 4) Version label: use the pre-computed value from resolveIds (single source of truth)
    const resolvedVersionLabel = completeIds.version ?? raw.versionLabel ?? undefined;

    // --- 5) Pull structured slices for downstream persistence
    const blocks = Array.isArray(normalizedDoc?.blocks)
        ? normalizedDoc.blocks
        : undefined;
    const sections = Array.isArray(normalizedDoc?.sections)
        ? normalizedDoc.sections
        : undefined;
    const citations = Array.isArray(normalizedDoc?.citations)
        ? normalizedDoc.citations
        : undefined;
    const assets = normalizedDoc?.assets ?? undefined;

    // Optional summary log.
    // NOTE(review): emitted at `warn` level although it reads like debug
    // output; level kept as-is so existing log consumers are unaffected.
    logger?.warn?.({
        correlationId: cid,
        source: raw.source,
        haveTitle: Boolean(title),
        haveAbstract: Boolean(abstractText),
        resolvedVersionLabel,
        ids: completeIds,
    }, "jats.normalize.summary");

    // --- 6) Return normalized package (no side effects beyond completeIds merge)
    return {
        isJats: true,
        versionLabel: resolvedVersionLabel,
        normalizedJson: normalizedDoc,
        metadata: meta || null,
        schemaVersion: normalizedDoc?.schemaVersion ?? null,
        title,
        abstractText,
        licenseFromMeta,
        license: raw.license ?? null,
        source: raw.source,
        sourceUrl: raw.url ?? null,
        rawXml: raw.content,
        blocks,
        sections,
        citations,
        assets,
    };
}
|
|
188
|
+
//# sourceMappingURL=normalize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize.js","sourceRoot":"","sources":["../../../src/works/text/normalize.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EACL,YAAY,EACZ,aAAa,EACb,cAAc,EACd,gBAAgB,GACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EAAE,0BAA0B,EAAE,MAAM,yBAAyB,CAAC;AAqCrE;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAyB;IAEzB,MAAM,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;IAEhD,IAAI,CAAC,GAAG,CAAC,OAAO,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QACpD,MAAM,IAAI,KAAK,CACb,qEAAqE,OAAO,GAAG,CAAC,OAAO,EAAE,CAC1F,CAAC;IACJ,CAAC;IAED,sDAAsD;IACtD,MAAM,GAAG,GAAG,kBAAkB,CAAC,GAAG,CAAC,OAAO,EAAE;QAC1C,WAAW,EAAE,sBAAsB;QACnC,QAAQ,EAAE,OAAO;QACjB,MAAM,EAAE;YACN,IAAI,EAAE,GAAG,CAAC,MAAM;YAChB,GAAG,EAAE,GAAG,CAAC,GAAG,IAAI,SAAS;YACzB,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,SAAS;SAClC;KACF,CAAC,CAAC;IAEH,MAAM,aAAa,GAAG,GAAG,CAAC,IAAI,CAAC;IAC/B,MAAM,IAAI,GAAG,aAAa,EAAE,QAAQ,IAAI,EAAE,CAAC;IAE3C,cAAc;IACd,MAAM,UAAU,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAChE,MAAM,SAAS,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe;IAC3E,MAAM,YAAY,GAAG,CAAC,CAAiB,EAAE,EAAE,CACzC,OAAO,CAAC,KAAK,QAAQ;QACrB,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC;QACxB,UAAU,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,KAAK,UAAU,CAAC;IAE7C,iDAAiD;IAEjD,iDAAiD;IAEjD,iEAAiE;IACjE,MAAM,eAAe,GAAG,CAAC,CAAM,EAAsB,EAAE;QACrD,IAAI,CAAC,CAAC;YAAE,OAAO,SAAS,CAAC;QACzB,IAAI,OAAO,CAAC,KAAK,QAAQ;YAAE,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC;QAChD,IAAI,OAAO,CAAC,EAAE,IAAI,KAAK,QAAQ;YAAE,OAAO,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC3D,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;YACrB,KAAK,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC;gBACnB,MAAM,CAAC,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;gBAC9B,IAAI,CAAC;oBAAE,OAAO,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC,CAAC;IAEF,MAAM,oBAAoB,GAAG,CAAC,GAAW,EAAsB,EAAE;QAC/D,wDAAwD;QACxD,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,oDAAoD,CAAC,CAAC;QAC1E,IAAI,CAAC,CAAC;YAAE,OAAO,SAAS,CAAC;QACzB,MAAM,OAAO,GAAG,UAAU,CAA
C,SAAS,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC;QAC7C,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;IAC9C,CAAC,CAAC;IAEF,gCAAgC;IAChC,IAAI,KAAK;IACP,+CAA+C;IAC/C,eAAe,CAAC,IAAI,EAAE,KAAK,CAAC;QAC5B,kDAAkD;QAClD,eAAe,CAAE,IAAY,EAAE,MAAM,CAAC;QACtC,8BAA8B;QAC9B,eAAe,CAAE,IAAY,EAAE,CAAC,aAAa,CAAC,EAAE,CAAC,eAAe,CAAC,CAAC;QAClE,eAAe,CAAE,IAAY,EAAE,UAAU,EAAE,YAAY,CAAC;QACxD,gDAAgD;QAChD,eAAe,CAAE,aAAqB,EAAE,KAAK,CAAC;QAC9C,eAAe,CAAE,aAAqB,EAAE,MAAM,CAAC;QAC/C,qDAAqD;QACrD,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAEpC,IAAI,KAAK,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC;QAAE,KAAK,GAAG,SAAS,CAAC;IAErD,sFAAsF;IACtF,IAAI,YAAgC,CAAC;IAErC,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,EAAE,UAAU,CAAC;QACxD,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU;QAC1B,CAAC,CAAC,SAAS,CAAC;IAEd,IAAI,QAAQ,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,QAAQ;aACpB,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CACd,OAAO,CAAC,KAAK,QAAQ;YACnB,CAAC,CAAC,CAAC;YACH,CAAC,CAAC,OAAO,CAAC,EAAE,IAAI,KAAK,QAAQ;gBAC7B,CAAC,CAAC,CAAC,CAAC,IAAI;gBACR,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CACpB;aACA,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;aACjC,MAAM,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;aACnC,IAAI,CAAC,GAAG,CAAC,CAAC;QACb,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC;IACpD,CAAC;IAED,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,EAAE,WAAW,CAAC;YACvD,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC;YACrC,CAAC,CAAC,OAAO,IAAI,EAAE,QAAQ,EAAE,WAAW,KAAK,QAAQ;gBACjD,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW;gBAC3B,CAAC,CAAC,SAAS,CAAC;QAEd,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,EAAE,GAAG,CAAC;YAChD,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;YAC7B,CAAC,CAAC,OAAO,IAAI,EAAE,QAAQ,EAAE,GAAG,KAAK,QAAQ;gBACzC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG;gBACnB,CAAC,CAAC,SAAS,CAAC;QAEd,MAAM,GAAG,GAAG,OAAO,IAAI,EAAE,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;QAE3E,MAAM,SAAS,GACb,CAAC,MAAM,IAAI,
SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,IAAI,SAAS,CAAC,OAAO,CAAC,CAAC,IAAI,GAAG,CAAC;QAE1E,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC;YACtC,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;QACtD,CAAC;IACH,CAAC;IAED,6DAA6D;IAC7D,MAAM,kBAAkB,GAAG,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC;IAC/C,MAAM,cAAc,GAAG,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC;IAC3C,MAAM,eAAe,GACnB,OAAO,kBAAkB,KAAK,QAAQ,IAAI,UAAU,CAAC,kBAAkB,CAAC;QACtE,CAAC,CAAC,UAAU,CAAC,kBAAkB,CAAC;QAChC,CAAC,CAAC,OAAO,cAAc,KAAK,QAAQ,IAAI,UAAU,CAAC,cAAc,CAAC;YAClE,CAAC,CAAC,UAAU,CAAC,cAAc,CAAC;YAC5B,CAAC,CAAC,IAAI,CAAC;IAEX,yDAAyD;IACzD,MAAM,OAAO,GAAQ,IAAI,EAAE,GAAG,IAAI,EAAE,CAAC;IACrC,MAAM,YAAY,GAAG,CAAC,GAAsB,EAAE,KAAqB,EAAE,EAAE;QACrE,IAAI,CAAC,KAAK;YAAE,OAAO;QACnB,IAAI,WAAW,CAAC,GAAG,CAAC;YAAE,OAAO,CAAC,gCAAgC;QAC7D,WAAmB,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;IACpC,CAAC,CAAC;IAEF,MAAM,iBAAiB,GAAG,OAAO,CAAC,GAAG;QACnC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACnC,CAAC,CAAC,IAAI,CAAC;IAET,MAAM,kBAAkB,GAAG,OAAO,CAAC,IAAI;QACrC,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC,CAAC,IAAI,CAAC;IAET,MAAM,mBAAmB,GAAG,OAAO,CAAC,KAAK;QACvC,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACvC,CAAC,CAAC,IAAI,CAAC;IAET,8EAA8E;IAC9E,MAAM,cAAc,GAClB,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IACvE,MAAM,mBAAmB,GAAG,cAAc;QACxC,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC;QAC1C,CAAC,CAAC,IAAI,CAAC;IAET,YAAY,CAAC,KAAK,EAAE,iBAAiB,IAAI,SAAS,CAAC,CAAC;IACpD,YAAY,CAAC,MAAM,EAAE,kBAAkB,IAAI,SAAS,CAAC,CAAC;IACtD,YAAY,CAAC,OAAO,EAAE,mBAAmB,IAAI,SAAS,CAAC,CAAC;IACxD,YAAY,CAAC,SAAS,EAAE,mBAAmB,IAAI,SAAS,CAAC,CAAC;IAE1D,oEAAoE;IACpE,0BAA0B,CAAC,WAAW,CAAC,CAAC;IAExC,4FAA4F;IAC5F,MAAM,oBAAoB,GACxB,WAAW,CAAC,OAAO,IAAI,GAAG,CAAC,YAAY,IAAI,SAAS,CAAC;IAEvD,2DAA2D;IAC3D,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,aAAa,EAAE,MAAM,CAAC;QACjD,CAAC,CAAC,aAAa,CAAC,MAAM;QACtB,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,aAAa,EAAE,QAAQ,CAAC;QACrD,CAAC,CA
AC,aAAa,CAAC,QAAQ;QACxB,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,aAAa,EAAE,SAAS,CAAC;QACvD,CAAC,CAAC,aAAa,CAAC,SAAS;QACzB,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,MAAM,GAAG,aAAa,EAAE,MAAM,IAAI,SAAS,CAAC;IAElD,sBAAsB;IACtB,MAAM,EAAE,IAAI,EAAE,CACZ;QACE,aAAa,EAAE,GAAG;QAClB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC;QACzB,YAAY,EAAE,OAAO,CAAC,YAAY,CAAC;QACnC,oBAAoB;QACpB,GAAG,EAAE,WAAW;KACjB,EACD,wBAAwB,CACzB,CAAC;IAEF,8EAA8E;IAC9E,OAAO;QACL,MAAM,EAAE,IAAa;QACrB,YAAY,EAAE,oBAAoB;QAClC,cAAc,EAAE,aAAa;QAC7B,QAAQ,EAAE,IAAI,IAAI,IAAI;QACtB,aAAa,EAAE,aAAa,EAAE,aAAa,IAAI,IAAI;QACnD,KAAK;QACL,YAAY;QACZ,eAAe;QACf,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,IAAI;QAC5B,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,SAAS,EAAE,GAAG,CAAC,GAAG,IAAI,IAAI;QAC1B,MAAM,EAAE,GAAG,CAAC,OAAO;QACnB,MAAM;QACN,QAAQ;QACR,SAAS;QACT,MAAM;KACP,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/**
 * Pure types for the works domain.
 * No Prisma, no S3, no Fastify dependencies.
 */
/** String-literal union of the recognised version sources. */
export type VersionSource = "biorxiv" | "medrxiv" | "arxiv" | "publisher" | "plos" | "elife" | "osf" | "pmc" | "other";
/** String-literal union of the external identifier kinds accepted in an `ActionsIdBody`. */
export type ActionsExternalType = "doi" | "pmid" | "pmcid" | "arxivId" | "biorxivId" | "medrxivId" | "openalexId";
/** An external identifier: its kind plus the identifier value. */
export type ActionsIdBody = {
    externalIdType: ActionsExternalType;
    id: string;
};
/**
 * Decision record: existence flags, source information, the actions to take
 * and the reasons behind them.
 * NOTE(review): presumably produced by an ingestion planning step — confirm against the producer.
 */
export type Coordinator = {
    workExists: boolean;
    versionExists: boolean;
    isOpenAccess: boolean;
    sourceKind: VersionSource;
    sourceName?: string | null;
    actions: {
        ensureWork: boolean;
        ensureVersion: boolean;
        fetchFullText: boolean;
    };
    chosenVersionLabel: string;
    // NOTE(review): `work_id` is snake_case while `versionId` is camelCase; kept as declared.
    matched: {
        work_id?: string;
        versionId?: string | null;
    };
    reasons: string[];
    conflict?: boolean;
};
/** String-literal union of version stages. */
export type VersionStage = "journal" | "preprint" | "unknown";
/** Work-level summary fields; only `title` and `isOpenAccess` are required. */
export type WorkRollup = {
    title: string;
    abstract?: string;
    landingUrl?: string;
    isOpenAccess: boolean;
    bestLicense?: string;
    firstPublishedAt?: string;
};
/** String-literal union of upstream metadata providers. */
export type UpstreamSource = "crossref" | "openalex" | "europepmc";
/**
 * Result union discriminated on `ok`: success carries `data`; failure carries
 * a `reason` code and an optional numeric `status`.
 */
export type AdapterResult<T> = {
    ok: true;
    data: T;
} | {
    ok: false;
    status?: number;
    reason: "not_found" | "rate_limited" | "upstream_error" | "network_error";
};
/** An error attributed to one upstream source. */
export type UpstreamError = {
    source: UpstreamSource;
    status?: number;
    reason: string;
};
/** Optional provenance fields about a resolution; every field is optional. */
export type ResolutionMeta = {
    sourceKind?: VersionSource;
    sourceName?: string | null;
    versionStage?: VersionStage;
    versionConfidence?: number;
    versionEvidence?: string[];
    confirmedBy?: UpstreamSource[];
    upstreamErrors?: UpstreamError[];
};
/** Identifier fields; all optional via `Partial`. */
export type ResolvedIds = Partial<{
    doi: string;
    pmid: string;
    pmcid: string;
    arxivId: string;
    biorxivId: string;
    medrxivId: string;
    openalexId: string;
}>;
/**
 * Descriptive metadata fields (all optional via `Partial`), intersected with
 * `ResolutionMeta` and an optional `workRollup`.
 */
export type ResolvedMetadata = Partial<{
    landingUrl: string;
    license: string;
    bestLicense: string;
    publisher: string;
    title: string;
    abstract: string;
    journal: string;
    year: number;
    version: string;
    isPreprint: boolean;
    isOpenAccess: boolean;
    bestPdfUrl: string;
    firstPublishedAt: string;
}> & ResolutionMeta & {
    workRollup?: WorkRollup;
};
/** Intersection of the identifier fields and the metadata fields. */
export type CompleteIds = ResolvedIds & ResolvedMetadata;
/** String-literal union of full-text formats. */
export type FullTextType = "jats-xml" | "html" | "pdf";
/** A full-text payload: only `type` and `source` are required. */
export type FullTextResult = {
    type: FullTextType;
    source: string;
    url?: string;
    content?: string;
    license?: string;
    versionLabel?: string;
    json?: unknown;
    licenseHref?: string;
    licenseBucket?: string;
};
/** Same fields as `IngestResult`, except that `fullText` is optional here. */
export type IngestWorkResult = {
    work_id: string;
    completeIds: CompleteIds;
    fullText?: FullTextResult | null;
    warnings?: string[];
};
/** Identifier fields of an `OpenAlexWork` (see `OpenAlexWork.ids`). */
export type OpenAlexIds = {
    openalex?: string;
    doi?: string | null;
    pmid?: string | null;
    pmcid?: string | null;
};
/** Location entry of an `OpenAlexWork` (primary, best-OA, or one of `locations`). */
export type OpenAlexLocation = {
    source?: {
        id?: string | null;
        display_name?: string | null;
    } | null;
    landing_page_url?: string | null;
    pdf_url?: string | null;
    is_oa?: boolean;
    license?: string | null;
};
/**
 * Work record with snake_case fields; every field is optional.
 * NOTE(review): presumably mirrors the OpenAlex works API payload — confirm.
 */
export type OpenAlexWork = {
    id?: string;
    ids?: OpenAlexIds;
    title?: string | null;
    display_name?: string | null;
    primary_location?: OpenAlexLocation | null;
    best_oa_location?: OpenAlexLocation | null;
    locations?: OpenAlexLocation[] | null;
    host_venue?: {
        display_name?: string | null;
        publisher?: string | null;
    } | null;
    publication_year?: number | null;
    publication_date?: string | null;
    // presumably token -> positions in the abstract; verify against the OpenAlex docs
    abstract_inverted_index?: Record<string, number[]> | null;
};
/** Structural logger type: each method is optional and takes `(obj, msg?)`. */
export type LoggerLike = {
    warn?: (obj: any, msg?: string) => void;
    info?: (obj: any, msg?: string) => void;
    error?: (obj: any, msg?: string) => void;
    debug?: (obj: any, msg?: string) => void;
};
/** Options bag: optional logger and debug flag. */
export type IngestWorkOptions = {
    logger?: LoggerLike;
    debug?: boolean;
};
/** Alias for an optional logger. */
export type IngestLogger = LoggerLike | undefined;
/** Descriptor of a saved object: required `key` plus optional hash, size and etag. */
export type S3RawSaved = {
    key: string;
    sha256?: string | null;
    sizeBytes?: number | null;
    etag?: string | null;
};
/** Union of step descriptors discriminated on `kind`; some variants carry extra fields. */
export type Step = {
    kind: "ReturnExisting";
    work_id: string;
    versionId: string | null;
} | {
    kind: "LoadMatchedWork";
    work_id: string;
} | {
    kind: "FetchFullText";
} | {
    kind: "NormalizeFullText";
    versionLabel: string;
} | {
    kind: "NormalizePdf";
    versionLabel: string;
} | {
    kind: "EnsureWork";
} | {
    kind: "PersistRawBundle";
    versionLabel: string;
} | {
    kind: "EnsureVersion";
    versionLabel: string;
} | {
    kind: "FinalizeFullText";
    versionLabel: string;
};
/** Record of one strategy attempt: name, outcome flags, optional error or skip reason. */
export type StrategyAttempt = {
    strategy: string;
    matched: boolean;
    attempted: boolean;
    succeeded: boolean;
    error?: string;
    skipReason?: string;
};
/** Same fields as `IngestWorkResult`, except that `fullText` is required (may be null). */
export type IngestResult = {
    work_id: string;
    completeIds: CompleteIds;
    fullText: FullTextResult | null;
    warnings?: string[];
};
/** Result of a text fetch: `text` is always present (string or null); the rest is optional. */
export type FetchTextResult = {
    text: string | null;
    finalUrl?: string;
    failureReason?: "not_found" | "rate_limited" | "server_error" | "network_error" | "empty_response";
    lastStatus?: number;
};
/** Async fetcher: `(url, accept?, tries?)` resolving to a `FetchTextResult`. */
export type FetchTextFn = (url: string, accept?: string, tries?: number) => Promise<FetchTextResult>;
/** Context passed to `FullTextStrategy.getFullText`; carries the fetcher. */
export type StrategyContext = {
    fetchText: FetchTextFn;
};
/**
 * A named full-text strategy: a synchronous `matches` predicate over the ids
 * and an async `getFullText` that resolves to a result or null.
 */
export type FullTextStrategy = {
    name: string;
    matches: (ids: CompleteIds) => boolean;
    getFullText: (ids: CompleteIds, ctx: StrategyContext) => Promise<FullTextResult | null>;
};
/** Options bag with a single optional `hasPdf` flag. */
export type PlanOptions = {
    hasPdf?: boolean;
};
|
|
215
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/works/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,MAAM,MAAM,aAAa,GACrB,SAAS,GACT,SAAS,GACT,OAAO,GACP,WAAW,GACX,MAAM,GACN,OAAO,GACP,KAAK,GACL,KAAK,GACL,OAAO,CAAC;AAGZ,MAAM,MAAM,mBAAmB,GAC3B,KAAK,GACL,MAAM,GACN,OAAO,GACP,SAAS,GACT,WAAW,GACX,WAAW,GACX,YAAY,CAAC;AAEjB,MAAM,MAAM,aAAa,GAAG;IAC1B,cAAc,EAAE,mBAAmB,CAAC;IACpC,EAAE,EAAE,MAAM,CAAC;CACZ,CAAC;AAGF,MAAM,MAAM,WAAW,GAAG;IACxB,UAAU,EAAE,OAAO,CAAC;IACpB,aAAa,EAAE,OAAO,CAAC;IACvB,YAAY,EAAE,OAAO,CAAC;IACtB,UAAU,EAAE,aAAa,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAE3B,OAAO,EAAE;QACP,UAAU,EAAE,OAAO,CAAC;QACpB,aAAa,EAAE,OAAO,CAAC;QACvB,aAAa,EAAE,OAAO,CAAC;KACxB,CAAC;IAEF,kBAAkB,EAAE,MAAM,CAAC;IAC3B,OAAO,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,CAAC;IACzD,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB,CAAC;AAGF,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,UAAU,GAAG,SAAS,CAAC;AAE9D,MAAM,MAAM,UAAU,GAAG;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,OAAO,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG,UAAU,GAAG,UAAU,GAAG,WAAW,CAAC;AAEnE,MAAM,MAAM,aAAa,CAAC,CAAC,IACvB;IAAE,EAAE,EAAE,IAAI,CAAC;IAAC,IAAI,EAAE,CAAC,CAAA;CAAE,GACrB;IACE,EAAE,EAAE,KAAK,CAAC;IACV,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,WAAW,GAAG,cAAc,GAAG,gBAAgB,GAAG,eAAe,CAAC;CAC3E,CAAC;AAEN,MAAM,MAAM,aAAa,GAAG;IAC1B,MAAM,EAAE,cAAc,CAAC;IACvB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG;IAC3B,UAAU,CAAC,EAAE,aAAa,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,WAAW,CAAC,EAAE,cAAc,EAAE,CAAC;IAC/B,cAAc,CAAC,EAAE,aAAa,EAAE,CAAC;CAClC,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG,OAAO,CAAC;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,C
AAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAAC;AAEH,MAAM,MAAM,gBAAgB,GAAG,OAAO,CAAC;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IAEb,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,OAAO,CAAC;IACpB,YAAY,EAAE,OAAO,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;CAC1B,CAAC,GACA,cAAc,GAAG;IACf,UAAU,CAAC,EAAE,UAAU,CAAC;CACzB,CAAC;AAEJ,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,gBAAgB,CAAC;AAEzD,MAAM,MAAM,YAAY,GAAG,UAAU,GAAG,MAAM,GAAG,KAAK,CAAC;AAEvD,MAAM,MAAM,cAAc,GAAG;IAC3B,IAAI,EAAE,YAAY,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,WAAW,CAAC;IACzB,QAAQ,CAAC,EAAE,cAAc,GAAG,IAAI,CAAC;IACjC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,GAAG,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,CAAC;AACF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,MAAM,CAAC,EAAE;QAAE,EAAE,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,IAAI,CAAC;IACrE,gBAAgB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACzB,CAAC;AACF,MAAM,MAAM,YAAY,GAAG;IACzB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,GAAG,CAAC,EAAE,WAAW,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gBAAgB,CAAC,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC3C,gBAAgB,CAAC,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC3C,SAAS,CAAC,EAAE,gBAAgB,EAAE,GAAG,IAAI,CAAC;IACtC,UAAU,CAAC,EAAE;QACX,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC7B,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;
KAC3B,GAAG,IAAI,CAAC;IACT,gBAAgB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,gBAAgB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,uBAAuB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,GAAG,IAAI,CAAC;CAC3D,CAAC;AAEF,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC,IAAI,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC,KAAK,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;IACzC,KAAK,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1C,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG;IAAE,MAAM,CAAC,EAAE,UAAU,CAAC;IAAC,KAAK,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC;AAEzE,MAAM,MAAM,YAAY,GAAG,UAAU,GAAG,SAAS,CAAC;AAElD,MAAM,MAAM,UAAU,GAAG;IACvB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACtB,CAAC;AAEF,MAAM,MAAM,IAAI,GACZ;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GACrE;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAC5C;IAAE,IAAI,EAAE,eAAe,CAAA;CAAE,GACzB;IAAE,IAAI,EAAE,mBAAmB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,GACnD;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,GAC9C;IAAE,IAAI,EAAE,YAAY,CAAA;CAAE,GACtB;IAAE,IAAI,EAAE,kBAAkB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,GAClD;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,GAC/C;IAAE,IAAI,EAAE,kBAAkB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,CAAC;AAEvD,MAAM,MAAM,eAAe,GAAG;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,EAAE,OAAO,CAAC;IACnB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,WAAW,CAAC;IACzB,QAAQ,EAAE,cAAc,GAAG,IAAI,CAAC;IAChC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EACV,WAAW,GACX,cAAc,GACd,cAAc,GACd,eAAe,GACf,gBAAgB,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,C
AAC;CACrB,CAAC;AACF,MAAM,MAAM,WAAW,GAAG,CACxB,GAAG,EAAE,MAAM,EACX,MAAM,CAAC,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,MAAM,KACX,OAAO,CAAC,eAAe,CAAC,CAAC;AAE9B,MAAM,MAAM,eAAe,GAAG;IAAE,SAAS,EAAE,WAAW,CAAA;CAAE,CAAC;AAEzD,MAAM,MAAM,gBAAgB,GAAG;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,OAAO,CAAC;IACvC,WAAW,EAAE,CACX,GAAG,EAAE,WAAW,EAChB,GAAG,EAAE,eAAe,KACjB,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC,CAAC;CACrC,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/works/types.ts"],"names":[],"mappings":"AAAA;;;GAGG"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"debug.d.ts","sourceRoot":"","sources":["../../../src/works/util/debug.ts"],"names":[],"mappings":"AAAA,wBAAgB,cAAc,CAAC,CAAC,EAAE,OAAO,GAAG,OAAO,CAKlD;AAED,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC,IAAI,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC,KAAK,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1C,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Interpret a loosely-typed flag value as a boolean.
 *
 * Booleans are returned unchanged, `null`/`undefined` are false, and anything
 * else is stringified, trimmed and lower-cased, then accepted only if it is
 * one of "1", "true", "yes" or "on".
 *
 * @param {unknown} v - Raw flag value.
 * @returns {boolean} Parsed flag.
 */
export function parseDebugFlag(v) {
    if (v === true || v === false) {
        return v;
    }
    if (v == null) {
        return false;
    }
    const token = String(v).trim().toLowerCase();
    return ["1", "true", "yes", "on"].includes(token);
}
|
|
9
|
+
//# sourceMappingURL=debug.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"debug.js","sourceRoot":"","sources":["../../../src/works/util/debug.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,cAAc,CAAC,CAAU;IACvC,IAAI,OAAO,CAAC,KAAK,SAAS;QAAE,OAAO,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,IAAI;QAAE,OAAO,KAAK,CAAC;IAChD,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACzC,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,IAAI,CAAC;AAChE,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Accepted license inputs: a plain string, an object with optional
 * `href` / `type` / `text` fields, or nothing at all.
 */
type Lic = string | {
    href?: string | null;
    type?: string | null;
    text?: string | null;
} | null | undefined;
/** Returns a canonical `https://host/path` form of the license URL, or null when no usable href is present. */
export declare function normalizeLicenseHref(x: Lic): string | null;
/** Returns a coarse bucket label: "arr", "pd", a "cc-…" label, "custom-<host>", or "unknown". */
export declare function licenseBucket(x: Lic): string;
export {};
|
|
9
|
+
//# sourceMappingURL=license.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"license.d.ts","sourceRoot":"","sources":["../../../src/works/util/license.ts"],"names":[],"mappings":"AAAA,KAAK,GAAG,GACJ,MAAM,GACN;IAAE,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GACpE,IAAI,GACJ,SAAS,CAAC;AAEd,wBAAgB,oBAAoB,CAAC,CAAC,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAa1D;AAED,wBAAgB,aAAa,CAAC,CAAC,EAAE,GAAG,GAAG,MAAM,CAwB5C"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Canonicalize a license URL.
 *
 * Accepts either a string or an object with an `href` field. The value is
 * trimmed and parsed; only `http:` / `https:` URLs are accepted. The result is
 * always `https://`, with a leading `www.` dropped, query and fragment
 * removed, host and path lower-cased, and trailing slashes collapsed to one.
 *
 * @param {string | {href?: string|null, type?: string|null, text?: string|null} | null | undefined} x
 * @returns {string | null} Canonical URL, or null when there is no usable http(s) href.
 */
export function normalizeLicenseHref(x) {
    const candidate = typeof x === "string" ? x : x?.href;
    if (typeof candidate !== "string")
        return null;
    const trimmed = candidate.trim();
    if (!trimmed)
        return null;
    let u;
    try {
        u = new URL(trimmed);
    }
    catch {
        // Not an absolute URL (e.g. a textual license such as "CC BY 4.0").
        return null;
    }
    // Reject mailto:, urn:, httpx: etc. — rewriting those to https:// would
    // fabricate a URL that never existed.
    if (u.protocol !== "http:" && u.protocol !== "https:")
        return null;
    // enforce https, drop www, strip query/fragment, collapse trailing slashes
    const host = u.host.replace(/^www\./, "").toLowerCase();
    const path = u.pathname.replace(/\/+$/, "/").toLowerCase();
    return `https://${host}${path}`;
}
/**
 * Classify a license into a coarse bucket.
 *
 * Order of checks: "all rights reserved" text → "arr"; "public domain" text →
 * "pd"; a usable href → Creative Commons public-domain URL → "pd", Creative
 * Commons license URL → `cc-<flags>-<version>`, any other URL →
 * `custom-<host>`; textual CC description → `cc-<flags>[-<version>]`;
 * otherwise "unknown".
 *
 * @param {string | {href?: string|null, type?: string|null, text?: string|null} | null | undefined} x
 * @returns {string} Bucket label.
 */
export function licenseBucket(x) {
    const href = normalizeLicenseHref(x);
    const text = (typeof x === "string" ? x : [x?.type, x?.text].filter(Boolean).join(" ")).toLowerCase();
    if (/\ball[-\s]?rights[-\s]?reserved\b/.test(text))
        return "arr";
    if (/\bpublic\s*domain\b/.test(text))
        return "pd";
    if (href) {
        // Same answer whether the URL arrives as a string or as `{ href }`.
        if (/creativecommons\.org\/publicdomain\//.test(href))
            return "pd";
        // The version may be the last path segment: normalization does not
        // add a trailing slash when the input had none.
        const m = href.match(/creativecommons\.org\/licenses\/([a-z-]+)\/([0-9.]+)(?:\/|$)/i);
        if (m)
            return `cc-${m[1]}-${m[2]}`.toLowerCase();
        return `custom-${new URL(href).host}`;
    }
    // quick textual CC fallback like "CC BY 4.0"
    if (/\bcc\b/.test(text) && /\bby\b/.test(text)) {
        // Match whole tokens (or the spelled-out form) rather than substrings,
        // so "under"/"and" do not add "nd" and "licence" does not add "nc".
        const flagPatterns = [
            ["by", /\bby\b/],
            ["nc", /\b(?:nc|non[-\s]?commercial)\b/],
            ["nd", /\b(?:nd|no[-\s]?deriv(?:ative)?s?)\b/],
            ["sa", /\b(?:sa|share[-\s]?alike)\b/],
        ];
        const flags = flagPatterns
            .filter(([, pattern]) => pattern.test(text))
            .map(([flag]) => flag)
            .join("-");
        const ver = (text.match(/\b\d+\.\d+\b/) || [])[0];
        return `cc-${flags}${ver ? `-${ver}` : ""}`;
    }
    return "unknown";
}
|
|
39
|
+
//# sourceMappingURL=license.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"license.js","sourceRoot":"","sources":["../../../src/works/util/license.ts"],"names":[],"mappings":"AAMA,MAAM,UAAU,oBAAoB,CAAC,CAAM;IACzC,MAAM,IAAI,GACR,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;IAC9E,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;QACxB,2EAA2E;QAC3E,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;QACxD,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3D,OAAO,WAAW,IAAI,GAAG,IAAI,EAAE,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,CAAM;IAClC,MAAM,IAAI,GAAG,oBAAoB,CAAC,CAAC,CAAC,CAAC;IACrC,MAAM,IAAI,GAAG,CACX,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CACzE,CAAC,WAAW,EAAE,CAAC;IAEhB,IAAI,mCAAmC,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IACjE,IAAI,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAClD,IAAI,IAAI,EAAE,CAAC;QACT,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAClB,yDAAyD,CAC1D,CAAC;QACF,IAAI,CAAC;YAAE,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC;QACjD,OAAO,UAAU,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;IACxC,CAAC;IACD,6CAA6C;IAC7C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QAC/C,MAAM,KAAK,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;aACnC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;aAC/B,IAAI,CAAC,GAAG,CAAC,CAAC;QACb,MAAM,GAAG,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAClD,OAAO,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IAC9C,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize.d.ts","sourceRoot":"","sources":["../../../src/works/util/normalize.ts"],"names":[],"mappings":"AASA,wBAAgB,YAAY,CAAC,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CA0ErE"}
|