@mantra-ai/core 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/sources/biorxiv/client.js +2 -2
- package/dist/sources/europepmc/client.js +3 -3
- package/dist/sources/medrxiv/client.js +3 -3
- package/dist/sources/plos/client.js +3 -3
- package/dist/sources/preprint-discovery.js +1 -1
- package/dist/works/export/paper-formats.d.ts +1 -1
- package/dist/works/pdf-fallback/fetch.js +1 -1
- package/dist/works/strategies/arxiv.js +2 -2
- package/dist/works/strategies/biorxiv.js +1 -1
- package/dist/works/strategies/medrxiv.js +1 -1
- package/dist/works/strategies/shared.js +2 -2
- package/dist/works/strategies/ten1101.js +1 -1
- package/dist/works/util/paper-metadata.d.ts +1 -1
- package/package.json +1 -1
|
@@ -25,7 +25,7 @@ export async function fetchBiorxivSourceXmlViaApi(idOrDoi, opts) {
|
|
|
25
25
|
const core = normPreprintCore(idOrDoi, "biorxiv");
|
|
26
26
|
const landingUrl = `https://www.biorxiv.org/content/10.1101/${core}`;
|
|
27
27
|
DEBUG && console.log("[biorxiv] trying jats url:", jatsUrl);
|
|
28
|
-
const userAgent = opts?.UA || "
|
|
28
|
+
const userAgent = opts?.UA || "mantra-reveng-api/1.0";
|
|
29
29
|
// Try 1: explicit XML accept
|
|
30
30
|
let res = await fetch(jatsUrl, {
|
|
31
31
|
headers: {
|
|
@@ -148,7 +148,7 @@ export function buildBiorxivFullHtmlUrlFromId(idOrDoi) {
|
|
|
148
148
|
return `https://www.biorxiv.org/content/10.1101/${core}${v}.full`;
|
|
149
149
|
}
|
|
150
150
|
/** Fetch the full HTML via r.jina.ai proxy to bypass origin 403s */
|
|
151
|
-
export async function fetchBiorxivFullHtmlViaProxy(idOrDoiWithVersion, UA = "
|
|
151
|
+
export async function fetchBiorxivFullHtmlViaProxy(idOrDoiWithVersion, UA = "mantra-reveng-api/1.0") {
|
|
152
152
|
const fullUrl = buildBiorxivFullHtmlUrlFromId(idOrDoiWithVersion);
|
|
153
153
|
if (!fullUrl)
|
|
154
154
|
return null;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
const UA = process.env.UA || "
|
|
1
|
+
const UA = process.env.UA || "mantra-reveng-api/1.0";
|
|
2
2
|
export async function fetchEpmcJatsByPmcid(pmcid) {
|
|
3
3
|
const id = pmcid.startsWith("PMC") ? pmcid : `PMC${pmcid}`;
|
|
4
4
|
const url = `https://www.ebi.ac.uk/europepmc/webservices/rest/${encodeURIComponent(id)}/fullTextXML`;
|
|
5
5
|
const res = await fetch(url, {
|
|
6
|
-
headers: { Accept: "application/xml", "User-Agent": UA || "
|
|
6
|
+
headers: { Accept: "application/xml", "User-Agent": UA || "mantra/1.0" },
|
|
7
7
|
});
|
|
8
8
|
if (!res.ok)
|
|
9
9
|
throw new Error(`Europe PMC fetch failed: ${res.status}`);
|
|
@@ -17,7 +17,7 @@ export async function fetchEpmcPmcidByDoi(doi) {
|
|
|
17
17
|
const q = `DOI:${doi}`;
|
|
18
18
|
const url = `https://www.ebi.ac.uk/europepmc/webservices/rest/search?format=json&pageSize=1&query=${encodeURIComponent(q)}`;
|
|
19
19
|
const res = await fetch(url, {
|
|
20
|
-
headers: { Accept: "application/json", "User-Agent": UA || "
|
|
20
|
+
headers: { Accept: "application/json", "User-Agent": UA || "mantra/1.0" },
|
|
21
21
|
});
|
|
22
22
|
if (!res.ok)
|
|
23
23
|
return null;
|
|
@@ -50,7 +50,7 @@ const resolveVersion = (ctx) => {
|
|
|
50
50
|
};
|
|
51
51
|
const directFetchStrategy = async (ctx) => {
|
|
52
52
|
const attempts = [];
|
|
53
|
-
const ua = ctx.opts?.UA || "
|
|
53
|
+
const ua = ctx.opts?.UA || "mantra-reveng-api/1.0";
|
|
54
54
|
const browserUA = ua && /^Mozilla\//.test(ua) ? ua : MEDRXIV_BROWSER_UA;
|
|
55
55
|
const attempt = async (label, overrides) => {
|
|
56
56
|
try {
|
|
@@ -190,7 +190,7 @@ const browserFetchStrategy = async (ctx) => {
|
|
|
190
190
|
};
|
|
191
191
|
};
|
|
192
192
|
const proxyFetchStrategy = async (ctx) => {
|
|
193
|
-
const ua = ctx.opts?.UA || "
|
|
193
|
+
const ua = ctx.opts?.UA || "mantra-reveng-api/1.0";
|
|
194
194
|
const browserUA = ua && /^Mozilla\//.test(ua) ? ua : MEDRXIV_BROWSER_UA;
|
|
195
195
|
const proxyUrl = `https://r.jina.ai/http://${ctx.jatsUrl.replace(/^https?:\/\//i, "")}`;
|
|
196
196
|
try {
|
|
@@ -323,7 +323,7 @@ export async function fetchMedrxivSourceXmlViaApi(idOrDoi, opts) {
|
|
|
323
323
|
async function fetchMedrxivFromConnect(core, version, opts, cookieHeader) {
|
|
324
324
|
if (!version || !Number.isFinite(version))
|
|
325
325
|
return null;
|
|
326
|
-
const defaultUA = opts?.UA || "
|
|
326
|
+
const defaultUA = opts?.UA || "mantra-reveng-api/1.0";
|
|
327
327
|
const browserUA = defaultUA && /^Mozilla\//.test(defaultUA) ? defaultUA : MEDRXIV_BROWSER_UA;
|
|
328
328
|
const suffix = `v${version}`;
|
|
329
329
|
const baseFile = `${core}${suffix}`;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const UA = process.env.UA || "
|
|
1
|
+
const UA = process.env.UA || "mantra-reveng-api/1.0";
|
|
2
2
|
function journalSlugFromDoi(doi) {
|
|
3
3
|
// DOI pattern: 10.1371/journal.pone.XXXX
|
|
4
4
|
const suffix = doi.split("/")[1] || "";
|
|
@@ -69,7 +69,7 @@ export async function fetchPlosJatsXml(doi) {
|
|
|
69
69
|
const urls = buildPlosXmlUrlCandidates(doi);
|
|
70
70
|
const headers = {
|
|
71
71
|
Accept: "application/xml, text/xml;q=0.9, */*;q=0.8",
|
|
72
|
-
"User-Agent": UA || "
|
|
72
|
+
"User-Agent": UA || "mantra-reveng-api/1.0",
|
|
73
73
|
};
|
|
74
74
|
let lastError = undefined;
|
|
75
75
|
for (const url of urls) {
|
|
@@ -135,7 +135,7 @@ export async function fetchPlosSearchDocByDoi(doi) {
|
|
|
135
135
|
const url = `${base}?q=${encodeURIComponent(q)}&rows=1&wt=json`;
|
|
136
136
|
const headers = {
|
|
137
137
|
Accept: "application/json",
|
|
138
|
-
"User-Agent": UA || "
|
|
138
|
+
"User-Agent": UA || "mantra-reveng-api/1.0",
|
|
139
139
|
};
|
|
140
140
|
const res = await fetchWithRetry(url, { timeoutMs: 15000, headers });
|
|
141
141
|
const data = (await res.json());
|
|
@@ -31,7 +31,7 @@ export function toAbsolutePreprintUrl(u, server) {
|
|
|
31
31
|
/**
|
|
32
32
|
* Fetch the details JSON for a preprint from the API.
|
|
33
33
|
*/
|
|
34
|
-
export async function fetchPreprintDetailsJson(idOrDoi, server, UA = "
|
|
34
|
+
export async function fetchPreprintDetailsJson(idOrDoi, server, UA = "mantra-reveng-api/1.0", debug = false) {
|
|
35
35
|
const url = preprintDetailsApiUrl(idOrDoi, server);
|
|
36
36
|
debug && console.log(`[${server}] details url:`, url);
|
|
37
37
|
const res = await fetch(url, {
|
|
@@ -15,7 +15,7 @@ export async function fetchPdfCandidate(candidate, logger) {
|
|
|
15
15
|
redirect: "follow",
|
|
16
16
|
headers: {
|
|
17
17
|
Accept: "application/pdf",
|
|
18
|
-
"User-Agent": "
|
|
18
|
+
"User-Agent": "Mantra-RevEng-API/1.0 (PDF-fallback)",
|
|
19
19
|
},
|
|
20
20
|
});
|
|
21
21
|
clearTimeout(timeout);
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { buildAr5ivHtmlUrl, buildArxivAbsUrl, } from "../../sources/arxiv/client";
|
|
2
2
|
import { isLikelyErrorHtml, isArxivAbsHtml, } from "./shared";
|
|
3
|
-
const UA = process.env.UA || "
|
|
3
|
+
const UA = process.env.UA || "mantra-reveng-api/1.0";
|
|
4
4
|
async function fetchArxivHtmlIfAvailable(id, UA) {
|
|
5
5
|
const headers = {
|
|
6
6
|
Accept: "text/html,application/xhtml+xml",
|
|
7
|
-
"User-Agent": UA || "
|
|
7
|
+
"User-Agent": UA || "mantra-reveng-api/1.0",
|
|
8
8
|
};
|
|
9
9
|
const absRes = await fetch(`https://arxiv.org/abs/${id}`, { headers });
|
|
10
10
|
if (!absRes.ok)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { fetchBiorxivSourceXmlViaApi, buildBiorxivLatestSourceXmlUrlViaApi, fetchBiorxivFullHtmlViaProxy, } from "../../sources/biorxiv/client";
|
|
2
2
|
import { parseVersionSuffix, extractVersionSuffix, firstFiniteVersion, labelFromVersion, buildBiorxivVersionedId, } from "./shared";
|
|
3
|
-
const UA = process.env.UA || "
|
|
3
|
+
const UA = process.env.UA || "mantra-reveng-api/1.0";
|
|
4
4
|
export const biorxivStrategy = {
|
|
5
5
|
name: "biorxiv-id",
|
|
6
6
|
matches: (ids) => Boolean(ids.biorxivId),
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { fetchMedrxivSourceXmlViaApi, buildMedrxivLatestSourceXmlUrlViaApi, } from "../../sources/medrxiv/client";
|
|
2
2
|
import { parseVersionSuffix, extractVersionSuffix, firstFiniteVersion, labelFromVersion, } from "./shared";
|
|
3
|
-
const UA = process.env.UA || "
|
|
3
|
+
const UA = process.env.UA || "mantra-reveng-api/1.0";
|
|
4
4
|
export const medrxivStrategy = {
|
|
5
5
|
name: "medrxiv-id",
|
|
6
6
|
matches: (ids) => Boolean(ids.medrxivId),
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const UA = process.env.UA || "
|
|
1
|
+
const UA = process.env.UA || "mantra-reveng-api/1.0";
|
|
2
2
|
export const cloneJson = (value) => value === null || value === undefined
|
|
3
3
|
? value
|
|
4
4
|
: JSON.parse(JSON.stringify(value));
|
|
@@ -60,7 +60,7 @@ export const isArxivAbsHtml = (html) => /<link\s+rel=["']canonical["']\s+href=["
|
|
|
60
60
|
export const fetchTextWithRetries = async (url, accept = "text/html,application/xhtml+xml", tries = 2) => {
|
|
61
61
|
const headers = {
|
|
62
62
|
Accept: accept,
|
|
63
|
-
"User-Agent": UA || "
|
|
63
|
+
"User-Agent": UA || "mantra-reveng-api/1.0",
|
|
64
64
|
};
|
|
65
65
|
let lastStatus;
|
|
66
66
|
let failureReason;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { fetchBiorxivSourceXmlViaApi, buildBiorxivLatestSourceXmlUrlViaApi, } from "../../sources/biorxiv/client";
|
|
2
2
|
import { fetchMedrxivSourceXmlViaApi, buildMedrxivLatestSourceXmlUrlViaApi, } from "../../sources/medrxiv/client";
|
|
3
3
|
import { parseVersionSuffix, extractVersionSuffix, firstFiniteVersion, labelFromVersion, } from "./shared";
|
|
4
|
-
const UA = process.env.UA || "
|
|
4
|
+
const UA = process.env.UA || "mantra-reveng-api/1.0";
|
|
5
5
|
export const ten1101FallbackStrategy = {
|
|
6
6
|
name: "generic-10.1101",
|
|
7
7
|
matches: (ids) => Boolean(!ids.biorxivId &&
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { ActionsExternalType, CompleteIds } from "../types";
|
|
2
2
|
/**
|
|
3
3
|
* Metadata fields derived from CompleteIds for paper storage.
|
|
4
|
-
* Structurally compatible with PaperInsert from @
|
|
4
|
+
* Structurally compatible with PaperInsert from @mantra/store
|
|
5
5
|
* without creating a dependency on the store package.
|
|
6
6
|
*/
|
|
7
7
|
export interface PaperMetadataFromIds {
|