jats-xml 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/cli/parse.js +39 -28
- package/dist/cjs/download.js +163 -12
- package/dist/cjs/index.js +3 -1
- package/dist/cjs/jats.js +83 -19
- package/dist/cjs/resolvers.js +6 -3
- package/dist/cjs/types/elementTags.js +16 -0
- package/dist/cjs/types/refType.js +5 -5
- package/dist/cjs/utils.js +55 -6
- package/dist/cjs/version.js +1 -1
- package/dist/esm/cli/parse.js +40 -29
- package/dist/esm/download.js +162 -13
- package/dist/esm/index.js +1 -0
- package/dist/esm/jats.js +69 -8
- package/dist/esm/resolvers.js +4 -1
- package/dist/esm/types/elementTags.js +16 -0
- package/dist/esm/types/refType.js +5 -5
- package/dist/esm/utils.js +52 -4
- package/dist/esm/version.js +1 -1
- package/dist/jats.js +3165 -2823
- package/dist/types/cli/parse.d.ts.map +1 -1
- package/dist/types/download.d.ts +9 -1
- package/dist/types/download.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/jats.d.ts +28 -15
- package/dist/types/jats.d.ts.map +1 -1
- package/dist/types/resolvers.d.ts +4 -1
- package/dist/types/resolvers.d.ts.map +1 -1
- package/dist/types/types/elementTags.d.ts +17 -1
- package/dist/types/types/elementTags.d.ts.map +1 -1
- package/dist/types/types/elements.d.ts +9 -0
- package/dist/types/types/elements.d.ts.map +1 -1
- package/dist/types/types/refType.d.ts +5 -5
- package/dist/types/types/refType.d.ts.map +1 -1
- package/dist/types/utils.d.ts +5 -2
- package/dist/types/utils.d.ts.map +1 -1
- package/dist/types/version.d.ts +1 -1
- package/package.json +7 -4
package/dist/cjs/cli/parse.js
CHANGED
|
@@ -41,7 +41,7 @@ function downloadAndSaveJats(session, urlOrDoi, output) {
|
|
|
41
41
|
if (!hasValidExtension(output)) {
|
|
42
42
|
session.log.warn(`The extension ${(0, path_1.extname)(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
|
|
43
43
|
}
|
|
44
|
-
const data = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
|
|
44
|
+
const { data } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
|
|
45
45
|
(0, myst_cli_utils_1.writeFileToFolder)(output, data);
|
|
46
46
|
return data;
|
|
47
47
|
});
|
|
@@ -52,12 +52,12 @@ function parseJats(session, file) {
|
|
|
52
52
|
if (fs_1.default.existsSync(file)) {
|
|
53
53
|
session.log.debug(`Found ${file} locally, parsing`);
|
|
54
54
|
const data = fs_1.default.readFileSync(file).toString();
|
|
55
|
-
|
|
56
|
-
return new jats_1.Jats(data);
|
|
55
|
+
return new jats_1.Jats(data, { log: session.log });
|
|
57
56
|
}
|
|
58
|
-
const data = yield (0, download_1.downloadJatsFromUrl)(session, file, resolvers_1.DEFAULT_RESOLVERS);
|
|
57
|
+
const { source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, resolvers_1.DEFAULT_RESOLVERS);
|
|
58
|
+
const jats = new jats_1.Jats(data, { source, log: session.log });
|
|
59
59
|
session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
|
|
60
|
-
return
|
|
60
|
+
return jats;
|
|
61
61
|
});
|
|
62
62
|
}
|
|
63
63
|
function formatLongString(data, offset = 0, length = 88 - offset) {
|
|
@@ -76,6 +76,9 @@ function formatDictionary(dict, opts) {
|
|
|
76
76
|
return Object.entries(dict)
|
|
77
77
|
.map(([k, t]) => {
|
|
78
78
|
var _a;
|
|
79
|
+
if (!t)
|
|
80
|
+
return null;
|
|
81
|
+
let wrap = typeof (opts === null || opts === void 0 ? void 0 : opts.wrap) === 'boolean' ? opts.wrap : true;
|
|
79
82
|
let value = t;
|
|
80
83
|
let color = chalk_1.default.yellow.bold;
|
|
81
84
|
if (t && typeof t === 'object') {
|
|
@@ -83,21 +86,21 @@ function formatDictionary(dict, opts) {
|
|
|
83
86
|
return null;
|
|
84
87
|
color = (_a = t.label) !== null && _a !== void 0 ? _a : color;
|
|
85
88
|
value = t.value;
|
|
89
|
+
wrap = typeof t.wrap === 'boolean' ? t.wrap : wrap;
|
|
86
90
|
}
|
|
87
|
-
const wrapped =
|
|
88
|
-
? String(value)
|
|
89
|
-
: formatLongString(String(value), maxLabel + 2, opts === null || opts === void 0 ? void 0 : opts.wrap);
|
|
91
|
+
const wrapped = wrap ? formatLongString(String(value), maxLabel + 2) : String(value);
|
|
90
92
|
return `${color(k)}:${' '.repeat(maxLabel - k.length + 1)}${wrapped}`;
|
|
91
93
|
})
|
|
92
94
|
.filter((o) => !!o)
|
|
93
95
|
.join('\n');
|
|
94
96
|
}
|
|
95
97
|
function jatsSummaryCLI(session, file) {
|
|
96
|
-
var _a, _b;
|
|
98
|
+
var _a, _b, _c;
|
|
97
99
|
return __awaiter(this, void 0, void 0, function* () {
|
|
98
100
|
const jats = yield parseJats(session, file);
|
|
99
101
|
const summary = {
|
|
100
|
-
|
|
102
|
+
Source: { value: jats.source, wrap: false },
|
|
103
|
+
DOI: jats.doi ? { value: doi_utils_1.default.buildUrl(jats.doi), wrap: false } : null,
|
|
101
104
|
Title: (_a = (0, myst_common_1.toText)(jats.articleTitle)) === null || _a === void 0 ? void 0 : _a.replace(/\n/g, ' '),
|
|
102
105
|
Date: (0, utils_1.formatDate)((0, utils_1.toDate)(jats.publicationDate)),
|
|
103
106
|
Authors: jats.articleAuthors
|
|
@@ -105,36 +108,44 @@ function jatsSummaryCLI(session, file) {
|
|
|
105
108
|
.join(', '),
|
|
106
109
|
Abstract: (_b = (0, myst_common_1.toText)(jats.abstract)) === null || _b === void 0 ? void 0 : _b.replace(/\n/g, ' '),
|
|
107
110
|
Keywords: jats.keywords.map((k) => (0, myst_common_1.toText)(k)).join(', '),
|
|
108
|
-
License: jats.license['xlink:href'],
|
|
109
|
-
|
|
110
|
-
|
|
111
|
+
License: (_c = jats.license) === null || _c === void 0 ? void 0 : _c['xlink:href'],
|
|
112
|
+
};
|
|
113
|
+
if (jats.body) {
|
|
114
|
+
summary.Figures = {
|
|
115
|
+
label: chalk_1.default.blue.bold,
|
|
116
|
+
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.fig, jats.body).length),
|
|
117
|
+
};
|
|
118
|
+
summary.Equations = {
|
|
111
119
|
label: chalk_1.default.blue.bold,
|
|
112
120
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.dispFormula, jats.body).length),
|
|
113
|
-
}
|
|
114
|
-
Tables
|
|
121
|
+
};
|
|
122
|
+
summary.Tables = {
|
|
115
123
|
label: chalk_1.default.blue.bold,
|
|
116
124
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.table, jats.body).length),
|
|
117
|
-
}
|
|
118
|
-
Code
|
|
125
|
+
};
|
|
126
|
+
summary.Code = {
|
|
119
127
|
label: chalk_1.default.blue.bold,
|
|
120
128
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.code, jats.body).length),
|
|
121
|
-
}
|
|
122
|
-
Sections
|
|
129
|
+
};
|
|
130
|
+
summary.Sections = {
|
|
123
131
|
label: chalk_1.default.blue.bold,
|
|
124
132
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.sec, jats.body).length),
|
|
125
|
-
}
|
|
126
|
-
Paragraphs
|
|
133
|
+
};
|
|
134
|
+
summary.Paragraphs = {
|
|
127
135
|
label: chalk_1.default.blue.bold,
|
|
128
136
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.p, jats.body).length),
|
|
129
|
-
}
|
|
130
|
-
Citations
|
|
131
|
-
'Cross-References'
|
|
137
|
+
};
|
|
138
|
+
summary.Citations = { label: chalk_1.default.blue.bold, value: String(jats.references.length) };
|
|
139
|
+
summary['Cross-References'] = {
|
|
132
140
|
label: chalk_1.default.blue.bold,
|
|
133
141
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.xref, jats.body).length),
|
|
134
|
-
}
|
|
135
|
-
'Sub Articles'
|
|
136
|
-
}
|
|
142
|
+
};
|
|
143
|
+
summary['Sub Articles'] = { label: chalk_1.default.blue.bold, value: String(jats.subArticles.length) };
|
|
144
|
+
}
|
|
137
145
|
session.log.info(formatDictionary(summary));
|
|
146
|
+
if (!jats.body) {
|
|
147
|
+
session.log.warn('\nThis is a partial JATS record that does not have <body>.');
|
|
148
|
+
}
|
|
138
149
|
});
|
|
139
150
|
}
|
|
140
151
|
function jatsReferencesCLI(session, file) {
|
|
@@ -142,7 +153,7 @@ function jatsReferencesCLI(session, file) {
|
|
|
142
153
|
const jats = yield parseJats(session, file);
|
|
143
154
|
const sorted = jats.references
|
|
144
155
|
.map((ref) => {
|
|
145
|
-
const doiString = (0, utils_1.
|
|
156
|
+
const doiString = (0, utils_1.findArticleId)(ref, 'doi');
|
|
146
157
|
const title = (0, myst_common_1.toText)((0, unist_util_select_1.select)(types_1.Tags.articleTitle, ref));
|
|
147
158
|
const year = (0, myst_common_1.toText)((0, unist_util_select_1.select)(types_1.Tags.year, ref));
|
|
148
159
|
const surnames = (0, unist_util_select_1.selectAll)(types_1.Tags.surname, ref);
|
package/dist/cjs/download.js
CHANGED
|
@@ -12,35 +12,186 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
12
12
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
13
|
};
|
|
14
14
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
exports.downloadJatsFromUrl = void 0;
|
|
15
|
+
exports.downloadJatsFromUrl = exports.checkIfPubMedCentralHasJats = exports.convertPMID2PMCID = void 0;
|
|
16
16
|
const doi_utils_1 = __importDefault(require("doi-utils"));
|
|
17
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
17
18
|
const node_fetch_1 = __importDefault(require("node-fetch"));
|
|
18
19
|
const myst_cli_utils_1 = require("myst-cli-utils");
|
|
20
|
+
const fair_principles_1 = require("fair-principles");
|
|
19
21
|
const resolvers_1 = require("./resolvers");
|
|
22
|
+
function logAboutJatsFailing(session, jatsUrls) {
|
|
23
|
+
session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');
|
|
24
|
+
session.log.info(`${chalk_1.default.green(`\nThe XML ${chalk_1.default.bold('should')} be here:\n\n${jatsUrls.join('\n')}`)}\n`);
|
|
25
|
+
const FAIR = (0, fair_principles_1.highlightFAIR)('A', { chalk: chalk_1.default });
|
|
26
|
+
session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
|
|
27
|
+
session.log.debug((0, fair_principles_1.formatPrinciples)('A*', { chalk: chalk_1.default }));
|
|
28
|
+
session.log.info(`${chalk_1.default.blue('The link may work in a browser.')}\n`);
|
|
29
|
+
}
|
|
20
30
|
function dowloadFromUrl(session, jatsUrl) {
|
|
21
31
|
return __awaiter(this, void 0, void 0, function* () {
|
|
32
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
22
33
|
session.log.debug(`Fetching JATS from ${jatsUrl}`);
|
|
23
|
-
const
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
34
|
+
const resp = yield (0, node_fetch_1.default)(jatsUrl, {
|
|
35
|
+
headers: [
|
|
36
|
+
['accept', 'application/xml'],
|
|
37
|
+
[
|
|
38
|
+
'user-agent',
|
|
39
|
+
// A bunch of publishers just show the login screen or quickly block you.
|
|
40
|
+
// We don't want to DDOS these publishers, they are the _good ones_ for sharing the XML!!
|
|
41
|
+
// But some block on the second request?!
|
|
42
|
+
// So we can pretend to be a random browser, I guess. How silly. 🤷♂️
|
|
43
|
+
`Mozilla/5.0 (Macintosh; Intel Mac OS X ${Math.floor(Math.random() * 100)})`,
|
|
44
|
+
],
|
|
45
|
+
],
|
|
46
|
+
});
|
|
47
|
+
if (!resp.ok) {
|
|
48
|
+
session.log.debug(`JATS failed to download from "${jatsUrl}"`);
|
|
49
|
+
throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
|
|
50
|
+
}
|
|
51
|
+
const contentType = resp.headers.get('content-type');
|
|
52
|
+
if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
|
|
53
|
+
(contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
|
|
54
|
+
(contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
|
|
55
|
+
session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}\n${chalk_1.default.dim('Things may not work, but we are going to try our best...')}`);
|
|
56
|
+
}
|
|
57
|
+
const data = yield resp.text();
|
|
58
|
+
session.log.debug(toc(`Fetched document with content-type "${contentType}" in %s`));
|
|
31
59
|
return data;
|
|
32
60
|
});
|
|
33
61
|
}
|
|
62
|
+
/**
|
|
63
|
+
* There are 5.8M or so DOIs that have a full XML record:
|
|
64
|
+
*
|
|
65
|
+
* https://api.crossref.org/works?filter=full-text.type:application/xml,full-text.application:text-mining&facet=publisher-name:*&rows=0
|
|
66
|
+
*
|
|
67
|
+
* This function tries to find the correct URL for the record.
|
|
68
|
+
*/
|
|
69
|
+
function checkIfDoiHasJats(session, urlOrDoi) {
|
|
70
|
+
var _a, _b, _c, _d;
|
|
71
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
72
|
+
if (!doi_utils_1.default.validate(urlOrDoi))
|
|
73
|
+
return;
|
|
74
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
75
|
+
const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
|
|
76
|
+
session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
|
|
77
|
+
const resp = yield (0, node_fetch_1.default)(doiUrl, { headers: [['Accept', 'application/json']] });
|
|
78
|
+
if (!resp.ok) {
|
|
79
|
+
// Silently return -- other functions can try!
|
|
80
|
+
session.log.debug(`DOI failed to resolve: ${doiUrl}`);
|
|
81
|
+
return;
|
|
82
|
+
}
|
|
83
|
+
const data = (yield resp.json());
|
|
84
|
+
session.log.debug(toc(`DOI resolved in %s with ${(_b = (_a = data.link) === null || _a === void 0 ? void 0 : _a.length) !== null && _b !== void 0 ? _b : 0} links to content`));
|
|
85
|
+
if (data.link) {
|
|
86
|
+
session.log.debug(['', ...data.link.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '));
|
|
87
|
+
}
|
|
88
|
+
const fullXml = (_d = (_c = data.link) === null || _c === void 0 ? void 0 : _c.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _d === void 0 ? void 0 : _d.URL;
|
|
89
|
+
if (fullXml)
|
|
90
|
+
return fullXml;
|
|
91
|
+
session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
|
|
92
|
+
return undefined;
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
|
|
97
|
+
*/
|
|
98
|
+
function convertPMID2PMCID(session, PMID) {
|
|
99
|
+
var _a, _b;
|
|
100
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
101
|
+
if (PMID.startsWith('https://')) {
|
|
102
|
+
const idPart = new URL(PMID).pathname.slice(1);
|
|
103
|
+
session.log.debug(`Extract ${PMID} to ${idPart}`);
|
|
104
|
+
return convertPMID2PMCID(session, idPart);
|
|
105
|
+
}
|
|
106
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
107
|
+
const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
|
|
108
|
+
const resp = yield (0, node_fetch_1.default)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, {
|
|
109
|
+
headers: [['Accept', 'application/json']],
|
|
110
|
+
});
|
|
111
|
+
if (!resp.ok) {
|
|
112
|
+
// Silently return -- other functions can try!
|
|
113
|
+
session.log.debug(`Failed to convert PubMedID: ${PMID}`);
|
|
114
|
+
return;
|
|
115
|
+
}
|
|
116
|
+
const data = yield resp.json();
|
|
117
|
+
const PMCID = (_b = (_a = data === null || data === void 0 ? void 0 : data.records) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.pmcid;
|
|
118
|
+
session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
|
|
119
|
+
return PMCID;
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
exports.convertPMID2PMCID = convertPMID2PMCID;
|
|
123
|
+
function pubMedCentralJats(PMCID) {
|
|
124
|
+
const normalized = PMCID.replace(/^PMC:?/, '');
|
|
125
|
+
return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
|
|
126
|
+
}
|
|
127
|
+
function checkIfPubMedCentralHasJats(session, urlOrDoi) {
|
|
128
|
+
var _a, _b;
|
|
129
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
130
|
+
if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
|
|
131
|
+
return pubMedCentralJats(urlOrDoi);
|
|
132
|
+
if (!doi_utils_1.default.validate(urlOrDoi))
|
|
133
|
+
return;
|
|
134
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
135
|
+
const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
|
|
136
|
+
session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
|
|
137
|
+
const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
|
|
138
|
+
const resp = yield (0, node_fetch_1.default)(openAlexUrl, { headers: [['Accept', 'application/json']] });
|
|
139
|
+
if (!resp.ok) {
|
|
140
|
+
// Silently return -- other functions can try!
|
|
141
|
+
session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
|
|
142
|
+
return;
|
|
143
|
+
}
|
|
144
|
+
const data = (yield resp.json());
|
|
145
|
+
const PMID = (_a = data === null || data === void 0 ? void 0 : data.ids) === null || _a === void 0 ? void 0 : _a.pmid;
|
|
146
|
+
let PMCID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmcid;
|
|
147
|
+
if (!PMCID && !!PMID) {
|
|
148
|
+
session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`));
|
|
149
|
+
PMCID = yield convertPMID2PMCID(session, PMID);
|
|
150
|
+
if (!PMCID) {
|
|
151
|
+
session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
|
|
152
|
+
return;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
if (!PMCID) {
|
|
156
|
+
session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s, but there is no PMCID`));
|
|
157
|
+
return;
|
|
158
|
+
}
|
|
159
|
+
session.log.debug(toc(`OpenAlex resolved in %s, with a PMCID of ${PMCID}`));
|
|
160
|
+
return pubMedCentralJats(PMCID);
|
|
161
|
+
});
|
|
162
|
+
}
|
|
163
|
+
exports.checkIfPubMedCentralHasJats = checkIfPubMedCentralHasJats;
|
|
34
164
|
function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
|
|
35
165
|
return __awaiter(this, void 0, void 0, function* () {
|
|
166
|
+
const expectedUrls = (yield Promise.all([
|
|
167
|
+
checkIfPubMedCentralHasJats(session, urlOrDoi),
|
|
168
|
+
checkIfDoiHasJats(session, urlOrDoi),
|
|
169
|
+
])).filter((u) => !!u);
|
|
170
|
+
if (expectedUrls.length > 0) {
|
|
171
|
+
session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
|
|
172
|
+
for (let index = 0; index < expectedUrls.length; index++) {
|
|
173
|
+
const url = expectedUrls[index];
|
|
174
|
+
try {
|
|
175
|
+
const data = yield dowloadFromUrl(session, url);
|
|
176
|
+
if (data)
|
|
177
|
+
return { source: url, data };
|
|
178
|
+
}
|
|
179
|
+
catch (error) {
|
|
180
|
+
session.log.debug(error.message);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
// If there are expected URLs that don't work: see something, say something, etc.
|
|
184
|
+
logAboutJatsFailing(session, expectedUrls);
|
|
185
|
+
}
|
|
36
186
|
if (doi_utils_1.default.validate(urlOrDoi)) {
|
|
37
|
-
const jatsUrl = yield (0, resolvers_1.
|
|
187
|
+
const jatsUrl = yield (0, resolvers_1.customResolveJatsUrlFromDoi)(session, urlOrDoi, resolvers);
|
|
38
188
|
const data = yield dowloadFromUrl(session, jatsUrl);
|
|
39
|
-
return data;
|
|
189
|
+
return { source: jatsUrl, data };
|
|
40
190
|
}
|
|
41
191
|
if ((0, myst_cli_utils_1.isUrl)(urlOrDoi)) {
|
|
192
|
+
session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
|
|
42
193
|
const data = yield dowloadFromUrl(session, urlOrDoi);
|
|
43
|
-
return data;
|
|
194
|
+
return { source: urlOrDoi, data };
|
|
44
195
|
}
|
|
45
196
|
throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
|
|
46
197
|
});
|
package/dist/cjs/index.js
CHANGED
|
@@ -17,9 +17,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
17
17
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
18
18
|
};
|
|
19
19
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
-
exports.Jats = exports.version = void 0;
|
|
20
|
+
exports.downloadJatsFromUrl = exports.Jats = exports.version = void 0;
|
|
21
21
|
var version_1 = require("./version");
|
|
22
22
|
Object.defineProperty(exports, "version", { enumerable: true, get: function () { return __importDefault(version_1).default; } });
|
|
23
23
|
var jats_1 = require("./jats");
|
|
24
24
|
Object.defineProperty(exports, "Jats", { enumerable: true, get: function () { return jats_1.Jats; } });
|
|
25
|
+
var download_1 = require("./download");
|
|
26
|
+
Object.defineProperty(exports, "downloadJatsFromUrl", { enumerable: true, get: function () { return download_1.downloadJatsFromUrl; } });
|
|
25
27
|
__exportStar(require("./types"), exports);
|
package/dist/cjs/jats.js
CHANGED
|
@@ -1,41 +1,90 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
2
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
6
|
exports.Jats = void 0;
|
|
7
|
+
const myst_common_1 = require("myst-common");
|
|
4
8
|
const xml_js_1 = require("xml-js");
|
|
9
|
+
const doi_utils_1 = __importDefault(require("doi-utils"));
|
|
5
10
|
const utils_1 = require("./utils");
|
|
6
11
|
const unist_util_select_1 = require("unist-util-select");
|
|
7
12
|
const types_1 = require("./types");
|
|
13
|
+
const myst_cli_utils_1 = require("myst-cli-utils");
|
|
14
|
+
function select(selector, node) {
|
|
15
|
+
var _a;
|
|
16
|
+
return ((_a = (0, unist_util_select_1.select)(selector, node)) !== null && _a !== void 0 ? _a : undefined);
|
|
17
|
+
}
|
|
8
18
|
class Jats {
|
|
9
|
-
constructor(data) {
|
|
10
|
-
|
|
19
|
+
constructor(data, opts) {
|
|
20
|
+
var _a;
|
|
21
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
22
|
+
this.log = opts === null || opts === void 0 ? void 0 : opts.log;
|
|
23
|
+
if (opts === null || opts === void 0 ? void 0 : opts.source)
|
|
24
|
+
this.source = opts.source;
|
|
25
|
+
try {
|
|
26
|
+
this.raw = (0, xml_js_1.xml2js)(data, { compact: false });
|
|
27
|
+
}
|
|
28
|
+
catch (error) {
|
|
29
|
+
throw new Error('Problem parsing the JATS document, please ensure it is XML');
|
|
30
|
+
}
|
|
11
31
|
const { declaration, elements } = this.raw;
|
|
12
32
|
this.declaration = declaration === null || declaration === void 0 ? void 0 : declaration.attributes;
|
|
13
|
-
if (!((elements === null || elements === void 0 ? void 0 : elements.length) === 2 && elements[0].type === 'doctype' && elements[1]
|
|
14
|
-
throw new Error('article is not the only element of the JATS');
|
|
33
|
+
if (!((elements === null || elements === void 0 ? void 0 : elements.length) === 2 && elements[0].type === 'doctype' && hasSingleArticle(elements[1]))) {
|
|
34
|
+
throw new Error('Element <article> is not the only element of the JATS');
|
|
15
35
|
}
|
|
16
36
|
this.doctype = elements[0].doctype;
|
|
17
|
-
|
|
37
|
+
const converted = (0, utils_1.convertToUnist)(elements[1]);
|
|
38
|
+
this.tree = select('article', converted);
|
|
39
|
+
(_a = this.log) === null || _a === void 0 ? void 0 : _a.debug(toc('Parsed and converted JATS to unist tree in %s'));
|
|
40
|
+
}
|
|
41
|
+
get frontmatter() {
|
|
42
|
+
var _a, _b, _c, _d;
|
|
43
|
+
const title = this.articleTitle;
|
|
44
|
+
const subtitle = this.articleSubtitle;
|
|
45
|
+
const date = this.publicationDate;
|
|
46
|
+
const authors = this.articleAuthors;
|
|
47
|
+
const firstSubject = select(types_1.Tags.subject, (_a = this.articleCategories) !== null && _a !== void 0 ? _a : this.front);
|
|
48
|
+
const journalTitle = select(types_1.Tags.journalTitle, this.front);
|
|
49
|
+
return {
|
|
50
|
+
title: title ? (0, myst_common_1.toText)(title) : undefined,
|
|
51
|
+
subtitle: subtitle ? (0, myst_common_1.toText)(subtitle) : undefined,
|
|
52
|
+
doi: (_b = this.doi) !== null && _b !== void 0 ? _b : undefined,
|
|
53
|
+
date: date ? (_c = (0, utils_1.toDate)(date)) === null || _c === void 0 ? void 0 : _c.toISOString() : undefined,
|
|
54
|
+
authors: authors === null || authors === void 0 ? void 0 : authors.map((a) => (0, utils_1.authorAndAffiliation)(a, this.tree)),
|
|
55
|
+
keywords: (_d = this.keywords) === null || _d === void 0 ? void 0 : _d.map((k) => (0, myst_common_1.toText)(k)),
|
|
56
|
+
venue: journalTitle ? { title: (0, myst_common_1.toText)(journalTitle) } : undefined,
|
|
57
|
+
subject: firstSubject ? (0, myst_common_1.toText)(firstSubject) : undefined,
|
|
58
|
+
};
|
|
18
59
|
}
|
|
19
60
|
get front() {
|
|
20
|
-
return
|
|
61
|
+
return select(types_1.Tags.front, this.tree);
|
|
21
62
|
}
|
|
22
63
|
get premissions() {
|
|
23
|
-
return
|
|
64
|
+
return select(types_1.Tags.permissions, this.front);
|
|
24
65
|
}
|
|
25
66
|
get doi() {
|
|
26
|
-
|
|
67
|
+
var _a;
|
|
68
|
+
return doi_utils_1.default.normalize((_a = (0, utils_1.findArticleId)(this.front, 'doi')) !== null && _a !== void 0 ? _a : '');
|
|
69
|
+
}
|
|
70
|
+
get pmc() {
|
|
71
|
+
var _a;
|
|
72
|
+
return (_a = (0, utils_1.findArticleId)(this.front, 'pmc')) === null || _a === void 0 ? void 0 : _a.replace(/^PMC:?/, '');
|
|
73
|
+
}
|
|
74
|
+
get pmid() {
|
|
75
|
+
return (0, utils_1.findArticleId)(this.front, 'pmid');
|
|
27
76
|
}
|
|
28
77
|
get publicationDates() {
|
|
29
78
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.pubDate, this.front);
|
|
30
79
|
}
|
|
31
80
|
get publicationDate() {
|
|
32
|
-
return this.publicationDates.find((d) => !!
|
|
81
|
+
return this.publicationDates.find((d) => !!select(types_1.Tags.day, d));
|
|
33
82
|
}
|
|
34
83
|
get license() {
|
|
35
|
-
return
|
|
84
|
+
return select(types_1.Tags.license, this.premissions);
|
|
36
85
|
}
|
|
37
86
|
get keywordGroup() {
|
|
38
|
-
return
|
|
87
|
+
return select(types_1.Tags.kwdGroup, this.front);
|
|
39
88
|
}
|
|
40
89
|
/** The first keywords */
|
|
41
90
|
get keywords() {
|
|
@@ -44,23 +93,26 @@ class Jats {
|
|
|
44
93
|
get keywordGroups() {
|
|
45
94
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.kwdGroup, this.front);
|
|
46
95
|
}
|
|
96
|
+
get articleCategories() {
|
|
97
|
+
return select(types_1.Tags.articleCategories, this.front);
|
|
98
|
+
}
|
|
47
99
|
get titleGroup() {
|
|
48
|
-
return
|
|
100
|
+
return select(types_1.Tags.titleGroup, this.front);
|
|
49
101
|
}
|
|
50
102
|
get articleTitle() {
|
|
51
|
-
return
|
|
103
|
+
return select(types_1.Tags.articleTitle, this.titleGroup);
|
|
52
104
|
}
|
|
53
105
|
get articleSubtitle() {
|
|
54
|
-
return
|
|
106
|
+
return select(types_1.Tags.subtitle, this.titleGroup);
|
|
55
107
|
}
|
|
56
108
|
get abstract() {
|
|
57
|
-
return
|
|
109
|
+
return select(types_1.Tags.abstract, this.front);
|
|
58
110
|
}
|
|
59
111
|
get abstracts() {
|
|
60
112
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.abstract, this.front);
|
|
61
113
|
}
|
|
62
114
|
get contribGroup() {
|
|
63
|
-
return
|
|
115
|
+
return select(types_1.Tags.contribGroup, this.front);
|
|
64
116
|
}
|
|
65
117
|
get contribGroups() {
|
|
66
118
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.contribGroup, this.front);
|
|
@@ -69,19 +121,31 @@ class Jats {
|
|
|
69
121
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.contrib, this.contribGroup);
|
|
70
122
|
}
|
|
71
123
|
get body() {
|
|
72
|
-
return
|
|
124
|
+
return select(types_1.Tags.body, this.tree);
|
|
73
125
|
}
|
|
74
126
|
get back() {
|
|
75
|
-
return
|
|
127
|
+
return select(types_1.Tags.back, this.tree);
|
|
76
128
|
}
|
|
77
129
|
get subArticles() {
|
|
78
130
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.subArticle, this.tree);
|
|
79
131
|
}
|
|
80
132
|
get refList() {
|
|
81
|
-
return
|
|
133
|
+
return select(types_1.Tags.refList, this.back);
|
|
82
134
|
}
|
|
83
135
|
get references() {
|
|
84
136
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.ref, this.refList);
|
|
85
137
|
}
|
|
86
138
|
}
|
|
87
139
|
exports.Jats = Jats;
|
|
140
|
+
function hasSingleArticle(element) {
|
|
141
|
+
var _a;
|
|
142
|
+
if (element.name === 'article') {
|
|
143
|
+
return true;
|
|
144
|
+
}
|
|
145
|
+
if (element.name === 'pmc-articleset' &&
|
|
146
|
+
((_a = element.elements) === null || _a === void 0 ? void 0 : _a.length) === 1 &&
|
|
147
|
+
element.elements[0].name === 'article') {
|
|
148
|
+
return true;
|
|
149
|
+
}
|
|
150
|
+
return false;
|
|
151
|
+
}
|
package/dist/cjs/resolvers.js
CHANGED
|
@@ -12,7 +12,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
12
12
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
13
|
};
|
|
14
14
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
exports.
|
|
15
|
+
exports.customResolveJatsUrlFromDoi = exports.DEFAULT_RESOLVERS = exports.joss = exports.plos = exports.elife = void 0;
|
|
16
16
|
const doi_utils_1 = __importDefault(require("doi-utils"));
|
|
17
17
|
const node_fetch_1 = __importDefault(require("node-fetch"));
|
|
18
18
|
exports.elife = {
|
|
@@ -44,7 +44,10 @@ exports.joss = {
|
|
|
44
44
|
},
|
|
45
45
|
};
|
|
46
46
|
exports.DEFAULT_RESOLVERS = [exports.elife, exports.plos, exports.joss];
|
|
47
|
-
|
|
47
|
+
/**
|
|
48
|
+
* Use the known custom resolvers to pick where the JATS should be downloaded from.
|
|
49
|
+
*/
|
|
50
|
+
function customResolveJatsUrlFromDoi(session, doiString, resolvers = exports.DEFAULT_RESOLVERS) {
|
|
48
51
|
return __awaiter(this, void 0, void 0, function* () {
|
|
49
52
|
if (!doi_utils_1.default.validate(doiString))
|
|
50
53
|
throw new Error(`The doi ${doiString} is not valid`);
|
|
@@ -60,4 +63,4 @@ function resolveJatsUrlFromDoi(session, doiString, resolvers = exports.DEFAULT_R
|
|
|
60
63
|
return jatsUrl;
|
|
61
64
|
});
|
|
62
65
|
}
|
|
63
|
-
exports.
|
|
66
|
+
exports.customResolveJatsUrlFromDoi = customResolveJatsUrlFromDoi;
|
|
@@ -1282,4 +1282,20 @@ var Tags;
|
|
|
1282
1282
|
* See: https://jats.nlm.nih.gov/publishing/tag-library/1.3/element/pub-date.html
|
|
1283
1283
|
*/
|
|
1284
1284
|
Tags["pubDate"] = "pub-date";
|
|
1285
|
+
/**
|
|
1286
|
+
* Article Grouping Data
|
|
1287
|
+
*
|
|
1288
|
+
* Not available in articleauthoring!
|
|
1289
|
+
*
|
|
1290
|
+
* See: https://jats.nlm.nih.gov/archiving/tag-library/1.3/element/article-categories.html
|
|
1291
|
+
*/
|
|
1292
|
+
Tags["articleCategories"] = "article-categories";
|
|
1293
|
+
/**
|
|
1294
|
+
* Journal Title
|
|
1295
|
+
*
|
|
1296
|
+
* Not available in articleauthoring!
|
|
1297
|
+
*
|
|
1298
|
+
* See: https://jats.nlm.nih.gov/archiving/tag-library/1.3/element/journal-title.html
|
|
1299
|
+
*/
|
|
1300
|
+
Tags["journalTitle"] = "journal-title";
|
|
1285
1301
|
})(Tags = exports.Tags || (exports.Tags = {}));
|
|
@@ -11,7 +11,7 @@ var RefType;
|
|
|
11
11
|
/** Appendix */
|
|
12
12
|
RefType["app"] = "app";
|
|
13
13
|
/** Author notes */
|
|
14
|
-
RefType["
|
|
14
|
+
RefType["authorNote"] = "author-note";
|
|
15
15
|
/** Points to the description of or identifier for a grant or award (<award-id>), also, possibly to an entire awards group (<award-group>) */
|
|
16
16
|
RefType["award"] = "award";
|
|
17
17
|
/** Bibliographic reference (typically to a <ref> element, but it may point to a <element-citation> or <mixed-citation> if there are multiple citations inside the <ref> element) */
|
|
@@ -19,7 +19,7 @@ var RefType;
|
|
|
19
19
|
/** Biography (typically of a contributor) */
|
|
20
20
|
RefType["bio"] = "bio";
|
|
21
21
|
/** Textbox or sidebar */
|
|
22
|
-
RefType["
|
|
22
|
+
RefType["boxedText"] = "boxed-text";
|
|
23
23
|
/** Chemical structure (to a <chem-struct> or <chem-struct-wrap> element) */
|
|
24
24
|
RefType["chem"] = "chem";
|
|
25
25
|
/** Collaboration */
|
|
@@ -31,7 +31,7 @@ var RefType;
|
|
|
31
31
|
/** The value “custom” is used in versions of JATS that have a static list of values for the @ref-type attribute. To add a value to such a list, the cross reference is given the type “custom” and a separate @custom-type attribute provides the typing value. There is no need for this mechanism in Archiving, since there are no restrictions on the value of @ref-type, but “custom” and @custom-type have both been included in Archiving so that documents valid to a stricter version of the JATS Tag Set will also be valid to Archiving. */
|
|
32
32
|
RefType["custom"] = "custom";
|
|
33
33
|
/** Display formula */
|
|
34
|
-
RefType["
|
|
34
|
+
RefType["dispFormula"] = "disp-formula";
|
|
35
35
|
/** Figure or group of figures (to a <fig> or <fig-group> element) */
|
|
36
36
|
RefType["fig"] = "fig";
|
|
37
37
|
/** Footnote */
|
|
@@ -49,9 +49,9 @@ var RefType;
|
|
|
49
49
|
/** Statement */
|
|
50
50
|
RefType["statement"] = "statement";
|
|
51
51
|
/** Supplementary information */
|
|
52
|
-
RefType["
|
|
52
|
+
RefType["supplementaryMaterial"] = "supplementary-material";
|
|
53
53
|
/** Table or group of tables (to a <table-wrap> or <table-wrap-group> element) */
|
|
54
54
|
RefType["table"] = "table";
|
|
55
55
|
/** Table footnote */
|
|
56
|
-
RefType["
|
|
56
|
+
RefType["tableFn"] = "table-fn";
|
|
57
57
|
})(RefType = exports.RefType || (exports.RefType = {}));
|