jats-xml 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cjs/cli/parse.js +43 -35
- package/dist/cjs/download.js +163 -12
- package/dist/cjs/index.js +3 -1
- package/dist/cjs/jats.js +83 -19
- package/dist/cjs/resolvers.js +12 -9
- package/dist/cjs/session.js +0 -1
- package/dist/cjs/types/elementTags.js +16 -0
- package/dist/cjs/types/refType.js +5 -5
- package/dist/cjs/utils.js +55 -6
- package/dist/cjs/version.js +1 -1
- package/dist/esm/cli/parse.js +46 -38
- package/dist/esm/download.js +164 -15
- package/dist/esm/index.js +1 -0
- package/dist/esm/jats.js +69 -8
- package/dist/esm/resolvers.js +11 -8
- package/dist/esm/session.js +0 -1
- package/dist/esm/types/elementTags.js +16 -0
- package/dist/esm/types/refType.js +5 -5
- package/dist/esm/utils.js +54 -6
- package/dist/esm/version.js +1 -1
- package/dist/jats.js +3189 -2843
- package/dist/types/cli/parse.d.ts.map +1 -1
- package/dist/types/download.d.ts +9 -1
- package/dist/types/download.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/jats.d.ts +28 -15
- package/dist/types/jats.d.ts.map +1 -1
- package/dist/types/resolvers.d.ts +4 -1
- package/dist/types/resolvers.d.ts.map +1 -1
- package/dist/types/session.d.ts +0 -1
- package/dist/types/session.d.ts.map +1 -1
- package/dist/types/types/elementTags.d.ts +17 -1
- package/dist/types/types/elementTags.d.ts.map +1 -1
- package/dist/types/types/elements.d.ts +9 -0
- package/dist/types/types/elements.d.ts.map +1 -1
- package/dist/types/types/refType.d.ts +5 -5
- package/dist/types/types/refType.d.ts.map +1 -1
- package/dist/types/utils.d.ts +5 -2
- package/dist/types/utils.d.ts.map +1 -1
- package/dist/types/version.d.ts +1 -1
- package/package.json +10 -7
package/README.md
CHANGED
|
@@ -41,7 +41,7 @@ jats summary /local/article.jats
|
|
|
41
41
|
|
|
42
42
|
This will provide a summary, including a list of what the JATS file contains.
|
|
43
43
|
|
|
44
|
-

|
|
44
|
+

|
|
45
45
|
|
|
46
46
|
## Working in Typescript
|
|
47
47
|
|
package/dist/cjs/cli/parse.js
CHANGED
|
@@ -30,7 +30,7 @@ const utils_1 = require("../utils");
|
|
|
30
30
|
function hasValidExtension(output) {
|
|
31
31
|
return ['.xml', '.jats'].includes((0, path_1.extname)(output).toLowerCase());
|
|
32
32
|
}
|
|
33
|
-
function
|
|
33
|
+
function downloadAndSaveJats(session, urlOrDoi, output) {
|
|
34
34
|
return __awaiter(this, void 0, void 0, function* () {
|
|
35
35
|
if (fs_1.default.existsSync(urlOrDoi)) {
|
|
36
36
|
throw new Error(`File "${urlOrDoi}" is local and cannot be downloaded!`);
|
|
@@ -41,7 +41,7 @@ function downloadJats(session, urlOrDoi, output) {
|
|
|
41
41
|
if (!hasValidExtension(output)) {
|
|
42
42
|
session.log.warn(`The extension ${(0, path_1.extname)(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
|
|
43
43
|
}
|
|
44
|
-
const data = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
|
|
44
|
+
const { data } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
|
|
45
45
|
(0, myst_cli_utils_1.writeFileToFolder)(output, data);
|
|
46
46
|
return data;
|
|
47
47
|
});
|
|
@@ -52,15 +52,12 @@ function parseJats(session, file) {
|
|
|
52
52
|
if (fs_1.default.existsSync(file)) {
|
|
53
53
|
session.log.debug(`Found ${file} locally, parsing`);
|
|
54
54
|
const data = fs_1.default.readFileSync(file).toString();
|
|
55
|
-
|
|
56
|
-
return new jats_1.Jats(data);
|
|
55
|
+
return new jats_1.Jats(data, { log: session.log });
|
|
57
56
|
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
}
|
|
63
|
-
throw new Error(`Could not find ${file} locally, and it doesn't look like a URL or DOI`);
|
|
57
|
+
const { source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, resolvers_1.DEFAULT_RESOLVERS);
|
|
58
|
+
const jats = new jats_1.Jats(data, { source, log: session.log });
|
|
59
|
+
session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
|
|
60
|
+
return jats;
|
|
64
61
|
});
|
|
65
62
|
}
|
|
66
63
|
function formatLongString(data, offset = 0, length = 88 - offset) {
|
|
@@ -79,6 +76,9 @@ function formatDictionary(dict, opts) {
|
|
|
79
76
|
return Object.entries(dict)
|
|
80
77
|
.map(([k, t]) => {
|
|
81
78
|
var _a;
|
|
79
|
+
if (!t)
|
|
80
|
+
return null;
|
|
81
|
+
let wrap = typeof (opts === null || opts === void 0 ? void 0 : opts.wrap) === 'boolean' ? opts.wrap : true;
|
|
82
82
|
let value = t;
|
|
83
83
|
let color = chalk_1.default.yellow.bold;
|
|
84
84
|
if (t && typeof t === 'object') {
|
|
@@ -86,21 +86,21 @@ function formatDictionary(dict, opts) {
|
|
|
86
86
|
return null;
|
|
87
87
|
color = (_a = t.label) !== null && _a !== void 0 ? _a : color;
|
|
88
88
|
value = t.value;
|
|
89
|
+
wrap = typeof t.wrap === 'boolean' ? t.wrap : wrap;
|
|
89
90
|
}
|
|
90
|
-
const wrapped =
|
|
91
|
-
? String(value)
|
|
92
|
-
: formatLongString(String(value), maxLabel + 2, opts === null || opts === void 0 ? void 0 : opts.wrap);
|
|
91
|
+
const wrapped = wrap ? formatLongString(String(value), maxLabel + 2) : String(value);
|
|
93
92
|
return `${color(k)}:${' '.repeat(maxLabel - k.length + 1)}${wrapped}`;
|
|
94
93
|
})
|
|
95
94
|
.filter((o) => !!o)
|
|
96
95
|
.join('\n');
|
|
97
96
|
}
|
|
98
97
|
function jatsSummaryCLI(session, file) {
|
|
99
|
-
var _a, _b;
|
|
98
|
+
var _a, _b, _c;
|
|
100
99
|
return __awaiter(this, void 0, void 0, function* () {
|
|
101
100
|
const jats = yield parseJats(session, file);
|
|
102
101
|
const summary = {
|
|
103
|
-
|
|
102
|
+
Source: { value: jats.source, wrap: false },
|
|
103
|
+
DOI: jats.doi ? { value: doi_utils_1.default.buildUrl(jats.doi), wrap: false } : null,
|
|
104
104
|
Title: (_a = (0, myst_common_1.toText)(jats.articleTitle)) === null || _a === void 0 ? void 0 : _a.replace(/\n/g, ' '),
|
|
105
105
|
Date: (0, utils_1.formatDate)((0, utils_1.toDate)(jats.publicationDate)),
|
|
106
106
|
Authors: jats.articleAuthors
|
|
@@ -108,36 +108,44 @@ function jatsSummaryCLI(session, file) {
|
|
|
108
108
|
.join(', '),
|
|
109
109
|
Abstract: (_b = (0, myst_common_1.toText)(jats.abstract)) === null || _b === void 0 ? void 0 : _b.replace(/\n/g, ' '),
|
|
110
110
|
Keywords: jats.keywords.map((k) => (0, myst_common_1.toText)(k)).join(', '),
|
|
111
|
-
License: jats.license['xlink:href'],
|
|
112
|
-
|
|
113
|
-
|
|
111
|
+
License: (_c = jats.license) === null || _c === void 0 ? void 0 : _c['xlink:href'],
|
|
112
|
+
};
|
|
113
|
+
if (jats.body) {
|
|
114
|
+
summary.Figures = {
|
|
115
|
+
label: chalk_1.default.blue.bold,
|
|
116
|
+
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.fig, jats.body).length),
|
|
117
|
+
};
|
|
118
|
+
summary.Equations = {
|
|
114
119
|
label: chalk_1.default.blue.bold,
|
|
115
120
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.dispFormula, jats.body).length),
|
|
116
|
-
}
|
|
117
|
-
Tables
|
|
121
|
+
};
|
|
122
|
+
summary.Tables = {
|
|
118
123
|
label: chalk_1.default.blue.bold,
|
|
119
124
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.table, jats.body).length),
|
|
120
|
-
}
|
|
121
|
-
Code
|
|
125
|
+
};
|
|
126
|
+
summary.Code = {
|
|
122
127
|
label: chalk_1.default.blue.bold,
|
|
123
128
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.code, jats.body).length),
|
|
124
|
-
}
|
|
125
|
-
Sections
|
|
129
|
+
};
|
|
130
|
+
summary.Sections = {
|
|
126
131
|
label: chalk_1.default.blue.bold,
|
|
127
132
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.sec, jats.body).length),
|
|
128
|
-
}
|
|
129
|
-
Paragraphs
|
|
133
|
+
};
|
|
134
|
+
summary.Paragraphs = {
|
|
130
135
|
label: chalk_1.default.blue.bold,
|
|
131
136
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.p, jats.body).length),
|
|
132
|
-
}
|
|
133
|
-
Citations
|
|
134
|
-
'Cross-References'
|
|
137
|
+
};
|
|
138
|
+
summary.Citations = { label: chalk_1.default.blue.bold, value: String(jats.references.length) };
|
|
139
|
+
summary['Cross-References'] = {
|
|
135
140
|
label: chalk_1.default.blue.bold,
|
|
136
141
|
value: String((0, unist_util_select_1.selectAll)(types_1.Tags.xref, jats.body).length),
|
|
137
|
-
}
|
|
138
|
-
'Sub Articles'
|
|
139
|
-
}
|
|
142
|
+
};
|
|
143
|
+
summary['Sub Articles'] = { label: chalk_1.default.blue.bold, value: String(jats.subArticles.length) };
|
|
144
|
+
}
|
|
140
145
|
session.log.info(formatDictionary(summary));
|
|
146
|
+
if (!jats.body) {
|
|
147
|
+
session.log.warn('\nThis is a partial JATS record that does not have <body>.');
|
|
148
|
+
}
|
|
141
149
|
});
|
|
142
150
|
}
|
|
143
151
|
function jatsReferencesCLI(session, file) {
|
|
@@ -145,7 +153,7 @@ function jatsReferencesCLI(session, file) {
|
|
|
145
153
|
const jats = yield parseJats(session, file);
|
|
146
154
|
const sorted = jats.references
|
|
147
155
|
.map((ref) => {
|
|
148
|
-
const
|
|
156
|
+
const doiString = (0, utils_1.findArticleId)(ref, 'doi');
|
|
149
157
|
const title = (0, myst_common_1.toText)((0, unist_util_select_1.select)(types_1.Tags.articleTitle, ref));
|
|
150
158
|
const year = (0, myst_common_1.toText)((0, unist_util_select_1.select)(types_1.Tags.year, ref));
|
|
151
159
|
const surnames = (0, unist_util_select_1.selectAll)(types_1.Tags.surname, ref);
|
|
@@ -158,7 +166,7 @@ function jatsReferencesCLI(session, file) {
|
|
|
158
166
|
return {
|
|
159
167
|
Citation: `${short} (${year})`,
|
|
160
168
|
Title: title,
|
|
161
|
-
DOI:
|
|
169
|
+
DOI: doiString ? doi_utils_1.default.buildUrl(doiString) : null,
|
|
162
170
|
Count: s.length,
|
|
163
171
|
};
|
|
164
172
|
})
|
|
@@ -188,7 +196,7 @@ function makeDownloadCLI(program) {
|
|
|
188
196
|
.description('Parse a JATS file and provide a summary')
|
|
189
197
|
.argument('<url>', 'The JATS url or a DOI')
|
|
190
198
|
.argument('<output>', 'The JATS url or a DOI')
|
|
191
|
-
.action((0, myst_cli_utils_1.clirun)(
|
|
199
|
+
.action((0, myst_cli_utils_1.clirun)(downloadAndSaveJats, { program, getSession: session_1.getSession }));
|
|
192
200
|
return command;
|
|
193
201
|
}
|
|
194
202
|
function addDownloadCLI(program) {
|
package/dist/cjs/download.js
CHANGED
|
@@ -12,35 +12,186 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
12
12
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
13
|
};
|
|
14
14
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
exports.downloadJatsFromUrl = void 0;
|
|
15
|
+
exports.downloadJatsFromUrl = exports.checkIfPubMedCentralHasJats = exports.convertPMID2PMCID = void 0;
|
|
16
16
|
const doi_utils_1 = __importDefault(require("doi-utils"));
|
|
17
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
17
18
|
const node_fetch_1 = __importDefault(require("node-fetch"));
|
|
18
19
|
const myst_cli_utils_1 = require("myst-cli-utils");
|
|
20
|
+
const fair_principles_1 = require("fair-principles");
|
|
19
21
|
const resolvers_1 = require("./resolvers");
|
|
22
|
+
function logAboutJatsFailing(session, jatsUrls) {
|
|
23
|
+
session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');
|
|
24
|
+
session.log.info(`${chalk_1.default.green(`\nThe XML ${chalk_1.default.bold('should')} be here:\n\n${jatsUrls.join('\n')}`)}\n`);
|
|
25
|
+
const FAIR = (0, fair_principles_1.highlightFAIR)('A', { chalk: chalk_1.default });
|
|
26
|
+
session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
|
|
27
|
+
session.log.debug((0, fair_principles_1.formatPrinciples)('A*', { chalk: chalk_1.default }));
|
|
28
|
+
session.log.info(`${chalk_1.default.blue('The link may work in a browser.')}\n`);
|
|
29
|
+
}
|
|
20
30
|
function dowloadFromUrl(session, jatsUrl) {
|
|
21
31
|
return __awaiter(this, void 0, void 0, function* () {
|
|
32
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
22
33
|
session.log.debug(`Fetching JATS from ${jatsUrl}`);
|
|
23
|
-
const
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
34
|
+
const resp = yield (0, node_fetch_1.default)(jatsUrl, {
|
|
35
|
+
headers: [
|
|
36
|
+
['accept', 'application/xml'],
|
|
37
|
+
[
|
|
38
|
+
'user-agent',
|
|
39
|
+
// A bunch of publishers just show the login screen or quickly block you.
|
|
40
|
+
// We don't want to DDOS these publishers, they are the _good ones_ for sharing the XML!!
|
|
41
|
+
// But some block on the second request?!
|
|
42
|
+
// So we can pretend to be a random browser, I guess. How silly. 🤷♂️
|
|
43
|
+
`Mozilla/5.0 (Macintosh; Intel Mac OS X ${Math.floor(Math.random() * 100)})`,
|
|
44
|
+
],
|
|
45
|
+
],
|
|
46
|
+
});
|
|
47
|
+
if (!resp.ok) {
|
|
48
|
+
session.log.debug(`JATS failed to download from "${jatsUrl}"`);
|
|
49
|
+
throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
|
|
50
|
+
}
|
|
51
|
+
const contentType = resp.headers.get('content-type');
|
|
52
|
+
if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
|
|
53
|
+
(contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
|
|
54
|
+
(contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
|
|
55
|
+
session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}\n${chalk_1.default.dim('Things may not work, but we are going to try our best...')}`);
|
|
56
|
+
}
|
|
57
|
+
const data = yield resp.text();
|
|
58
|
+
session.log.debug(toc(`Fetched document with content-type "${contentType}" in %s`));
|
|
31
59
|
return data;
|
|
32
60
|
});
|
|
33
61
|
}
|
|
62
|
+
/**
|
|
63
|
+
* There are 5.8M or so DOIs that have a full XML record:
|
|
64
|
+
*
|
|
65
|
+
* https://api.crossref.org/works?filter=full-text.type:application/xml,full-text.application:text-mining&facet=publisher-name:*&rows=0
|
|
66
|
+
*
|
|
67
|
+
* This function tries to find the correct URL for the record.
|
|
68
|
+
*/
|
|
69
|
+
function checkIfDoiHasJats(session, urlOrDoi) {
|
|
70
|
+
var _a, _b, _c, _d;
|
|
71
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
72
|
+
if (!doi_utils_1.default.validate(urlOrDoi))
|
|
73
|
+
return;
|
|
74
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
75
|
+
const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
|
|
76
|
+
session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
|
|
77
|
+
const resp = yield (0, node_fetch_1.default)(doiUrl, { headers: [['Accept', 'application/json']] });
|
|
78
|
+
if (!resp.ok) {
|
|
79
|
+
// Silently return -- other functions can try!
|
|
80
|
+
session.log.debug(`DOI failed to resolve: ${doiUrl}`);
|
|
81
|
+
return;
|
|
82
|
+
}
|
|
83
|
+
const data = (yield resp.json());
|
|
84
|
+
session.log.debug(toc(`DOI resolved in %s with ${(_b = (_a = data.link) === null || _a === void 0 ? void 0 : _a.length) !== null && _b !== void 0 ? _b : 0} links to content`));
|
|
85
|
+
if (data.link) {
|
|
86
|
+
session.log.debug(['', ...data.link.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '));
|
|
87
|
+
}
|
|
88
|
+
const fullXml = (_d = (_c = data.link) === null || _c === void 0 ? void 0 : _c.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _d === void 0 ? void 0 : _d.URL;
|
|
89
|
+
if (fullXml)
|
|
90
|
+
return fullXml;
|
|
91
|
+
session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
|
|
92
|
+
return undefined;
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
|
|
97
|
+
*/
|
|
98
|
+
function convertPMID2PMCID(session, PMID) {
|
|
99
|
+
var _a, _b;
|
|
100
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
101
|
+
if (PMID.startsWith('https://')) {
|
|
102
|
+
const idPart = new URL(PMID).pathname.slice(1);
|
|
103
|
+
session.log.debug(`Extract ${PMID} to ${idPart}`);
|
|
104
|
+
return convertPMID2PMCID(session, idPart);
|
|
105
|
+
}
|
|
106
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
107
|
+
const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
|
|
108
|
+
const resp = yield (0, node_fetch_1.default)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, {
|
|
109
|
+
headers: [['Accept', 'application/json']],
|
|
110
|
+
});
|
|
111
|
+
if (!resp.ok) {
|
|
112
|
+
// Silently return -- other functions can try!
|
|
113
|
+
session.log.debug(`Failed to convert PubMedID: ${PMID}`);
|
|
114
|
+
return;
|
|
115
|
+
}
|
|
116
|
+
const data = yield resp.json();
|
|
117
|
+
const PMCID = (_b = (_a = data === null || data === void 0 ? void 0 : data.records) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.pmcid;
|
|
118
|
+
session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
|
|
119
|
+
return PMCID;
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
exports.convertPMID2PMCID = convertPMID2PMCID;
|
|
123
|
+
function pubMedCentralJats(PMCID) {
|
|
124
|
+
const normalized = PMCID.replace(/^PMC:?/, '');
|
|
125
|
+
return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
|
|
126
|
+
}
|
|
127
|
+
function checkIfPubMedCentralHasJats(session, urlOrDoi) {
|
|
128
|
+
var _a, _b;
|
|
129
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
130
|
+
if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
|
|
131
|
+
return pubMedCentralJats(urlOrDoi);
|
|
132
|
+
if (!doi_utils_1.default.validate(urlOrDoi))
|
|
133
|
+
return;
|
|
134
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
135
|
+
const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
|
|
136
|
+
session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
|
|
137
|
+
const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
|
|
138
|
+
const resp = yield (0, node_fetch_1.default)(openAlexUrl, { headers: [['Accept', 'application/json']] });
|
|
139
|
+
if (!resp.ok) {
|
|
140
|
+
// Silently return -- other functions can try!
|
|
141
|
+
session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
|
|
142
|
+
return;
|
|
143
|
+
}
|
|
144
|
+
const data = (yield resp.json());
|
|
145
|
+
const PMID = (_a = data === null || data === void 0 ? void 0 : data.ids) === null || _a === void 0 ? void 0 : _a.pmid;
|
|
146
|
+
let PMCID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmcid;
|
|
147
|
+
if (!PMCID && !!PMID) {
|
|
148
|
+
session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`));
|
|
149
|
+
PMCID = yield convertPMID2PMCID(session, PMID);
|
|
150
|
+
if (!PMCID) {
|
|
151
|
+
session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
|
|
152
|
+
return;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
if (!PMCID) {
|
|
156
|
+
session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s, but there is no PMCID`));
|
|
157
|
+
return;
|
|
158
|
+
}
|
|
159
|
+
session.log.debug(toc(`OpenAlex resolved in %s, with a PMCID of ${PMCID}`));
|
|
160
|
+
return pubMedCentralJats(PMCID);
|
|
161
|
+
});
|
|
162
|
+
}
|
|
163
|
+
exports.checkIfPubMedCentralHasJats = checkIfPubMedCentralHasJats;
|
|
34
164
|
function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
|
|
35
165
|
return __awaiter(this, void 0, void 0, function* () {
|
|
166
|
+
const expectedUrls = (yield Promise.all([
|
|
167
|
+
checkIfPubMedCentralHasJats(session, urlOrDoi),
|
|
168
|
+
checkIfDoiHasJats(session, urlOrDoi),
|
|
169
|
+
])).filter((u) => !!u);
|
|
170
|
+
if (expectedUrls.length > 0) {
|
|
171
|
+
session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
|
|
172
|
+
for (let index = 0; index < expectedUrls.length; index++) {
|
|
173
|
+
const url = expectedUrls[index];
|
|
174
|
+
try {
|
|
175
|
+
const data = yield dowloadFromUrl(session, url);
|
|
176
|
+
if (data)
|
|
177
|
+
return { source: url, data };
|
|
178
|
+
}
|
|
179
|
+
catch (error) {
|
|
180
|
+
session.log.debug(error.message);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
// If there are expected URLs that don't work: see something, say something, etc.
|
|
184
|
+
logAboutJatsFailing(session, expectedUrls);
|
|
185
|
+
}
|
|
36
186
|
if (doi_utils_1.default.validate(urlOrDoi)) {
|
|
37
|
-
const jatsUrl = yield (0, resolvers_1.
|
|
187
|
+
const jatsUrl = yield (0, resolvers_1.customResolveJatsUrlFromDoi)(session, urlOrDoi, resolvers);
|
|
38
188
|
const data = yield dowloadFromUrl(session, jatsUrl);
|
|
39
|
-
return data;
|
|
189
|
+
return { source: jatsUrl, data };
|
|
40
190
|
}
|
|
41
191
|
if ((0, myst_cli_utils_1.isUrl)(urlOrDoi)) {
|
|
192
|
+
session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
|
|
42
193
|
const data = yield dowloadFromUrl(session, urlOrDoi);
|
|
43
|
-
return data;
|
|
194
|
+
return { source: urlOrDoi, data };
|
|
44
195
|
}
|
|
45
196
|
throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
|
|
46
197
|
});
|
package/dist/cjs/index.js
CHANGED
|
@@ -17,9 +17,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
17
17
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
18
18
|
};
|
|
19
19
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
-
exports.Jats = exports.version = void 0;
|
|
20
|
+
exports.downloadJatsFromUrl = exports.Jats = exports.version = void 0;
|
|
21
21
|
var version_1 = require("./version");
|
|
22
22
|
Object.defineProperty(exports, "version", { enumerable: true, get: function () { return __importDefault(version_1).default; } });
|
|
23
23
|
var jats_1 = require("./jats");
|
|
24
24
|
Object.defineProperty(exports, "Jats", { enumerable: true, get: function () { return jats_1.Jats; } });
|
|
25
|
+
var download_1 = require("./download");
|
|
26
|
+
Object.defineProperty(exports, "downloadJatsFromUrl", { enumerable: true, get: function () { return download_1.downloadJatsFromUrl; } });
|
|
25
27
|
__exportStar(require("./types"), exports);
|
package/dist/cjs/jats.js
CHANGED
|
@@ -1,41 +1,90 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
2
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
6
|
exports.Jats = void 0;
|
|
7
|
+
const myst_common_1 = require("myst-common");
|
|
4
8
|
const xml_js_1 = require("xml-js");
|
|
9
|
+
const doi_utils_1 = __importDefault(require("doi-utils"));
|
|
5
10
|
const utils_1 = require("./utils");
|
|
6
11
|
const unist_util_select_1 = require("unist-util-select");
|
|
7
12
|
const types_1 = require("./types");
|
|
13
|
+
const myst_cli_utils_1 = require("myst-cli-utils");
|
|
14
|
+
function select(selector, node) {
|
|
15
|
+
var _a;
|
|
16
|
+
return ((_a = (0, unist_util_select_1.select)(selector, node)) !== null && _a !== void 0 ? _a : undefined);
|
|
17
|
+
}
|
|
8
18
|
class Jats {
|
|
9
|
-
constructor(data) {
|
|
10
|
-
|
|
19
|
+
constructor(data, opts) {
|
|
20
|
+
var _a;
|
|
21
|
+
const toc = (0, myst_cli_utils_1.tic)();
|
|
22
|
+
this.log = opts === null || opts === void 0 ? void 0 : opts.log;
|
|
23
|
+
if (opts === null || opts === void 0 ? void 0 : opts.source)
|
|
24
|
+
this.source = opts.source;
|
|
25
|
+
try {
|
|
26
|
+
this.raw = (0, xml_js_1.xml2js)(data, { compact: false });
|
|
27
|
+
}
|
|
28
|
+
catch (error) {
|
|
29
|
+
throw new Error('Problem parsing the JATS document, please ensure it is XML');
|
|
30
|
+
}
|
|
11
31
|
const { declaration, elements } = this.raw;
|
|
12
32
|
this.declaration = declaration === null || declaration === void 0 ? void 0 : declaration.attributes;
|
|
13
|
-
if (!((elements === null || elements === void 0 ? void 0 : elements.length) === 2 && elements[0].type === 'doctype' && elements[1]
|
|
14
|
-
throw new Error('article is not the only element of the JATS');
|
|
33
|
+
if (!((elements === null || elements === void 0 ? void 0 : elements.length) === 2 && elements[0].type === 'doctype' && hasSingleArticle(elements[1]))) {
|
|
34
|
+
throw new Error('Element <article> is not the only element of the JATS');
|
|
15
35
|
}
|
|
16
36
|
this.doctype = elements[0].doctype;
|
|
17
|
-
|
|
37
|
+
const converted = (0, utils_1.convertToUnist)(elements[1]);
|
|
38
|
+
this.tree = select('article', converted);
|
|
39
|
+
(_a = this.log) === null || _a === void 0 ? void 0 : _a.debug(toc('Parsed and converted JATS to unist tree in %s'));
|
|
40
|
+
}
|
|
41
|
+
get frontmatter() {
|
|
42
|
+
var _a, _b, _c, _d;
|
|
43
|
+
const title = this.articleTitle;
|
|
44
|
+
const subtitle = this.articleSubtitle;
|
|
45
|
+
const date = this.publicationDate;
|
|
46
|
+
const authors = this.articleAuthors;
|
|
47
|
+
const firstSubject = select(types_1.Tags.subject, (_a = this.articleCategories) !== null && _a !== void 0 ? _a : this.front);
|
|
48
|
+
const journalTitle = select(types_1.Tags.journalTitle, this.front);
|
|
49
|
+
return {
|
|
50
|
+
title: title ? (0, myst_common_1.toText)(title) : undefined,
|
|
51
|
+
subtitle: subtitle ? (0, myst_common_1.toText)(subtitle) : undefined,
|
|
52
|
+
doi: (_b = this.doi) !== null && _b !== void 0 ? _b : undefined,
|
|
53
|
+
date: date ? (_c = (0, utils_1.toDate)(date)) === null || _c === void 0 ? void 0 : _c.toISOString() : undefined,
|
|
54
|
+
authors: authors === null || authors === void 0 ? void 0 : authors.map((a) => (0, utils_1.authorAndAffiliation)(a, this.tree)),
|
|
55
|
+
keywords: (_d = this.keywords) === null || _d === void 0 ? void 0 : _d.map((k) => (0, myst_common_1.toText)(k)),
|
|
56
|
+
venue: journalTitle ? { title: (0, myst_common_1.toText)(journalTitle) } : undefined,
|
|
57
|
+
subject: firstSubject ? (0, myst_common_1.toText)(firstSubject) : undefined,
|
|
58
|
+
};
|
|
18
59
|
}
|
|
19
60
|
get front() {
|
|
20
|
-
return
|
|
61
|
+
return select(types_1.Tags.front, this.tree);
|
|
21
62
|
}
|
|
22
63
|
get premissions() {
|
|
23
|
-
return
|
|
64
|
+
return select(types_1.Tags.permissions, this.front);
|
|
24
65
|
}
|
|
25
66
|
get doi() {
|
|
26
|
-
|
|
67
|
+
var _a;
|
|
68
|
+
return doi_utils_1.default.normalize((_a = (0, utils_1.findArticleId)(this.front, 'doi')) !== null && _a !== void 0 ? _a : '');
|
|
69
|
+
}
|
|
70
|
+
get pmc() {
|
|
71
|
+
var _a;
|
|
72
|
+
return (_a = (0, utils_1.findArticleId)(this.front, 'pmc')) === null || _a === void 0 ? void 0 : _a.replace(/^PMC:?/, '');
|
|
73
|
+
}
|
|
74
|
+
get pmid() {
|
|
75
|
+
return (0, utils_1.findArticleId)(this.front, 'pmid');
|
|
27
76
|
}
|
|
28
77
|
get publicationDates() {
|
|
29
78
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.pubDate, this.front);
|
|
30
79
|
}
|
|
31
80
|
get publicationDate() {
|
|
32
|
-
return this.publicationDates.find((d) => !!
|
|
81
|
+
return this.publicationDates.find((d) => !!select(types_1.Tags.day, d));
|
|
33
82
|
}
|
|
34
83
|
get license() {
|
|
35
|
-
return
|
|
84
|
+
return select(types_1.Tags.license, this.premissions);
|
|
36
85
|
}
|
|
37
86
|
get keywordGroup() {
|
|
38
|
-
return
|
|
87
|
+
return select(types_1.Tags.kwdGroup, this.front);
|
|
39
88
|
}
|
|
40
89
|
/** The first keywords */
|
|
41
90
|
get keywords() {
|
|
@@ -44,23 +93,26 @@ class Jats {
|
|
|
44
93
|
get keywordGroups() {
|
|
45
94
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.kwdGroup, this.front);
|
|
46
95
|
}
|
|
96
|
+
get articleCategories() {
|
|
97
|
+
return select(types_1.Tags.articleCategories, this.front);
|
|
98
|
+
}
|
|
47
99
|
get titleGroup() {
|
|
48
|
-
return
|
|
100
|
+
return select(types_1.Tags.titleGroup, this.front);
|
|
49
101
|
}
|
|
50
102
|
get articleTitle() {
|
|
51
|
-
return
|
|
103
|
+
return select(types_1.Tags.articleTitle, this.titleGroup);
|
|
52
104
|
}
|
|
53
105
|
get articleSubtitle() {
|
|
54
|
-
return
|
|
106
|
+
return select(types_1.Tags.subtitle, this.titleGroup);
|
|
55
107
|
}
|
|
56
108
|
get abstract() {
|
|
57
|
-
return
|
|
109
|
+
return select(types_1.Tags.abstract, this.front);
|
|
58
110
|
}
|
|
59
111
|
get abstracts() {
|
|
60
112
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.abstract, this.front);
|
|
61
113
|
}
|
|
62
114
|
get contribGroup() {
|
|
63
|
-
return
|
|
115
|
+
return select(types_1.Tags.contribGroup, this.front);
|
|
64
116
|
}
|
|
65
117
|
get contribGroups() {
|
|
66
118
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.contribGroup, this.front);
|
|
@@ -69,19 +121,31 @@ class Jats {
|
|
|
69
121
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.contrib, this.contribGroup);
|
|
70
122
|
}
|
|
71
123
|
get body() {
|
|
72
|
-
return
|
|
124
|
+
return select(types_1.Tags.body, this.tree);
|
|
73
125
|
}
|
|
74
126
|
get back() {
|
|
75
|
-
return
|
|
127
|
+
return select(types_1.Tags.back, this.tree);
|
|
76
128
|
}
|
|
77
129
|
get subArticles() {
|
|
78
130
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.subArticle, this.tree);
|
|
79
131
|
}
|
|
80
132
|
get refList() {
|
|
81
|
-
return
|
|
133
|
+
return select(types_1.Tags.refList, this.back);
|
|
82
134
|
}
|
|
83
135
|
get references() {
|
|
84
136
|
return (0, unist_util_select_1.selectAll)(types_1.Tags.ref, this.refList);
|
|
85
137
|
}
|
|
86
138
|
}
|
|
87
139
|
exports.Jats = Jats;
|
|
140
|
+
function hasSingleArticle(element) {
|
|
141
|
+
var _a;
|
|
142
|
+
if (element.name === 'article') {
|
|
143
|
+
return true;
|
|
144
|
+
}
|
|
145
|
+
if (element.name === 'pmc-articleset' &&
|
|
146
|
+
((_a = element.elements) === null || _a === void 0 ? void 0 : _a.length) === 1 &&
|
|
147
|
+
element.elements[0].name === 'article') {
|
|
148
|
+
return true;
|
|
149
|
+
}
|
|
150
|
+
return false;
|
|
151
|
+
}
|
package/dist/cjs/resolvers.js
CHANGED
|
@@ -12,7 +12,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
12
12
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
13
|
};
|
|
14
14
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
exports.
|
|
15
|
+
exports.customResolveJatsUrlFromDoi = exports.DEFAULT_RESOLVERS = exports.joss = exports.plos = exports.elife = void 0;
|
|
16
16
|
const doi_utils_1 = __importDefault(require("doi-utils"));
|
|
17
17
|
const node_fetch_1 = __importDefault(require("node-fetch"));
|
|
18
18
|
exports.elife = {
|
|
@@ -33,22 +33,25 @@ exports.plos = {
|
|
|
33
33
|
};
|
|
34
34
|
exports.joss = {
|
|
35
35
|
test(url) {
|
|
36
|
-
return new URL(url).hostname === 'joss.theoj.org';
|
|
36
|
+
return new URL(url).hostname === 'joss.theoj.org' && doi_utils_1.default.validate(url);
|
|
37
37
|
},
|
|
38
38
|
jatsUrl(url) {
|
|
39
39
|
// Probably a better way to do this, the joss papers on on github!
|
|
40
|
-
const
|
|
41
|
-
const [org, jossId] =
|
|
40
|
+
const doiString = doi_utils_1.default.normalize(url);
|
|
41
|
+
const [org, jossId] = doiString.split('/');
|
|
42
42
|
const id = jossId.split('.')[1];
|
|
43
43
|
return `https://raw.githubusercontent.com/openjournals/joss-papers/master/joss.${id}/${org}.${jossId}.jats`;
|
|
44
44
|
},
|
|
45
45
|
};
|
|
46
46
|
exports.DEFAULT_RESOLVERS = [exports.elife, exports.plos, exports.joss];
|
|
47
|
-
|
|
47
|
+
/**
|
|
48
|
+
* Use the known custom resolvers to pick where the JATS should be downloaded from.
|
|
49
|
+
*/
|
|
50
|
+
function customResolveJatsUrlFromDoi(session, doiString, resolvers = exports.DEFAULT_RESOLVERS) {
|
|
48
51
|
return __awaiter(this, void 0, void 0, function* () {
|
|
49
|
-
if (!doi_utils_1.default.validate(
|
|
50
|
-
throw new Error(`The doi ${
|
|
51
|
-
const doiUrl = doi_utils_1.default.buildUrl(
|
|
52
|
+
if (!doi_utils_1.default.validate(doiString))
|
|
53
|
+
throw new Error(`The doi ${doiString} is not valid`);
|
|
54
|
+
const doiUrl = doi_utils_1.default.buildUrl(doiString);
|
|
52
55
|
session.log.debug(`Resolving DOI ${doiUrl}`);
|
|
53
56
|
const resp = yield (0, node_fetch_1.default)(doiUrl);
|
|
54
57
|
const articleUrl = resp.url;
|
|
@@ -60,4 +63,4 @@ function resolveJatsUrlFromDoi(session, doi, resolvers = exports.DEFAULT_RESOLVE
|
|
|
60
63
|
return jatsUrl;
|
|
61
64
|
});
|
|
62
65
|
}
|
|
63
|
-
exports.
|
|
66
|
+
exports.customResolveJatsUrlFromDoi = customResolveJatsUrlFromDoi;
|
package/dist/cjs/session.js
CHANGED
|
@@ -5,7 +5,6 @@ const myst_cli_utils_1 = require("myst-cli-utils");
|
|
|
5
5
|
class Session {
|
|
6
6
|
constructor(opts) {
|
|
7
7
|
var _a;
|
|
8
|
-
this.API_URL = 'https://api.myst.tools';
|
|
9
8
|
this.log = (_a = opts === null || opts === void 0 ? void 0 : opts.logger) !== null && _a !== void 0 ? _a : (0, myst_cli_utils_1.chalkLogger)(myst_cli_utils_1.LogLevel.debug);
|
|
10
9
|
}
|
|
11
10
|
}
|
|
@@ -1282,4 +1282,20 @@ var Tags;
|
|
|
1282
1282
|
* See: https://jats.nlm.nih.gov/publishing/tag-library/1.3/element/pub-date.html
|
|
1283
1283
|
*/
|
|
1284
1284
|
Tags["pubDate"] = "pub-date";
|
|
1285
|
+
/**
|
|
1286
|
+
* Article Grouping Data
|
|
1287
|
+
*
|
|
1288
|
+
* Not available in articleauthoring!
|
|
1289
|
+
*
|
|
1290
|
+
* See: https://jats.nlm.nih.gov/archiving/tag-library/1.3/element/article-categories.html
|
|
1291
|
+
*/
|
|
1292
|
+
Tags["articleCategories"] = "article-categories";
|
|
1293
|
+
/**
|
|
1294
|
+
* Journal Title
|
|
1295
|
+
*
|
|
1296
|
+
* Not available in articleauthoring!
|
|
1297
|
+
*
|
|
1298
|
+
* See: https://jats.nlm.nih.gov/archiving/tag-library/1.3/element/journal-title.html
|
|
1299
|
+
*/
|
|
1300
|
+
Tags["journalTitle"] = "journal-title";
|
|
1285
1301
|
})(Tags = exports.Tags || (exports.Tags = {}));
|