jats-xml 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2023 Curvenote Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # jats-xml
2
2
 
3
+ [![jats-xml on npm](https://img.shields.io/npm/v/jats-xml.svg)](https://www.npmjs.com/package/jats-xml)
4
+ [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/curvenote/jats-xml/blob/main/LICENSE)
5
+ [![CI](https://github.com/curvenote/jats-xml/workflows/CI/badge.svg)](https://github.com/curvenote/jats-xml/actions)
6
+
3
7
  Types and utilities for working with JATS XML documents in Node and Typescript.
4
8
 
5
9
  Read and write JATS XML from node or see summaries from the command line.
@@ -41,7 +45,7 @@ jats summary /local/article.jats
41
45
 
42
46
  This will provide a summary, including a list of what the JATS file contains.
43
47
 
44
- ![Output of `jats summary`](/packages/jats-xml/images/jats-output.png)
48
+ ![Output of `jats summary`](/images/jats-output.png)
45
49
 
46
50
  ## Working in Typescript
47
51
 
@@ -30,7 +30,7 @@ const utils_1 = require("../utils");
30
30
  function hasValidExtension(output) {
31
31
  return ['.xml', '.jats'].includes((0, path_1.extname)(output).toLowerCase());
32
32
  }
33
- function downloadAndSaveJats(session, urlOrDoi, output) {
33
+ function downloadAndSaveJats(session, urlOrDoi, output, opts = { resolvers: resolvers_1.DEFAULT_RESOLVERS }) {
34
34
  return __awaiter(this, void 0, void 0, function* () {
35
35
  if (fs_1.default.existsSync(urlOrDoi)) {
36
36
  throw new Error(`File "${urlOrDoi}" is local and cannot be downloaded!`);
@@ -41,12 +41,12 @@ function downloadAndSaveJats(session, urlOrDoi, output) {
41
41
  if (!hasValidExtension(output)) {
42
42
  session.log.warn(`The extension ${(0, path_1.extname)(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
43
43
  }
44
- const { data } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
44
+ const { data } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, opts);
45
45
  (0, myst_cli_utils_1.writeFileToFolder)(output, data);
46
46
  return data;
47
47
  });
48
48
  }
49
- function parseJats(session, file) {
49
+ function parseJats(session, file, opts = { resolvers: resolvers_1.DEFAULT_RESOLVERS }) {
50
50
  return __awaiter(this, void 0, void 0, function* () {
51
51
  const toc = (0, myst_cli_utils_1.tic)();
52
52
  if (fs_1.default.existsSync(file)) {
@@ -54,7 +54,7 @@ function parseJats(session, file) {
54
54
  const data = fs_1.default.readFileSync(file).toString();
55
55
  return new jats_1.Jats(data, { log: session.log });
56
56
  }
57
- const { source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, resolvers_1.DEFAULT_RESOLVERS);
57
+ const { source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, opts);
58
58
  const jats = new jats_1.Jats(data, { source, log: session.log });
59
59
  session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
60
60
  return jats;
@@ -27,28 +27,17 @@ function logAboutJatsFailing(session, jatsUrls) {
27
27
  session.log.debug((0, fair_principles_1.formatPrinciples)('A*', { chalk: chalk_1.default }));
28
28
  session.log.info(`${chalk_1.default.blue('The link may work in a browser.')}\n`);
29
29
  }
30
- function dowloadFromUrl(session, jatsUrl) {
30
+ function dowloadFromUrl(session, jatsUrl, opts) {
31
+ var _a, _b;
31
32
  return __awaiter(this, void 0, void 0, function* () {
32
33
  const toc = (0, myst_cli_utils_1.tic)();
33
34
  session.log.debug(`Fetching JATS from ${jatsUrl}`);
34
- const resp = yield (0, node_fetch_1.default)(jatsUrl, {
35
- headers: [
36
- ['accept', 'application/xml'],
37
- [
38
- 'user-agent',
39
- // A bunch of publishers just show the login screen or quickly block you.
40
- // We don't want to DDOS these publishers, they are the _good ones_ for sharing the XML!!
41
- // But some block on the second request?!
42
- // So we can pretend to be a random browser, I guess. How silly. 🤷‍♂️
43
- `Mozilla/5.0 (Macintosh; Intel Mac OS X ${Math.floor(Math.random() * 100)})`,
44
- ],
45
- ],
46
- });
35
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(jatsUrl, 'xml');
47
36
  if (!resp.ok) {
48
37
  session.log.debug(`JATS failed to download from "${jatsUrl}"`);
49
38
  throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
50
39
  }
51
- const contentType = resp.headers.get('content-type');
40
+ const contentType = (_b = resp.headers) === null || _b === void 0 ? void 0 : _b.get('content-type');
52
41
  if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
53
42
  (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
54
43
  (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
@@ -59,6 +48,16 @@ function dowloadFromUrl(session, jatsUrl) {
59
48
  return data;
60
49
  });
61
50
  }
51
+ function defaultFetcher(url, kind) {
52
+ switch (kind) {
53
+ case 'json':
54
+ return (0, node_fetch_1.default)(url, { headers: [['Accept', 'application/json']] });
55
+ case 'xml':
56
+ return (0, node_fetch_1.default)(url, { headers: [['Accept', 'application/xml']] });
57
+ default:
58
+ return (0, node_fetch_1.default)(url);
59
+ }
60
+ }
62
61
  /**
63
62
  * There are 5.8M or so DOIs that have a full XML record:
64
63
  *
@@ -66,26 +65,26 @@ function dowloadFromUrl(session, jatsUrl) {
66
65
  *
67
66
  * This function tries to find the correct URL for the record.
68
67
  */
69
- function checkIfDoiHasJats(session, urlOrDoi) {
70
- var _a, _b, _c, _d;
68
+ function checkIfDoiHasJats(session, urlOrDoi, opts) {
69
+ var _a, _b, _c, _d, _e;
71
70
  return __awaiter(this, void 0, void 0, function* () {
72
71
  if (!doi_utils_1.default.validate(urlOrDoi))
73
72
  return;
74
73
  const toc = (0, myst_cli_utils_1.tic)();
75
74
  const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
76
75
  session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
77
- const resp = yield (0, node_fetch_1.default)(doiUrl, { headers: [['Accept', 'application/json']] });
76
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(doiUrl, 'json');
78
77
  if (!resp.ok) {
79
78
  // Silently return -- other functions can try!
80
79
  session.log.debug(`DOI failed to resolve: ${doiUrl}`);
81
80
  return;
82
81
  }
83
82
  const data = (yield resp.json());
84
- session.log.debug(toc(`DOI resolved in %s with ${(_b = (_a = data.link) === null || _a === void 0 ? void 0 : _a.length) !== null && _b !== void 0 ? _b : 0} links to content`));
83
+ session.log.debug(toc(`DOI resolved in %s with ${(_c = (_b = data.link) === null || _b === void 0 ? void 0 : _b.length) !== null && _c !== void 0 ? _c : 0} links to content`));
85
84
  if (data.link) {
86
85
  session.log.debug(['', ...data.link.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '));
87
86
  }
88
- const fullXml = (_d = (_c = data.link) === null || _c === void 0 ? void 0 : _c.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _d === void 0 ? void 0 : _d.URL;
87
+ const fullXml = (_e = (_d = data.link) === null || _d === void 0 ? void 0 : _d.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _e === void 0 ? void 0 : _e.URL;
89
88
  if (fullXml)
90
89
  return fullXml;
91
90
  session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
@@ -95,26 +94,24 @@ function checkIfDoiHasJats(session, urlOrDoi) {
95
94
  /**
96
95
  * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
97
96
  */
98
- function convertPMID2PMCID(session, PMID) {
99
- var _a, _b;
97
+ function convertPMID2PMCID(session, PMID, opts) {
98
+ var _a, _b, _c;
100
99
  return __awaiter(this, void 0, void 0, function* () {
101
100
  if (PMID.startsWith('https://')) {
102
101
  const idPart = new URL(PMID).pathname.slice(1);
103
102
  session.log.debug(`Extract ${PMID} to ${idPart}`);
104
- return convertPMID2PMCID(session, idPart);
103
+ return convertPMID2PMCID(session, idPart, opts);
105
104
  }
106
105
  const toc = (0, myst_cli_utils_1.tic)();
107
106
  const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
108
- const resp = yield (0, node_fetch_1.default)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, {
109
- headers: [['Accept', 'application/json']],
110
- });
107
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, 'json');
111
108
  if (!resp.ok) {
112
109
  // Silently return -- other functions can try!
113
110
  session.log.debug(`Failed to convert PubMedID: ${PMID}`);
114
111
  return;
115
112
  }
116
113
  const data = yield resp.json();
117
- const PMCID = (_b = (_a = data === null || data === void 0 ? void 0 : data.records) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.pmcid;
114
+ const PMCID = (_c = (_b = data === null || data === void 0 ? void 0 : data.records) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.pmcid;
118
115
  session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
119
116
  return PMCID;
120
117
  });
@@ -124,8 +121,8 @@ function pubMedCentralJats(PMCID) {
124
121
  const normalized = PMCID.replace(/^PMC:?/, '');
125
122
  return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
126
123
  }
127
- function checkIfPubMedCentralHasJats(session, urlOrDoi) {
128
- var _a, _b;
124
+ function checkIfPubMedCentralHasJats(session, urlOrDoi, opts) {
125
+ var _a, _b, _c;
129
126
  return __awaiter(this, void 0, void 0, function* () {
130
127
  if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
131
128
  return pubMedCentralJats(urlOrDoi);
@@ -135,18 +132,18 @@ function checkIfPubMedCentralHasJats(session, urlOrDoi) {
135
132
  const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
136
133
  session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
137
134
  const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
138
- const resp = yield (0, node_fetch_1.default)(openAlexUrl, { headers: [['Accept', 'application/json']] });
135
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(openAlexUrl, 'json');
139
136
  if (!resp.ok) {
140
137
  // Silently return -- other functions can try!
141
138
  session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
142
139
  return;
143
140
  }
144
141
  const data = (yield resp.json());
145
- const PMID = (_a = data === null || data === void 0 ? void 0 : data.ids) === null || _a === void 0 ? void 0 : _a.pmid;
146
- let PMCID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmcid;
142
+ const PMID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmid;
143
+ let PMCID = (_c = data === null || data === void 0 ? void 0 : data.ids) === null || _c === void 0 ? void 0 : _c.pmcid;
147
144
  if (!PMCID && !!PMID) {
148
145
  session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`));
149
- PMCID = yield convertPMID2PMCID(session, PMID);
146
+ PMCID = yield convertPMID2PMCID(session, PMID, opts);
150
147
  if (!PMCID) {
151
148
  session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
152
149
  return;
@@ -161,18 +158,18 @@ function checkIfPubMedCentralHasJats(session, urlOrDoi) {
161
158
  });
162
159
  }
163
160
  exports.checkIfPubMedCentralHasJats = checkIfPubMedCentralHasJats;
164
- function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
161
+ function downloadJatsFromUrl(session, urlOrDoi, opts = {}) {
165
162
  return __awaiter(this, void 0, void 0, function* () {
166
163
  const expectedUrls = (yield Promise.all([
167
- checkIfPubMedCentralHasJats(session, urlOrDoi),
168
- checkIfDoiHasJats(session, urlOrDoi),
164
+ checkIfPubMedCentralHasJats(session, urlOrDoi, opts),
165
+ checkIfDoiHasJats(session, urlOrDoi, opts),
169
166
  ])).filter((u) => !!u);
170
167
  if (expectedUrls.length > 0) {
171
168
  session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
172
169
  for (let index = 0; index < expectedUrls.length; index++) {
173
170
  const url = expectedUrls[index];
174
171
  try {
175
- const data = yield dowloadFromUrl(session, url);
172
+ const data = yield dowloadFromUrl(session, url, opts);
176
173
  if (data)
177
174
  return { source: url, data };
178
175
  }
@@ -184,13 +181,13 @@ function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
184
181
  logAboutJatsFailing(session, expectedUrls);
185
182
  }
186
183
  if (doi_utils_1.default.validate(urlOrDoi)) {
187
- const jatsUrl = yield (0, resolvers_1.customResolveJatsUrlFromDoi)(session, urlOrDoi, resolvers);
188
- const data = yield dowloadFromUrl(session, jatsUrl);
184
+ const jatsUrl = yield (0, resolvers_1.customResolveJatsUrlFromDoi)(session, urlOrDoi, opts);
185
+ const data = yield dowloadFromUrl(session, jatsUrl, opts);
189
186
  return { source: jatsUrl, data };
190
187
  }
191
188
  if ((0, myst_cli_utils_1.isUrl)(urlOrDoi)) {
192
189
  session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
193
- const data = yield dowloadFromUrl(session, urlOrDoi);
190
+ const data = yield dowloadFromUrl(session, urlOrDoi, opts);
194
191
  return { source: urlOrDoi, data };
195
192
  }
196
193
  throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
@@ -47,16 +47,17 @@ exports.DEFAULT_RESOLVERS = [exports.elife, exports.plos, exports.joss];
47
47
  /**
48
48
  * Use the known custom resolvers to pick where the JATS should be downloaded from.
49
49
  */
50
- function customResolveJatsUrlFromDoi(session, doiString, resolvers = exports.DEFAULT_RESOLVERS) {
50
+ function customResolveJatsUrlFromDoi(session, doiString, opts = { resolvers: exports.DEFAULT_RESOLVERS }) {
51
+ var _a, _b;
51
52
  return __awaiter(this, void 0, void 0, function* () {
52
53
  if (!doi_utils_1.default.validate(doiString))
53
54
  throw new Error(`The doi ${doiString} is not valid`);
54
55
  const doiUrl = doi_utils_1.default.buildUrl(doiString);
55
56
  session.log.debug(`Resolving DOI ${doiUrl}`);
56
- const resp = yield (0, node_fetch_1.default)(doiUrl);
57
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : node_fetch_1.default)(doiUrl);
57
58
  const articleUrl = resp.url;
58
59
  session.log.debug(`Found resolved URL for DOI at ${articleUrl}`);
59
- const resolver = resolvers.find((r) => r.test(articleUrl));
60
+ const resolver = (_b = opts === null || opts === void 0 ? void 0 : opts.resolvers) === null || _b === void 0 ? void 0 : _b.find((r) => r.test(articleUrl));
60
61
  if (!resolver)
61
62
  throw new Error(`Could not resolve JATS for ${articleUrl}, no resolver matched`);
62
63
  const jatsUrl = resolver.jatsUrl(articleUrl);
@@ -1,4 +1,4 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- const version = '0.0.7';
3
+ const version = '0.0.9';
4
4
  exports.default = version;
@@ -24,7 +24,7 @@ import { findArticleId, formatDate, toDate } from '../utils';
24
24
  function hasValidExtension(output) {
25
25
  return ['.xml', '.jats'].includes(extname(output).toLowerCase());
26
26
  }
27
- function downloadAndSaveJats(session, urlOrDoi, output) {
27
+ function downloadAndSaveJats(session, urlOrDoi, output, opts = { resolvers: DEFAULT_RESOLVERS }) {
28
28
  return __awaiter(this, void 0, void 0, function* () {
29
29
  if (fs.existsSync(urlOrDoi)) {
30
30
  throw new Error(`File "${urlOrDoi}" is local and cannot be downloaded!`);
@@ -35,12 +35,12 @@ function downloadAndSaveJats(session, urlOrDoi, output) {
35
35
  if (!hasValidExtension(output)) {
36
36
  session.log.warn(`The extension ${extname(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
37
37
  }
38
- const { data } = yield downloadJatsFromUrl(session, urlOrDoi, DEFAULT_RESOLVERS);
38
+ const { data } = yield downloadJatsFromUrl(session, urlOrDoi, opts);
39
39
  writeFileToFolder(output, data);
40
40
  return data;
41
41
  });
42
42
  }
43
- function parseJats(session, file) {
43
+ function parseJats(session, file, opts = { resolvers: DEFAULT_RESOLVERS }) {
44
44
  return __awaiter(this, void 0, void 0, function* () {
45
45
  const toc = tic();
46
46
  if (fs.existsSync(file)) {
@@ -48,7 +48,7 @@ function parseJats(session, file) {
48
48
  const data = fs.readFileSync(file).toString();
49
49
  return new Jats(data, { log: session.log });
50
50
  }
51
- const { source, data } = yield downloadJatsFromUrl(session, file, DEFAULT_RESOLVERS);
51
+ const { source, data } = yield downloadJatsFromUrl(session, file, opts);
52
52
  const jats = new Jats(data, { source, log: session.log });
53
53
  session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
54
54
  return jats;
@@ -21,28 +21,17 @@ function logAboutJatsFailing(session, jatsUrls) {
21
21
  session.log.debug(formatPrinciples('A*', { chalk }));
22
22
  session.log.info(`${chalk.blue('The link may work in a browser.')}\n`);
23
23
  }
24
- function dowloadFromUrl(session, jatsUrl) {
24
+ function dowloadFromUrl(session, jatsUrl, opts) {
25
+ var _a, _b;
25
26
  return __awaiter(this, void 0, void 0, function* () {
26
27
  const toc = tic();
27
28
  session.log.debug(`Fetching JATS from ${jatsUrl}`);
28
- const resp = yield fetch(jatsUrl, {
29
- headers: [
30
- ['accept', 'application/xml'],
31
- [
32
- 'user-agent',
33
- // A bunch of publishers just show the login screen or quickly block you.
34
- // We don't want to DDOS these publishers, they are the _good ones_ for sharing the XML!!
35
- // But some block on the second request?!
36
- // So we can pretend to be a random browser, I guess. How silly. 🤷‍♂️
37
- `Mozilla/5.0 (Macintosh; Intel Mac OS X ${Math.floor(Math.random() * 100)})`,
38
- ],
39
- ],
40
- });
29
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(jatsUrl, 'xml');
41
30
  if (!resp.ok) {
42
31
  session.log.debug(`JATS failed to download from "${jatsUrl}"`);
43
32
  throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
44
33
  }
45
- const contentType = resp.headers.get('content-type');
34
+ const contentType = (_b = resp.headers) === null || _b === void 0 ? void 0 : _b.get('content-type');
46
35
  if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
47
36
  (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
48
37
  (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
@@ -53,6 +42,16 @@ function dowloadFromUrl(session, jatsUrl) {
53
42
  return data;
54
43
  });
55
44
  }
45
+ function defaultFetcher(url, kind) {
46
+ switch (kind) {
47
+ case 'json':
48
+ return fetch(url, { headers: [['Accept', 'application/json']] });
49
+ case 'xml':
50
+ return fetch(url, { headers: [['Accept', 'application/xml']] });
51
+ default:
52
+ return fetch(url);
53
+ }
54
+ }
56
55
  /**
57
56
  * There are 5.8M or so DOIs that have a full XML record:
58
57
  *
@@ -60,26 +59,26 @@ function dowloadFromUrl(session, jatsUrl) {
60
59
  *
61
60
  * This function tries to find the correct URL for the record.
62
61
  */
63
- function checkIfDoiHasJats(session, urlOrDoi) {
64
- var _a, _b, _c, _d;
62
+ function checkIfDoiHasJats(session, urlOrDoi, opts) {
63
+ var _a, _b, _c, _d, _e;
65
64
  return __awaiter(this, void 0, void 0, function* () {
66
65
  if (!doi.validate(urlOrDoi))
67
66
  return;
68
67
  const toc = tic();
69
68
  const doiUrl = doi.buildUrl(urlOrDoi);
70
69
  session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
71
- const resp = yield fetch(doiUrl, { headers: [['Accept', 'application/json']] });
70
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(doiUrl, 'json');
72
71
  if (!resp.ok) {
73
72
  // Silently return -- other functions can try!
74
73
  session.log.debug(`DOI failed to resolve: ${doiUrl}`);
75
74
  return;
76
75
  }
77
76
  const data = (yield resp.json());
78
- session.log.debug(toc(`DOI resolved in %s with ${(_b = (_a = data.link) === null || _a === void 0 ? void 0 : _a.length) !== null && _b !== void 0 ? _b : 0} links to content`));
77
+ session.log.debug(toc(`DOI resolved in %s with ${(_c = (_b = data.link) === null || _b === void 0 ? void 0 : _b.length) !== null && _c !== void 0 ? _c : 0} links to content`));
79
78
  if (data.link) {
80
79
  session.log.debug(['', ...data.link.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '));
81
80
  }
82
- const fullXml = (_d = (_c = data.link) === null || _c === void 0 ? void 0 : _c.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _d === void 0 ? void 0 : _d.URL;
81
+ const fullXml = (_e = (_d = data.link) === null || _d === void 0 ? void 0 : _d.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _e === void 0 ? void 0 : _e.URL;
83
82
  if (fullXml)
84
83
  return fullXml;
85
84
  session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
@@ -89,26 +88,24 @@ function checkIfDoiHasJats(session, urlOrDoi) {
89
88
  /**
90
89
  * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
91
90
  */
92
- export function convertPMID2PMCID(session, PMID) {
93
- var _a, _b;
91
+ export function convertPMID2PMCID(session, PMID, opts) {
92
+ var _a, _b, _c;
94
93
  return __awaiter(this, void 0, void 0, function* () {
95
94
  if (PMID.startsWith('https://')) {
96
95
  const idPart = new URL(PMID).pathname.slice(1);
97
96
  session.log.debug(`Extract ${PMID} to ${idPart}`);
98
- return convertPMID2PMCID(session, idPart);
97
+ return convertPMID2PMCID(session, idPart, opts);
99
98
  }
100
99
  const toc = tic();
101
100
  const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
102
- const resp = yield fetch(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, {
103
- headers: [['Accept', 'application/json']],
104
- });
101
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, 'json');
105
102
  if (!resp.ok) {
106
103
  // Silently return -- other functions can try!
107
104
  session.log.debug(`Failed to convert PubMedID: ${PMID}`);
108
105
  return;
109
106
  }
110
107
  const data = yield resp.json();
111
- const PMCID = (_b = (_a = data === null || data === void 0 ? void 0 : data.records) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.pmcid;
108
+ const PMCID = (_c = (_b = data === null || data === void 0 ? void 0 : data.records) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.pmcid;
112
109
  session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
113
110
  return PMCID;
114
111
  });
@@ -117,8 +114,8 @@ function pubMedCentralJats(PMCID) {
117
114
  const normalized = PMCID.replace(/^PMC:?/, '');
118
115
  return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
119
116
  }
120
- export function checkIfPubMedCentralHasJats(session, urlOrDoi) {
121
- var _a, _b;
117
+ export function checkIfPubMedCentralHasJats(session, urlOrDoi, opts) {
118
+ var _a, _b, _c;
122
119
  return __awaiter(this, void 0, void 0, function* () {
123
120
  if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
124
121
  return pubMedCentralJats(urlOrDoi);
@@ -128,18 +125,18 @@ export function checkIfPubMedCentralHasJats(session, urlOrDoi) {
128
125
  const doiUrl = doi.buildUrl(urlOrDoi);
129
126
  session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
130
127
  const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
131
- const resp = yield fetch(openAlexUrl, { headers: [['Accept', 'application/json']] });
128
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(openAlexUrl, 'json');
132
129
  if (!resp.ok) {
133
130
  // Silently return -- other functions can try!
134
131
  session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
135
132
  return;
136
133
  }
137
134
  const data = (yield resp.json());
138
- const PMID = (_a = data === null || data === void 0 ? void 0 : data.ids) === null || _a === void 0 ? void 0 : _a.pmid;
139
- let PMCID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmcid;
135
+ const PMID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmid;
136
+ let PMCID = (_c = data === null || data === void 0 ? void 0 : data.ids) === null || _c === void 0 ? void 0 : _c.pmcid;
140
137
  if (!PMCID && !!PMID) {
141
138
  session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`));
142
- PMCID = yield convertPMID2PMCID(session, PMID);
139
+ PMCID = yield convertPMID2PMCID(session, PMID, opts);
143
140
  if (!PMCID) {
144
141
  session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
145
142
  return;
@@ -153,18 +150,18 @@ export function checkIfPubMedCentralHasJats(session, urlOrDoi) {
153
150
  return pubMedCentralJats(PMCID);
154
151
  });
155
152
  }
156
- export function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
153
+ export function downloadJatsFromUrl(session, urlOrDoi, opts = {}) {
157
154
  return __awaiter(this, void 0, void 0, function* () {
158
155
  const expectedUrls = (yield Promise.all([
159
- checkIfPubMedCentralHasJats(session, urlOrDoi),
160
- checkIfDoiHasJats(session, urlOrDoi),
156
+ checkIfPubMedCentralHasJats(session, urlOrDoi, opts),
157
+ checkIfDoiHasJats(session, urlOrDoi, opts),
161
158
  ])).filter((u) => !!u);
162
159
  if (expectedUrls.length > 0) {
163
160
  session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
164
161
  for (let index = 0; index < expectedUrls.length; index++) {
165
162
  const url = expectedUrls[index];
166
163
  try {
167
- const data = yield dowloadFromUrl(session, url);
164
+ const data = yield dowloadFromUrl(session, url, opts);
168
165
  if (data)
169
166
  return { source: url, data };
170
167
  }
@@ -176,13 +173,13 @@ export function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
176
173
  logAboutJatsFailing(session, expectedUrls);
177
174
  }
178
175
  if (doi.validate(urlOrDoi)) {
179
- const jatsUrl = yield customResolveJatsUrlFromDoi(session, urlOrDoi, resolvers);
180
- const data = yield dowloadFromUrl(session, jatsUrl);
176
+ const jatsUrl = yield customResolveJatsUrlFromDoi(session, urlOrDoi, opts);
177
+ const data = yield dowloadFromUrl(session, jatsUrl, opts);
181
178
  return { source: jatsUrl, data };
182
179
  }
183
180
  if (isUrl(urlOrDoi)) {
184
181
  session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
185
- const data = yield dowloadFromUrl(session, urlOrDoi);
182
+ const data = yield dowloadFromUrl(session, urlOrDoi, opts);
186
183
  return { source: urlOrDoi, data };
187
184
  }
188
185
  throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
@@ -41,16 +41,17 @@ export const DEFAULT_RESOLVERS = [elife, plos, joss];
41
41
  /**
42
42
  * Use the known custom resolvers to pick where the JATS should be downloaded from.
43
43
  */
44
- export function customResolveJatsUrlFromDoi(session, doiString, resolvers = DEFAULT_RESOLVERS) {
44
+ export function customResolveJatsUrlFromDoi(session, doiString, opts = { resolvers: DEFAULT_RESOLVERS }) {
45
+ var _a, _b;
45
46
  return __awaiter(this, void 0, void 0, function* () {
46
47
  if (!doi.validate(doiString))
47
48
  throw new Error(`The doi ${doiString} is not valid`);
48
49
  const doiUrl = doi.buildUrl(doiString);
49
50
  session.log.debug(`Resolving DOI ${doiUrl}`);
50
- const resp = yield fetch(doiUrl);
51
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : fetch)(doiUrl);
51
52
  const articleUrl = resp.url;
52
53
  session.log.debug(`Found resolved URL for DOI at ${articleUrl}`);
53
- const resolver = resolvers.find((r) => r.test(articleUrl));
54
+ const resolver = (_b = opts === null || opts === void 0 ? void 0 : opts.resolvers) === null || _b === void 0 ? void 0 : _b.find((r) => r.test(articleUrl));
54
55
  if (!resolver)
55
56
  throw new Error(`Could not resolve JATS for ${articleUrl}, no resolver matched`);
56
57
  const jatsUrl = resolver.jatsUrl(articleUrl);
@@ -1,2 +1,2 @@
1
- const version = '0.0.7';
1
+ const version = '0.0.9';
2
2
  export default version;