jats-xml 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,7 +30,7 @@ const utils_1 = require("../utils");
30
30
  function hasValidExtension(output) {
31
31
  return ['.xml', '.jats'].includes((0, path_1.extname)(output).toLowerCase());
32
32
  }
33
- function downloadAndSaveJats(session, urlOrDoi, output) {
33
+ function downloadAndSaveJats(session, urlOrDoi, output, opts = { resolvers: resolvers_1.DEFAULT_RESOLVERS }) {
34
34
  return __awaiter(this, void 0, void 0, function* () {
35
35
  if (fs_1.default.existsSync(urlOrDoi)) {
36
36
  throw new Error(`File "${urlOrDoi}" is local and cannot be downloaded!`);
@@ -41,12 +41,12 @@ function downloadAndSaveJats(session, urlOrDoi, output) {
41
41
  if (!hasValidExtension(output)) {
42
42
  session.log.warn(`The extension ${(0, path_1.extname)(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
43
43
  }
44
- const { data } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
44
+ const { data } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, opts);
45
45
  (0, myst_cli_utils_1.writeFileToFolder)(output, data);
46
46
  return data;
47
47
  });
48
48
  }
49
- function parseJats(session, file) {
49
+ function parseJats(session, file, opts = { resolvers: resolvers_1.DEFAULT_RESOLVERS }) {
50
50
  return __awaiter(this, void 0, void 0, function* () {
51
51
  const toc = (0, myst_cli_utils_1.tic)();
52
52
  if (fs_1.default.existsSync(file)) {
@@ -54,7 +54,7 @@ function parseJats(session, file) {
54
54
  const data = fs_1.default.readFileSync(file).toString();
55
55
  return new jats_1.Jats(data, { log: session.log });
56
56
  }
57
- const { source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, resolvers_1.DEFAULT_RESOLVERS);
57
+ const { source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, opts);
58
58
  const jats = new jats_1.Jats(data, { source, log: session.log });
59
59
  session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
60
60
  return jats;
@@ -27,28 +27,17 @@ function logAboutJatsFailing(session, jatsUrls) {
27
27
  session.log.debug((0, fair_principles_1.formatPrinciples)('A*', { chalk: chalk_1.default }));
28
28
  session.log.info(`${chalk_1.default.blue('The link may work in a browser.')}\n`);
29
29
  }
30
- function dowloadFromUrl(session, jatsUrl) {
30
+ function dowloadFromUrl(session, jatsUrl, opts) {
31
+ var _a, _b;
31
32
  return __awaiter(this, void 0, void 0, function* () {
32
33
  const toc = (0, myst_cli_utils_1.tic)();
33
34
  session.log.debug(`Fetching JATS from ${jatsUrl}`);
34
- const resp = yield (0, node_fetch_1.default)(jatsUrl, {
35
- headers: [
36
- ['accept', 'application/xml'],
37
- [
38
- 'user-agent',
39
- // A bunch of publishers just show the login screen or quickly block you.
40
- // We don't want to DDOS these publishers, they are the _good ones_ for sharing the XML!!
41
- // But some block on the second request?!
42
- // So we can pretend to be a random browser, I guess. How silly. 🤷‍♂️
43
- `Mozilla/5.0 (Macintosh; Intel Mac OS X ${Math.floor(Math.random() * 100)})`,
44
- ],
45
- ],
46
- });
35
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(jatsUrl, 'xml');
47
36
  if (!resp.ok) {
48
37
  session.log.debug(`JATS failed to download from "${jatsUrl}"`);
49
38
  throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
50
39
  }
51
- const contentType = resp.headers.get('content-type');
40
+ const contentType = (_b = resp.headers) === null || _b === void 0 ? void 0 : _b.get('content-type');
52
41
  if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
53
42
  (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
54
43
  (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
@@ -59,6 +48,16 @@ function dowloadFromUrl(session, jatsUrl) {
59
48
  return data;
60
49
  });
61
50
  }
51
+ function defaultFetcher(url, kind) {
52
+ switch (kind) {
53
+ case 'json':
54
+ return (0, node_fetch_1.default)(url, { headers: [['Accept', 'application/json']] });
55
+ case 'xml':
56
+ return (0, node_fetch_1.default)(url, { headers: [['Accept', 'application/xml']] });
57
+ default:
58
+ return (0, node_fetch_1.default)(url);
59
+ }
60
+ }
62
61
  /**
63
62
  * There are 5.8M or so DOIs that have a full XML record:
64
63
  *
@@ -66,26 +65,26 @@ function dowloadFromUrl(session, jatsUrl) {
66
65
  *
67
66
  * This function tries to find the correct URL for the record.
68
67
  */
69
- function checkIfDoiHasJats(session, urlOrDoi) {
70
- var _a, _b, _c, _d;
68
+ function checkIfDoiHasJats(session, urlOrDoi, opts) {
69
+ var _a, _b, _c, _d, _e;
71
70
  return __awaiter(this, void 0, void 0, function* () {
72
71
  if (!doi_utils_1.default.validate(urlOrDoi))
73
72
  return;
74
73
  const toc = (0, myst_cli_utils_1.tic)();
75
74
  const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
76
75
  session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
77
- const resp = yield (0, node_fetch_1.default)(doiUrl, { headers: [['Accept', 'application/json']] });
76
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(doiUrl, 'json');
78
77
  if (!resp.ok) {
79
78
  // Silently return -- other functions can try!
80
79
  session.log.debug(`DOI failed to resolve: ${doiUrl}`);
81
80
  return;
82
81
  }
83
82
  const data = (yield resp.json());
84
- session.log.debug(toc(`DOI resolved in %s with ${(_b = (_a = data.link) === null || _a === void 0 ? void 0 : _a.length) !== null && _b !== void 0 ? _b : 0} links to content`));
83
+ session.log.debug(toc(`DOI resolved in %s with ${(_c = (_b = data.link) === null || _b === void 0 ? void 0 : _b.length) !== null && _c !== void 0 ? _c : 0} links to content`));
85
84
  if (data.link) {
86
85
  session.log.debug(['', ...data.link.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '));
87
86
  }
88
- const fullXml = (_d = (_c = data.link) === null || _c === void 0 ? void 0 : _c.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _d === void 0 ? void 0 : _d.URL;
87
+ const fullXml = (_e = (_d = data.link) === null || _d === void 0 ? void 0 : _d.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _e === void 0 ? void 0 : _e.URL;
89
88
  if (fullXml)
90
89
  return fullXml;
91
90
  session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
@@ -95,26 +94,24 @@ function checkIfDoiHasJats(session, urlOrDoi) {
95
94
  /**
96
95
  * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
97
96
  */
98
- function convertPMID2PMCID(session, PMID) {
99
- var _a, _b;
97
+ function convertPMID2PMCID(session, PMID, opts) {
98
+ var _a, _b, _c;
100
99
  return __awaiter(this, void 0, void 0, function* () {
101
100
  if (PMID.startsWith('https://')) {
102
101
  const idPart = new URL(PMID).pathname.slice(1);
103
102
  session.log.debug(`Extract ${PMID} to ${idPart}`);
104
- return convertPMID2PMCID(session, idPart);
103
+ return convertPMID2PMCID(session, idPart, opts);
105
104
  }
106
105
  const toc = (0, myst_cli_utils_1.tic)();
107
106
  const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
108
- const resp = yield (0, node_fetch_1.default)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, {
109
- headers: [['Accept', 'application/json']],
110
- });
107
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, 'json');
111
108
  if (!resp.ok) {
112
109
  // Silently return -- other functions can try!
113
110
  session.log.debug(`Failed to convert PubMedID: ${PMID}`);
114
111
  return;
115
112
  }
116
113
  const data = yield resp.json();
117
- const PMCID = (_b = (_a = data === null || data === void 0 ? void 0 : data.records) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.pmcid;
114
+ const PMCID = (_c = (_b = data === null || data === void 0 ? void 0 : data.records) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.pmcid;
118
115
  session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
119
116
  return PMCID;
120
117
  });
@@ -124,8 +121,8 @@ function pubMedCentralJats(PMCID) {
124
121
  const normalized = PMCID.replace(/^PMC:?/, '');
125
122
  return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
126
123
  }
127
- function checkIfPubMedCentralHasJats(session, urlOrDoi) {
128
- var _a, _b;
124
+ function checkIfPubMedCentralHasJats(session, urlOrDoi, opts) {
125
+ var _a, _b, _c;
129
126
  return __awaiter(this, void 0, void 0, function* () {
130
127
  if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
131
128
  return pubMedCentralJats(urlOrDoi);
@@ -135,18 +132,18 @@ function checkIfPubMedCentralHasJats(session, urlOrDoi) {
135
132
  const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
136
133
  session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
137
134
  const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
138
- const resp = yield (0, node_fetch_1.default)(openAlexUrl, { headers: [['Accept', 'application/json']] });
135
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(openAlexUrl, 'json');
139
136
  if (!resp.ok) {
140
137
  // Silently return -- other functions can try!
141
138
  session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
142
139
  return;
143
140
  }
144
141
  const data = (yield resp.json());
145
- const PMID = (_a = data === null || data === void 0 ? void 0 : data.ids) === null || _a === void 0 ? void 0 : _a.pmid;
146
- let PMCID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmcid;
142
+ const PMID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmid;
143
+ let PMCID = (_c = data === null || data === void 0 ? void 0 : data.ids) === null || _c === void 0 ? void 0 : _c.pmcid;
147
144
  if (!PMCID && !!PMID) {
148
145
  session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`));
149
- PMCID = yield convertPMID2PMCID(session, PMID);
146
+ PMCID = yield convertPMID2PMCID(session, PMID, opts);
150
147
  if (!PMCID) {
151
148
  session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
152
149
  return;
@@ -161,18 +158,18 @@ function checkIfPubMedCentralHasJats(session, urlOrDoi) {
161
158
  });
162
159
  }
163
160
  exports.checkIfPubMedCentralHasJats = checkIfPubMedCentralHasJats;
164
- function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
161
+ function downloadJatsFromUrl(session, urlOrDoi, opts = {}) {
165
162
  return __awaiter(this, void 0, void 0, function* () {
166
163
  const expectedUrls = (yield Promise.all([
167
- checkIfPubMedCentralHasJats(session, urlOrDoi),
168
- checkIfDoiHasJats(session, urlOrDoi),
164
+ checkIfPubMedCentralHasJats(session, urlOrDoi, opts),
165
+ checkIfDoiHasJats(session, urlOrDoi, opts),
169
166
  ])).filter((u) => !!u);
170
167
  if (expectedUrls.length > 0) {
171
168
  session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
172
169
  for (let index = 0; index < expectedUrls.length; index++) {
173
170
  const url = expectedUrls[index];
174
171
  try {
175
- const data = yield dowloadFromUrl(session, url);
172
+ const data = yield dowloadFromUrl(session, url, opts);
176
173
  if (data)
177
174
  return { source: url, data };
178
175
  }
@@ -184,13 +181,13 @@ function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
184
181
  logAboutJatsFailing(session, expectedUrls);
185
182
  }
186
183
  if (doi_utils_1.default.validate(urlOrDoi)) {
187
- const jatsUrl = yield (0, resolvers_1.customResolveJatsUrlFromDoi)(session, urlOrDoi, resolvers);
188
- const data = yield dowloadFromUrl(session, jatsUrl);
184
+ const jatsUrl = yield (0, resolvers_1.customResolveJatsUrlFromDoi)(session, urlOrDoi, opts);
185
+ const data = yield dowloadFromUrl(session, jatsUrl, opts);
189
186
  return { source: jatsUrl, data };
190
187
  }
191
188
  if ((0, myst_cli_utils_1.isUrl)(urlOrDoi)) {
192
189
  session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
193
- const data = yield dowloadFromUrl(session, urlOrDoi);
190
+ const data = yield dowloadFromUrl(session, urlOrDoi, opts);
194
191
  return { source: urlOrDoi, data };
195
192
  }
196
193
  throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
@@ -47,16 +47,17 @@ exports.DEFAULT_RESOLVERS = [exports.elife, exports.plos, exports.joss];
47
47
  /**
48
48
  * Use the known custom resolvers to pick where the JATS should be downloaded from.
49
49
  */
50
- function customResolveJatsUrlFromDoi(session, doiString, resolvers = exports.DEFAULT_RESOLVERS) {
50
+ function customResolveJatsUrlFromDoi(session, doiString, opts = { resolvers: exports.DEFAULT_RESOLVERS }) {
51
+ var _a, _b;
51
52
  return __awaiter(this, void 0, void 0, function* () {
52
53
  if (!doi_utils_1.default.validate(doiString))
53
54
  throw new Error(`The doi ${doiString} is not valid`);
54
55
  const doiUrl = doi_utils_1.default.buildUrl(doiString);
55
56
  session.log.debug(`Resolving DOI ${doiUrl}`);
56
- const resp = yield (0, node_fetch_1.default)(doiUrl);
57
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : node_fetch_1.default)(doiUrl);
57
58
  const articleUrl = resp.url;
58
59
  session.log.debug(`Found resolved URL for DOI at ${articleUrl}`);
59
- const resolver = resolvers.find((r) => r.test(articleUrl));
60
+ const resolver = (_b = opts === null || opts === void 0 ? void 0 : opts.resolvers) === null || _b === void 0 ? void 0 : _b.find((r) => r.test(articleUrl));
60
61
  if (!resolver)
61
62
  throw new Error(`Could not resolve JATS for ${articleUrl}, no resolver matched`);
62
63
  const jatsUrl = resolver.jatsUrl(articleUrl);
@@ -1,4 +1,4 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- const version = '0.0.7';
3
+ const version = '0.0.8';
4
4
  exports.default = version;
@@ -24,7 +24,7 @@ import { findArticleId, formatDate, toDate } from '../utils';
24
24
  function hasValidExtension(output) {
25
25
  return ['.xml', '.jats'].includes(extname(output).toLowerCase());
26
26
  }
27
- function downloadAndSaveJats(session, urlOrDoi, output) {
27
+ function downloadAndSaveJats(session, urlOrDoi, output, opts = { resolvers: DEFAULT_RESOLVERS }) {
28
28
  return __awaiter(this, void 0, void 0, function* () {
29
29
  if (fs.existsSync(urlOrDoi)) {
30
30
  throw new Error(`File "${urlOrDoi}" is local and cannot be downloaded!`);
@@ -35,12 +35,12 @@ function downloadAndSaveJats(session, urlOrDoi, output) {
35
35
  if (!hasValidExtension(output)) {
36
36
  session.log.warn(`The extension ${extname(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
37
37
  }
38
- const { data } = yield downloadJatsFromUrl(session, urlOrDoi, DEFAULT_RESOLVERS);
38
+ const { data } = yield downloadJatsFromUrl(session, urlOrDoi, opts);
39
39
  writeFileToFolder(output, data);
40
40
  return data;
41
41
  });
42
42
  }
43
- function parseJats(session, file) {
43
+ function parseJats(session, file, opts = { resolvers: DEFAULT_RESOLVERS }) {
44
44
  return __awaiter(this, void 0, void 0, function* () {
45
45
  const toc = tic();
46
46
  if (fs.existsSync(file)) {
@@ -48,7 +48,7 @@ function parseJats(session, file) {
48
48
  const data = fs.readFileSync(file).toString();
49
49
  return new Jats(data, { log: session.log });
50
50
  }
51
- const { source, data } = yield downloadJatsFromUrl(session, file, DEFAULT_RESOLVERS);
51
+ const { source, data } = yield downloadJatsFromUrl(session, file, opts);
52
52
  const jats = new Jats(data, { source, log: session.log });
53
53
  session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
54
54
  return jats;
@@ -21,28 +21,17 @@ function logAboutJatsFailing(session, jatsUrls) {
21
21
  session.log.debug(formatPrinciples('A*', { chalk }));
22
22
  session.log.info(`${chalk.blue('The link may work in a browser.')}\n`);
23
23
  }
24
- function dowloadFromUrl(session, jatsUrl) {
24
+ function dowloadFromUrl(session, jatsUrl, opts) {
25
+ var _a, _b;
25
26
  return __awaiter(this, void 0, void 0, function* () {
26
27
  const toc = tic();
27
28
  session.log.debug(`Fetching JATS from ${jatsUrl}`);
28
- const resp = yield fetch(jatsUrl, {
29
- headers: [
30
- ['accept', 'application/xml'],
31
- [
32
- 'user-agent',
33
- // A bunch of publishers just show the login screen or quickly block you.
34
- // We don't want to DDOS these publishers, they are the _good ones_ for sharing the XML!!
35
- // But some block on the second request?!
36
- // So we can pretend to be a random browser, I guess. How silly. 🤷‍♂️
37
- `Mozilla/5.0 (Macintosh; Intel Mac OS X ${Math.floor(Math.random() * 100)})`,
38
- ],
39
- ],
40
- });
29
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(jatsUrl, 'xml');
41
30
  if (!resp.ok) {
42
31
  session.log.debug(`JATS failed to download from "${jatsUrl}"`);
43
32
  throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
44
33
  }
45
- const contentType = resp.headers.get('content-type');
34
+ const contentType = (_b = resp.headers) === null || _b === void 0 ? void 0 : _b.get('content-type');
46
35
  if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
47
36
  (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
48
37
  (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
@@ -53,6 +42,16 @@ function dowloadFromUrl(session, jatsUrl) {
53
42
  return data;
54
43
  });
55
44
  }
45
+ function defaultFetcher(url, kind) {
46
+ switch (kind) {
47
+ case 'json':
48
+ return fetch(url, { headers: [['Accept', 'application/json']] });
49
+ case 'xml':
50
+ return fetch(url, { headers: [['Accept', 'application/xml']] });
51
+ default:
52
+ return fetch(url);
53
+ }
54
+ }
56
55
  /**
57
56
  * There are 5.8M or so DOIs that have a full XML record:
58
57
  *
@@ -60,26 +59,26 @@ function dowloadFromUrl(session, jatsUrl) {
60
59
  *
61
60
  * This function tries to find the correct URL for the record.
62
61
  */
63
- function checkIfDoiHasJats(session, urlOrDoi) {
64
- var _a, _b, _c, _d;
62
+ function checkIfDoiHasJats(session, urlOrDoi, opts) {
63
+ var _a, _b, _c, _d, _e;
65
64
  return __awaiter(this, void 0, void 0, function* () {
66
65
  if (!doi.validate(urlOrDoi))
67
66
  return;
68
67
  const toc = tic();
69
68
  const doiUrl = doi.buildUrl(urlOrDoi);
70
69
  session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
71
- const resp = yield fetch(doiUrl, { headers: [['Accept', 'application/json']] });
70
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(doiUrl, 'json');
72
71
  if (!resp.ok) {
73
72
  // Silently return -- other functions can try!
74
73
  session.log.debug(`DOI failed to resolve: ${doiUrl}`);
75
74
  return;
76
75
  }
77
76
  const data = (yield resp.json());
78
- session.log.debug(toc(`DOI resolved in %s with ${(_b = (_a = data.link) === null || _a === void 0 ? void 0 : _a.length) !== null && _b !== void 0 ? _b : 0} links to content`));
77
+ session.log.debug(toc(`DOI resolved in %s with ${(_c = (_b = data.link) === null || _b === void 0 ? void 0 : _b.length) !== null && _c !== void 0 ? _c : 0} links to content`));
79
78
  if (data.link) {
80
79
  session.log.debug(['', ...data.link.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '));
81
80
  }
82
- const fullXml = (_d = (_c = data.link) === null || _c === void 0 ? void 0 : _c.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _d === void 0 ? void 0 : _d.URL;
81
+ const fullXml = (_e = (_d = data.link) === null || _d === void 0 ? void 0 : _d.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _e === void 0 ? void 0 : _e.URL;
83
82
  if (fullXml)
84
83
  return fullXml;
85
84
  session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
@@ -89,26 +88,24 @@ function checkIfDoiHasJats(session, urlOrDoi) {
89
88
  /**
90
89
  * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
91
90
  */
92
- export function convertPMID2PMCID(session, PMID) {
93
- var _a, _b;
91
+ export function convertPMID2PMCID(session, PMID, opts) {
92
+ var _a, _b, _c;
94
93
  return __awaiter(this, void 0, void 0, function* () {
95
94
  if (PMID.startsWith('https://')) {
96
95
  const idPart = new URL(PMID).pathname.slice(1);
97
96
  session.log.debug(`Extract ${PMID} to ${idPart}`);
98
- return convertPMID2PMCID(session, idPart);
97
+ return convertPMID2PMCID(session, idPart, opts);
99
98
  }
100
99
  const toc = tic();
101
100
  const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
102
- const resp = yield fetch(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, {
103
- headers: [['Accept', 'application/json']],
104
- });
101
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, 'json');
105
102
  if (!resp.ok) {
106
103
  // Silently return -- other functions can try!
107
104
  session.log.debug(`Failed to convert PubMedID: ${PMID}`);
108
105
  return;
109
106
  }
110
107
  const data = yield resp.json();
111
- const PMCID = (_b = (_a = data === null || data === void 0 ? void 0 : data.records) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.pmcid;
108
+ const PMCID = (_c = (_b = data === null || data === void 0 ? void 0 : data.records) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.pmcid;
112
109
  session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
113
110
  return PMCID;
114
111
  });
@@ -117,8 +114,8 @@ function pubMedCentralJats(PMCID) {
117
114
  const normalized = PMCID.replace(/^PMC:?/, '');
118
115
  return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
119
116
  }
120
- export function checkIfPubMedCentralHasJats(session, urlOrDoi) {
121
- var _a, _b;
117
+ export function checkIfPubMedCentralHasJats(session, urlOrDoi, opts) {
118
+ var _a, _b, _c;
122
119
  return __awaiter(this, void 0, void 0, function* () {
123
120
  if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
124
121
  return pubMedCentralJats(urlOrDoi);
@@ -128,18 +125,18 @@ export function checkIfPubMedCentralHasJats(session, urlOrDoi) {
128
125
  const doiUrl = doi.buildUrl(urlOrDoi);
129
126
  session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
130
127
  const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
131
- const resp = yield fetch(openAlexUrl, { headers: [['Accept', 'application/json']] });
128
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(openAlexUrl, 'json');
132
129
  if (!resp.ok) {
133
130
  // Silently return -- other functions can try!
134
131
  session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
135
132
  return;
136
133
  }
137
134
  const data = (yield resp.json());
138
- const PMID = (_a = data === null || data === void 0 ? void 0 : data.ids) === null || _a === void 0 ? void 0 : _a.pmid;
139
- let PMCID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmcid;
135
+ const PMID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmid;
136
+ let PMCID = (_c = data === null || data === void 0 ? void 0 : data.ids) === null || _c === void 0 ? void 0 : _c.pmcid;
140
137
  if (!PMCID && !!PMID) {
141
138
  session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`));
142
- PMCID = yield convertPMID2PMCID(session, PMID);
139
+ PMCID = yield convertPMID2PMCID(session, PMID, opts);
143
140
  if (!PMCID) {
144
141
  session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
145
142
  return;
@@ -153,18 +150,18 @@ export function checkIfPubMedCentralHasJats(session, urlOrDoi) {
153
150
  return pubMedCentralJats(PMCID);
154
151
  });
155
152
  }
156
- export function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
153
+ export function downloadJatsFromUrl(session, urlOrDoi, opts = {}) {
157
154
  return __awaiter(this, void 0, void 0, function* () {
158
155
  const expectedUrls = (yield Promise.all([
159
- checkIfPubMedCentralHasJats(session, urlOrDoi),
160
- checkIfDoiHasJats(session, urlOrDoi),
156
+ checkIfPubMedCentralHasJats(session, urlOrDoi, opts),
157
+ checkIfDoiHasJats(session, urlOrDoi, opts),
161
158
  ])).filter((u) => !!u);
162
159
  if (expectedUrls.length > 0) {
163
160
  session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
164
161
  for (let index = 0; index < expectedUrls.length; index++) {
165
162
  const url = expectedUrls[index];
166
163
  try {
167
- const data = yield dowloadFromUrl(session, url);
164
+ const data = yield dowloadFromUrl(session, url, opts);
168
165
  if (data)
169
166
  return { source: url, data };
170
167
  }
@@ -176,13 +173,13 @@ export function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
176
173
  logAboutJatsFailing(session, expectedUrls);
177
174
  }
178
175
  if (doi.validate(urlOrDoi)) {
179
- const jatsUrl = yield customResolveJatsUrlFromDoi(session, urlOrDoi, resolvers);
180
- const data = yield dowloadFromUrl(session, jatsUrl);
176
+ const jatsUrl = yield customResolveJatsUrlFromDoi(session, urlOrDoi, opts);
177
+ const data = yield dowloadFromUrl(session, jatsUrl, opts);
181
178
  return { source: jatsUrl, data };
182
179
  }
183
180
  if (isUrl(urlOrDoi)) {
184
181
  session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
185
- const data = yield dowloadFromUrl(session, urlOrDoi);
182
+ const data = yield dowloadFromUrl(session, urlOrDoi, opts);
186
183
  return { source: urlOrDoi, data };
187
184
  }
188
185
  throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
@@ -41,16 +41,17 @@ export const DEFAULT_RESOLVERS = [elife, plos, joss];
41
41
  /**
42
42
  * Use the known custom resolvers to pick where the JATS should be downloaded from.
43
43
  */
44
- export function customResolveJatsUrlFromDoi(session, doiString, resolvers = DEFAULT_RESOLVERS) {
44
+ export function customResolveJatsUrlFromDoi(session, doiString, opts = { resolvers: DEFAULT_RESOLVERS }) {
45
+ var _a, _b;
45
46
  return __awaiter(this, void 0, void 0, function* () {
46
47
  if (!doi.validate(doiString))
47
48
  throw new Error(`The doi ${doiString} is not valid`);
48
49
  const doiUrl = doi.buildUrl(doiString);
49
50
  session.log.debug(`Resolving DOI ${doiUrl}`);
50
- const resp = yield fetch(doiUrl);
51
+ const resp = yield ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : fetch)(doiUrl);
51
52
  const articleUrl = resp.url;
52
53
  session.log.debug(`Found resolved URL for DOI at ${articleUrl}`);
53
- const resolver = resolvers.find((r) => r.test(articleUrl));
54
+ const resolver = (_b = opts === null || opts === void 0 ? void 0 : opts.resolvers) === null || _b === void 0 ? void 0 : _b.find((r) => r.test(articleUrl));
54
55
  if (!resolver)
55
56
  throw new Error(`Could not resolve JATS for ${articleUrl}, no resolver matched`);
56
57
  const jatsUrl = resolver.jatsUrl(articleUrl);
@@ -1,2 +1,2 @@
1
- const version = '0.0.7';
1
+ const version = '0.0.8';
2
2
  export default version;
package/dist/jats.js CHANGED
@@ -3513,8 +3513,20 @@ var init_nanoid = __esm({
3513
3513
  var require_utils = __commonJS({
3514
3514
  "../myst-common/dist/cjs/utils.js"(exports) {
3515
3515
  "use strict";
3516
+ var __assign = exports && exports.__assign || function() {
3517
+ __assign = Object.assign || function(t) {
3518
+ for (var s, i = 1, n = arguments.length; i < n; i++) {
3519
+ s = arguments[i];
3520
+ for (var p in s)
3521
+ if (Object.prototype.hasOwnProperty.call(s, p))
3522
+ t[p] = s[p];
3523
+ }
3524
+ return t;
3525
+ };
3526
+ return __assign.apply(this, arguments);
3527
+ };
3516
3528
  exports.__esModule = true;
3517
- exports.copyNode = exports.toText = exports.setTextAsChild = exports.liftChildren = exports.createHtmlId = exports.normalizeLabel = exports.createId = exports.fileInfo = exports.fileWarn = exports.fileError = void 0;
3529
+ exports.mergeTextNodes = exports.copyNode = exports.toText = exports.setTextAsChild = exports.liftChildren = exports.createHtmlId = exports.normalizeLabel = exports.createId = exports.fileInfo = exports.fileWarn = exports.fileError = void 0;
3518
3530
  var unist_util_map_1 = (init_unist_util_map(), __toCommonJS(unist_util_map_exports));
3519
3531
  var nanoid_1 = (init_nanoid(), __toCommonJS(nanoid_exports));
3520
3532
  function addMessageInfo(message, info) {
@@ -3527,7 +3539,7 @@ var require_utils = __commonJS({
3527
3539
  return message;
3528
3540
  }
3529
3541
  function fileError(file, message, opts) {
3530
- return addMessageInfo(file.message(message, opts === null || opts === void 0 ? void 0 : opts.node, opts === null || opts === void 0 ? void 0 : opts.source), opts);
3542
+ return addMessageInfo(file.message(message, opts === null || opts === void 0 ? void 0 : opts.node, opts === null || opts === void 0 ? void 0 : opts.source), __assign(__assign({}, opts), { fatal: true }));
3531
3543
  }
3532
3544
  exports.fileError = fileError;
3533
3545
  function fileWarn(file, message, opts) {
@@ -3585,6 +3597,8 @@ var require_utils = __commonJS({
3585
3597
  if (!Array.isArray(content))
3586
3598
  return toText4([content]);
3587
3599
  return content.map(function(n) {
3600
+ if (!n || typeof n === "string")
3601
+ return n || "";
3588
3602
  if ("value" in n)
3589
3603
  return n.value;
3590
3604
  if ("children" in n && n.children)
@@ -3597,6 +3611,35 @@ var require_utils = __commonJS({
3597
3611
  return JSON.parse(JSON.stringify(node));
3598
3612
  }
3599
3613
  exports.copyNode = copyNode;
3614
+ function mergeTextNodes(node) {
3615
+ var _a;
3616
+ var children = (_a = node.children) === null || _a === void 0 ? void 0 : _a.reduce(function(c, n) {
3617
+ var _a2;
3618
+ if ((n === null || n === void 0 ? void 0 : n.type) !== "text") {
3619
+ c.push(mergeTextNodes(n));
3620
+ return c;
3621
+ }
3622
+ var last = c[c.length - 1];
3623
+ if ((last === null || last === void 0 ? void 0 : last.type) !== "text") {
3624
+ c.push(n);
3625
+ return c;
3626
+ }
3627
+ if ((_a2 = n.position) === null || _a2 === void 0 ? void 0 : _a2.end) {
3628
+ if (!last.position)
3629
+ last.position = {};
3630
+ last.position.end = n.position.end;
3631
+ }
3632
+ if (!last.value)
3633
+ last.value = "";
3634
+ if (n.value)
3635
+ last.value += n.value;
3636
+ return c;
3637
+ }, []);
3638
+ if (children)
3639
+ node.children = children;
3640
+ return node;
3641
+ }
3642
+ exports.mergeTextNodes = mergeTextNodes;
3600
3643
  }
3601
3644
  });
3602
3645
 
@@ -5147,7 +5190,7 @@ var require_cjs2 = __commonJS({
5147
5190
  o[k2] = m[k];
5148
5191
  });
5149
5192
  exports.__esModule = true;
5150
- exports.TemplateOptionType = exports.TemplateKind = exports.extractPart = exports.selectBlockParts = exports.copyNode = exports.setTextAsChild = exports.liftChildren = exports.createHtmlId = exports.normalizeLabel = exports.createId = exports.fileInfo = exports.fileWarn = exports.fileError = exports.toText = void 0;
5193
+ exports.TemplateOptionType = exports.TemplateKind = exports.extractPart = exports.selectBlockParts = exports.mergeTextNodes = exports.copyNode = exports.setTextAsChild = exports.liftChildren = exports.createHtmlId = exports.normalizeLabel = exports.createId = exports.fileInfo = exports.fileWarn = exports.fileError = exports.toText = void 0;
5151
5194
  var utils_1 = require_utils();
5152
5195
  __createBinding(exports, utils_1, "toText");
5153
5196
  __createBinding(exports, utils_1, "fileError");
@@ -5159,6 +5202,7 @@ var require_cjs2 = __commonJS({
5159
5202
  __createBinding(exports, utils_1, "liftChildren");
5160
5203
  __createBinding(exports, utils_1, "setTextAsChild");
5161
5204
  __createBinding(exports, utils_1, "copyNode");
5205
+ __createBinding(exports, utils_1, "mergeTextNodes");
5162
5206
  var extractParts_1 = require_extractParts();
5163
5207
  __createBinding(exports, extractParts_1, "selectBlockParts");
5164
5208
  __createBinding(exports, extractParts_1, "extractPart");
@@ -10310,7 +10354,7 @@ var require_cjs3 = __commonJS({
10310
10354
  var import_commander2 = __toESM(require_commander());
10311
10355
 
10312
10356
  // src/version.ts
10313
- var version = "0.0.7";
10357
+ var version = "0.0.8";
10314
10358
  var version_default = version;
10315
10359
 
10316
10360
  // src/cli/parse.ts
@@ -10335,7 +10379,7 @@ function replaceCwd(cwd, args) {
10335
10379
  return args;
10336
10380
  return args.map((a) => {
10337
10381
  if (typeof a === "string") {
10338
- return a.replace(new RegExp(cwd + import_path.sep, "g"), "");
10382
+ return a.replaceAll(cwd + import_path.sep, "");
10339
10383
  }
10340
10384
  return a;
10341
10385
  });
@@ -10766,15 +10810,16 @@ var joss = {
10766
10810
  };
10767
10811
  var DEFAULT_RESOLVERS = [elife, plos, joss];
10768
10812
  function customResolveJatsUrlFromDoi(_0, _1) {
10769
- return __async(this, arguments, function* (session, doiString, resolvers = DEFAULT_RESOLVERS) {
10813
+ return __async(this, arguments, function* (session, doiString, opts = { resolvers: DEFAULT_RESOLVERS }) {
10814
+ var _a, _b;
10770
10815
  if (!import_doi_utils3.default.validate(doiString))
10771
10816
  throw new Error(`The doi ${doiString} is not valid`);
10772
10817
  const doiUrl = import_doi_utils3.default.buildUrl(doiString);
10773
10818
  session.log.debug(`Resolving DOI ${doiUrl}`);
10774
- const resp = yield (0, import_node_fetch.default)(doiUrl);
10819
+ const resp = yield ((_a = opts == null ? void 0 : opts.fetcher) != null ? _a : import_node_fetch.default)(doiUrl);
10775
10820
  const articleUrl = resp.url;
10776
10821
  session.log.debug(`Found resolved URL for DOI at ${articleUrl}`);
10777
- const resolver = resolvers.find((r) => r.test(articleUrl));
10822
+ const resolver = (_b = opts == null ? void 0 : opts.resolvers) == null ? void 0 : _b.find((r) => r.test(articleUrl));
10778
10823
  if (!resolver)
10779
10824
  throw new Error(`Could not resolve JATS for ${articleUrl}, no resolver matched`);
10780
10825
  const jatsUrl = resolver.jatsUrl(articleUrl);
@@ -10800,24 +10845,17 @@ ${jatsUrls.join("\n")}`)}
10800
10845
  session.log.info(`${import_chalk2.default.blue("The link may work in a browser.")}
10801
10846
  `);
10802
10847
  }
10803
- function dowloadFromUrl(session, jatsUrl) {
10848
+ function dowloadFromUrl(session, jatsUrl, opts) {
10804
10849
  return __async(this, null, function* () {
10850
+ var _a, _b;
10805
10851
  const toc = tic();
10806
10852
  session.log.debug(`Fetching JATS from ${jatsUrl}`);
10807
- const resp = yield (0, import_node_fetch2.default)(jatsUrl, {
10808
- headers: [
10809
- ["accept", "application/xml"],
10810
- [
10811
- "user-agent",
10812
- `Mozilla/5.0 (Macintosh; Intel Mac OS X ${Math.floor(Math.random() * 100)})`
10813
- ]
10814
- ]
10815
- });
10853
+ const resp = yield ((_a = opts == null ? void 0 : opts.fetcher) != null ? _a : defaultFetcher)(jatsUrl, "xml");
10816
10854
  if (!resp.ok) {
10817
10855
  session.log.debug(`JATS failed to download from "${jatsUrl}"`);
10818
10856
  throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
10819
10857
  }
10820
- const contentType = resp.headers.get("content-type");
10858
+ const contentType = (_b = resp.headers) == null ? void 0 : _b.get("content-type");
10821
10859
  if (!((contentType == null ? void 0 : contentType.includes("application/xml")) || (contentType == null ? void 0 : contentType.includes("text/xml")) || (contentType == null ? void 0 : contentType.includes("text/plain")))) {
10822
10860
  session.log.warn(
10823
10861
  `Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}
@@ -10831,21 +10869,31 @@ ${import_chalk2.default.dim(
10831
10869
  return data;
10832
10870
  });
10833
10871
  }
10834
- function checkIfDoiHasJats(session, urlOrDoi) {
10872
+ function defaultFetcher(url, kind) {
10873
+ switch (kind) {
10874
+ case "json":
10875
+ return (0, import_node_fetch2.default)(url, { headers: [["Accept", "application/json"]] });
10876
+ case "xml":
10877
+ return (0, import_node_fetch2.default)(url, { headers: [["Accept", "application/xml"]] });
10878
+ default:
10879
+ return (0, import_node_fetch2.default)(url);
10880
+ }
10881
+ }
10882
+ function checkIfDoiHasJats(session, urlOrDoi, opts) {
10835
10883
  return __async(this, null, function* () {
10836
- var _a, _b, _c, _d;
10884
+ var _a, _b, _c, _d, _e;
10837
10885
  if (!import_doi_utils4.default.validate(urlOrDoi))
10838
10886
  return;
10839
10887
  const toc = tic();
10840
10888
  const doiUrl = import_doi_utils4.default.buildUrl(urlOrDoi);
10841
10889
  session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
10842
- const resp = yield (0, import_node_fetch2.default)(doiUrl, { headers: [["Accept", "application/json"]] });
10890
+ const resp = yield ((_a = opts == null ? void 0 : opts.fetcher) != null ? _a : defaultFetcher)(doiUrl, "json");
10843
10891
  if (!resp.ok) {
10844
10892
  session.log.debug(`DOI failed to resolve: ${doiUrl}`);
10845
10893
  return;
10846
10894
  }
10847
10895
  const data = yield resp.json();
10848
- session.log.debug(toc(`DOI resolved in %s with ${(_b = (_a = data.link) == null ? void 0 : _a.length) != null ? _b : 0} links to content`));
10896
+ session.log.debug(toc(`DOI resolved in %s with ${(_c = (_b = data.link) == null ? void 0 : _b.length) != null ? _c : 0} links to content`));
10849
10897
  if (data.link) {
10850
10898
  session.log.debug(
10851
10899
  ["", ...data.link.map((link) => `content-type: ${link["content-type"]}, ${link.URL}
@@ -10854,37 +10902,38 @@ function checkIfDoiHasJats(session, urlOrDoi) {
10854
10902
  )
10855
10903
  );
10856
10904
  }
10857
- const fullXml = (_d = (_c = data.link) == null ? void 0 : _c.find(
10905
+ const fullXml = (_e = (_d = data.link) == null ? void 0 : _d.find(
10858
10906
  (link) => {
10859
10907
  var _a2;
10860
10908
  return ["text/xml", "application/xml"].includes((_a2 = link["content-type"]) != null ? _a2 : "");
10861
10909
  }
10862
- )) == null ? void 0 : _d.URL;
10910
+ )) == null ? void 0 : _e.URL;
10863
10911
  if (fullXml)
10864
10912
  return fullXml;
10865
10913
  session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
10866
10914
  return void 0;
10867
10915
  });
10868
10916
  }
10869
- function convertPMID2PMCID(session, PMID) {
10917
+ function convertPMID2PMCID(session, PMID, opts) {
10870
10918
  return __async(this, null, function* () {
10871
- var _a, _b;
10919
+ var _a, _b, _c;
10872
10920
  if (PMID.startsWith("https://")) {
10873
10921
  const idPart = new URL(PMID).pathname.slice(1);
10874
10922
  session.log.debug(`Extract ${PMID} to ${idPart}`);
10875
- return convertPMID2PMCID(session, idPart);
10923
+ return convertPMID2PMCID(session, idPart, opts);
10876
10924
  }
10877
10925
  const toc = tic();
10878
10926
  const converter = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/";
10879
- const resp = yield (0, import_node_fetch2.default)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, {
10880
- headers: [["Accept", "application/json"]]
10881
- });
10927
+ const resp = yield ((_a = opts == null ? void 0 : opts.fetcher) != null ? _a : defaultFetcher)(
10928
+ `${converter}?tool=jats-xml&format=json&ids=${PMID}`,
10929
+ "json"
10930
+ );
10882
10931
  if (!resp.ok) {
10883
10932
  session.log.debug(`Failed to convert PubMedID: ${PMID}`);
10884
10933
  return;
10885
10934
  }
10886
10935
  const data = yield resp.json();
10887
- const PMCID = (_b = (_a = data == null ? void 0 : data.records) == null ? void 0 : _a[0]) == null ? void 0 : _b.pmcid;
10936
+ const PMCID = (_c = (_b = data == null ? void 0 : data.records) == null ? void 0 : _b[0]) == null ? void 0 : _c.pmcid;
10888
10937
  session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
10889
10938
  return PMCID;
10890
10939
  });
@@ -10893,9 +10942,9 @@ function pubMedCentralJats(PMCID) {
10893
10942
  const normalized = PMCID.replace(/^PMC:?/, "");
10894
10943
  return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
10895
10944
  }
10896
- function checkIfPubMedCentralHasJats(session, urlOrDoi) {
10945
+ function checkIfPubMedCentralHasJats(session, urlOrDoi, opts) {
10897
10946
  return __async(this, null, function* () {
10898
- var _a, _b;
10947
+ var _a, _b, _c;
10899
10948
  if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
10900
10949
  return pubMedCentralJats(urlOrDoi);
10901
10950
  if (!import_doi_utils4.default.validate(urlOrDoi))
@@ -10904,19 +10953,19 @@ function checkIfPubMedCentralHasJats(session, urlOrDoi) {
10904
10953
  const doiUrl = import_doi_utils4.default.buildUrl(urlOrDoi);
10905
10954
  session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
10906
10955
  const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
10907
- const resp = yield (0, import_node_fetch2.default)(openAlexUrl, { headers: [["Accept", "application/json"]] });
10956
+ const resp = yield ((_a = opts == null ? void 0 : opts.fetcher) != null ? _a : defaultFetcher)(openAlexUrl, "json");
10908
10957
  if (!resp.ok) {
10909
10958
  session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
10910
10959
  return;
10911
10960
  }
10912
10961
  const data = yield resp.json();
10913
- const PMID = (_a = data == null ? void 0 : data.ids) == null ? void 0 : _a.pmid;
10914
- let PMCID = (_b = data == null ? void 0 : data.ids) == null ? void 0 : _b.pmcid;
10962
+ const PMID = (_b = data == null ? void 0 : data.ids) == null ? void 0 : _b.pmid;
10963
+ let PMCID = (_c = data == null ? void 0 : data.ids) == null ? void 0 : _c.pmcid;
10915
10964
  if (!PMCID && !!PMID) {
10916
10965
  session.log.debug(
10917
10966
  toc(`OpenAlex resolved ${data == null ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`)
10918
10967
  );
10919
- PMCID = yield convertPMID2PMCID(session, PMID);
10968
+ PMCID = yield convertPMID2PMCID(session, PMID, opts);
10920
10969
  if (!PMCID) {
10921
10970
  session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
10922
10971
  return;
@@ -10930,11 +10979,11 @@ function checkIfPubMedCentralHasJats(session, urlOrDoi) {
10930
10979
  return pubMedCentralJats(PMCID);
10931
10980
  });
10932
10981
  }
10933
- function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
10934
- return __async(this, null, function* () {
10982
+ function downloadJatsFromUrl(_0, _1) {
10983
+ return __async(this, arguments, function* (session, urlOrDoi, opts = {}) {
10935
10984
  const expectedUrls = (yield Promise.all([
10936
- checkIfPubMedCentralHasJats(session, urlOrDoi),
10937
- checkIfDoiHasJats(session, urlOrDoi)
10985
+ checkIfPubMedCentralHasJats(session, urlOrDoi, opts),
10986
+ checkIfDoiHasJats(session, urlOrDoi, opts)
10938
10987
  ])).filter((u) => !!u);
10939
10988
  if (expectedUrls.length > 0) {
10940
10989
  session.log.debug(["Trying URLs:\n", ...expectedUrls.map((url) => ` ${url}
@@ -10942,7 +10991,7 @@ function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
10942
10991
  for (let index = 0; index < expectedUrls.length; index++) {
10943
10992
  const url = expectedUrls[index];
10944
10993
  try {
10945
- const data = yield dowloadFromUrl(session, url);
10994
+ const data = yield dowloadFromUrl(session, url, opts);
10946
10995
  if (data)
10947
10996
  return { source: url, data };
10948
10997
  } catch (error) {
@@ -10952,15 +11001,15 @@ function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
10952
11001
  logAboutJatsFailing(session, expectedUrls);
10953
11002
  }
10954
11003
  if (import_doi_utils4.default.validate(urlOrDoi)) {
10955
- const jatsUrl = yield customResolveJatsUrlFromDoi(session, urlOrDoi, resolvers);
10956
- const data = yield dowloadFromUrl(session, jatsUrl);
11004
+ const jatsUrl = yield customResolveJatsUrlFromDoi(session, urlOrDoi, opts);
11005
+ const data = yield dowloadFromUrl(session, jatsUrl, opts);
10957
11006
  return { source: jatsUrl, data };
10958
11007
  }
10959
11008
  if (isUrl(urlOrDoi)) {
10960
11009
  session.log.debug(
10961
11010
  "No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly."
10962
11011
  );
10963
- const data = yield dowloadFromUrl(session, urlOrDoi);
11012
+ const data = yield dowloadFromUrl(session, urlOrDoi, opts);
10964
11013
  return { source: urlOrDoi, data };
10965
11014
  }
10966
11015
  throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
@@ -10971,8 +11020,8 @@ function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
10971
11020
  function hasValidExtension(output) {
10972
11021
  return [".xml", ".jats"].includes((0, import_path3.extname)(output).toLowerCase());
10973
11022
  }
10974
- function downloadAndSaveJats(session, urlOrDoi, output) {
10975
- return __async(this, null, function* () {
11023
+ function downloadAndSaveJats(_0, _1, _2) {
11024
+ return __async(this, arguments, function* (session, urlOrDoi, output, opts = { resolvers: DEFAULT_RESOLVERS }) {
10976
11025
  if (import_fs2.default.existsSync(urlOrDoi)) {
10977
11026
  throw new Error(`File "${urlOrDoi}" is local and cannot be downloaded!`);
10978
11027
  }
@@ -10986,20 +11035,20 @@ function downloadAndSaveJats(session, urlOrDoi, output) {
10986
11035
  )} is not a valid extension for JATS, try using ".xml" or ".jats"`
10987
11036
  );
10988
11037
  }
10989
- const { data } = yield downloadJatsFromUrl(session, urlOrDoi, DEFAULT_RESOLVERS);
11038
+ const { data } = yield downloadJatsFromUrl(session, urlOrDoi, opts);
10990
11039
  writeFileToFolder(output, data);
10991
11040
  return data;
10992
11041
  });
10993
11042
  }
10994
- function parseJats(session, file) {
10995
- return __async(this, null, function* () {
11043
+ function parseJats(_0, _1) {
11044
+ return __async(this, arguments, function* (session, file, opts = { resolvers: DEFAULT_RESOLVERS }) {
10996
11045
  const toc = tic();
10997
11046
  if (import_fs2.default.existsSync(file)) {
10998
11047
  session.log.debug(`Found ${file} locally, parsing`);
10999
11048
  const data2 = import_fs2.default.readFileSync(file).toString();
11000
11049
  return new Jats(data2, { log: session.log });
11001
11050
  }
11002
- const { source, data } = yield downloadJatsFromUrl(session, file, DEFAULT_RESOLVERS);
11051
+ const { source, data } = yield downloadJatsFromUrl(session, file, opts);
11003
11052
  const jats = new Jats(data, { source, log: session.log });
11004
11053
  session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
11005
11054
  return jats;
@@ -1 +1 @@
1
- {"version":3,"file":"parse.d.ts","sourceRoot":"","sources":["../../../src/cli/parse.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAiNpC,wBAAgB,cAAc,CAAC,OAAO,EAAE,OAAO,QAI9C"}
1
+ {"version":3,"file":"parse.d.ts","sourceRoot":"","sources":["../../../src/cli/parse.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAsNpC,wBAAgB,cAAc,CAAC,OAAO,EAAE,OAAO,QAI9C"}
@@ -1,11 +1,10 @@
1
- import type { ISession } from './types';
2
- import type { Resolver } from './resolvers';
1
+ import type { ISession, Options } from './types';
3
2
  /**
4
3
  * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
5
4
  */
6
- export declare function convertPMID2PMCID(session: ISession, PMID: string): Promise<string | undefined>;
7
- export declare function checkIfPubMedCentralHasJats(session: ISession, urlOrDoi: string): Promise<string | undefined>;
8
- export declare function downloadJatsFromUrl(session: ISession, urlOrDoi: string, resolvers?: Resolver[]): Promise<{
5
+ export declare function convertPMID2PMCID(session: ISession, PMID: string, opts: Options): Promise<string | undefined>;
6
+ export declare function checkIfPubMedCentralHasJats(session: ISession, urlOrDoi: string, opts: Options): Promise<string | undefined>;
7
+ export declare function downloadJatsFromUrl(session: ISession, urlOrDoi: string, opts?: Options): Promise<{
9
8
  source: string;
10
9
  data: string;
11
10
  }>;
@@ -1 +1 @@
1
- {"version":3,"file":"download.d.ts","sourceRoot":"","sources":["../../src/download.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACxC,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AA2G5C;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,QAAQ,EACjB,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAoB7B;AAOD,wBAAsB,2BAA2B,CAC/C,OAAO,EAAE,QAAQ,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAgC7B;AAED,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,QAAQ,EACjB,QAAQ,EAAE,MAAM,EAChB,SAAS,CAAC,EAAE,QAAQ,EAAE,GACrB,OAAO,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,CAkC3C"}
1
+ {"version":3,"file":"download.d.ts","sourceRoot":"","sources":["../../src/download.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AA8GjD;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,QAAQ,EACjB,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,OAAO,GACZ,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAqB7B;AAOD,wBAAsB,2BAA2B,CAC/C,OAAO,EAAE,QAAQ,EACjB,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,OAAO,GACZ,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAgC7B;AAED,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,QAAQ,EACjB,QAAQ,EAAE,MAAM,EAChB,IAAI,GAAE,OAAY,GACjB,OAAO,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,CAkC3C"}
@@ -1,8 +1,4 @@
1
- import type { ISession } from './types';
2
- export interface Resolver {
3
- test: (url: string) => boolean;
4
- jatsUrl: (url: string) => string;
5
- }
1
+ import type { ISession, Options, Resolver } from './types';
6
2
  export declare const elife: Resolver;
7
3
  export declare const plos: Resolver;
8
4
  export declare const joss: Resolver;
@@ -10,5 +6,5 @@ export declare const DEFAULT_RESOLVERS: Resolver[];
10
6
  /**
11
7
  * Use the known custom resolvers to pick where the JATS should be downloaded from.
12
8
  */
13
- export declare function customResolveJatsUrlFromDoi(session: ISession, doiString: string, resolvers?: Resolver[]): Promise<string>;
9
+ export declare function customResolveJatsUrlFromDoi(session: ISession, doiString: string, opts?: Options): Promise<string>;
14
10
  //# sourceMappingURL=resolvers.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"resolvers.d.ts","sourceRoot":"","sources":["../../src/resolvers.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAExC,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;IAC/B,OAAO,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,MAAM,CAAC;CAClC;AAED,eAAO,MAAM,KAAK,EAAE,QAOnB,CAAC;AAEF,eAAO,MAAM,IAAI,EAAE,QAOlB,CAAC;AAEF,eAAO,MAAM,IAAI,EAAE,QAWlB,CAAC;AAEF,eAAO,MAAM,iBAAiB,EAAE,QAAQ,EAAwB,CAAC;AAEjE;;GAEG;AACH,wBAAsB,2BAA2B,CAC/C,OAAO,EAAE,QAAQ,EACjB,SAAS,EAAE,MAAM,EACjB,SAAS,aAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC,CAWjB"}
1
+ {"version":3,"file":"resolvers.d.ts","sourceRoot":"","sources":["../../src/resolvers.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAE3D,eAAO,MAAM,KAAK,EAAE,QAOnB,CAAC;AAEF,eAAO,MAAM,IAAI,EAAE,QAOlB,CAAC;AAEF,eAAO,MAAM,IAAI,EAAE,QAWlB,CAAC;AAEF,eAAO,MAAM,iBAAiB,EAAE,QAAQ,EAAwB,CAAC;AAEjE;;GAEG;AACH,wBAAsB,2BAA2B,CAC/C,OAAO,EAAE,QAAQ,EACjB,SAAS,EAAE,MAAM,EACjB,IAAI,GAAE,OAA0C,GAC/C,OAAO,CAAC,MAAM,CAAC,CAWjB"}
@@ -1,3 +1,11 @@
1
1
  import type { ISession as BaseISession } from 'myst-cli-utils';
2
2
  export declare type ISession = BaseISession;
3
+ export interface Resolver {
4
+ test: (url: string) => boolean;
5
+ jatsUrl: (url: string) => string;
6
+ }
7
+ export declare type Options = {
8
+ resolvers?: Resolver[];
9
+ fetcher?: (url: string, contentType?: 'json' | 'xml') => Promise<Pick<Response, 'ok' | 'headers' | 'text' | 'json' | 'status' | 'statusText' | 'url'>>;
10
+ };
3
11
  //# sourceMappingURL=session.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../../../src/types/session.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,IAAI,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE/D,oBAAY,QAAQ,GAAG,YAAY,CAAC"}
1
+ {"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../../../src/types/session.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,IAAI,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE/D,oBAAY,QAAQ,GAAG,YAAY,CAAC;AAEpC,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;IAC/B,OAAO,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,MAAM,CAAC;CAClC;AAED,oBAAY,OAAO,GAAG;IACpB,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAC;IACvB,OAAO,CAAC,EAAE,CACR,GAAG,EAAE,MAAM,EACX,WAAW,CAAC,EAAE,MAAM,GAAG,KAAK,KACzB,OAAO,CACV,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,SAAS,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,YAAY,GAAG,KAAK,CAAC,CACrF,CAAC;CACH,CAAC"}
@@ -1,3 +1,3 @@
1
- declare const version = "0.0.7";
1
+ declare const version = "0.0.8";
2
2
  export default version;
3
3
  //# sourceMappingURL=version.d.ts.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "jats-xml",
3
- "version": "0.0.7",
3
+ "version": "0.0.8",
4
4
  "description": "Types and utilities for working with JATS in Typescript",
5
5
  "author": "Rowan Cockett <rowan@curvenote.com>",
6
6
  "homepage": "https://github.com/executablebooks/mystjs/tree/main/packages/jats-xml",
@@ -52,8 +52,8 @@
52
52
  },
53
53
  "dependencies": {
54
54
  "fair-principles": "^1.0.3",
55
- "myst-common": "^0.0.10",
56
- "myst-frontmatter": "^0.0.4"
55
+ "myst-common": "^0.0.11",
56
+ "myst-frontmatter": "^0.0.5"
57
57
  },
58
58
  "devDependencies": {
59
59
  "@types/jest": "^28.1.6",