jats-xml 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dist/cjs/cli/parse.js +39 -28
  2. package/dist/cjs/download.js +163 -12
  3. package/dist/cjs/index.js +3 -1
  4. package/dist/cjs/jats.js +83 -19
  5. package/dist/cjs/resolvers.js +6 -3
  6. package/dist/cjs/types/elementTags.js +16 -0
  7. package/dist/cjs/types/refType.js +5 -5
  8. package/dist/cjs/utils.js +55 -6
  9. package/dist/cjs/version.js +1 -1
  10. package/dist/esm/cli/parse.js +40 -29
  11. package/dist/esm/download.js +162 -13
  12. package/dist/esm/index.js +1 -0
  13. package/dist/esm/jats.js +69 -8
  14. package/dist/esm/resolvers.js +4 -1
  15. package/dist/esm/types/elementTags.js +16 -0
  16. package/dist/esm/types/refType.js +5 -5
  17. package/dist/esm/utils.js +52 -4
  18. package/dist/esm/version.js +1 -1
  19. package/dist/jats.js +3165 -2823
  20. package/dist/types/cli/parse.d.ts.map +1 -1
  21. package/dist/types/download.d.ts +9 -1
  22. package/dist/types/download.d.ts.map +1 -1
  23. package/dist/types/index.d.ts +1 -0
  24. package/dist/types/index.d.ts.map +1 -1
  25. package/dist/types/jats.d.ts +28 -15
  26. package/dist/types/jats.d.ts.map +1 -1
  27. package/dist/types/resolvers.d.ts +4 -1
  28. package/dist/types/resolvers.d.ts.map +1 -1
  29. package/dist/types/types/elementTags.d.ts +17 -1
  30. package/dist/types/types/elementTags.d.ts.map +1 -1
  31. package/dist/types/types/elements.d.ts +9 -0
  32. package/dist/types/types/elements.d.ts.map +1 -1
  33. package/dist/types/types/refType.d.ts +5 -5
  34. package/dist/types/types/refType.d.ts.map +1 -1
  35. package/dist/types/utils.d.ts +5 -2
  36. package/dist/types/utils.d.ts.map +1 -1
  37. package/dist/types/version.d.ts +1 -1
  38. package/package.json +7 -4
@@ -41,7 +41,7 @@ function downloadAndSaveJats(session, urlOrDoi, output) {
41
41
  if (!hasValidExtension(output)) {
42
42
  session.log.warn(`The extension ${(0, path_1.extname)(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
43
43
  }
44
- const data = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
44
+ const { data } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
45
45
  (0, myst_cli_utils_1.writeFileToFolder)(output, data);
46
46
  return data;
47
47
  });
@@ -52,12 +52,12 @@ function parseJats(session, file) {
52
52
  if (fs_1.default.existsSync(file)) {
53
53
  session.log.debug(`Found ${file} locally, parsing`);
54
54
  const data = fs_1.default.readFileSync(file).toString();
55
- session.log.debug(toc(`Parsed JATS file from disk in %s`));
56
- return new jats_1.Jats(data);
55
+ return new jats_1.Jats(data, { log: session.log });
57
56
  }
58
- const data = yield (0, download_1.downloadJatsFromUrl)(session, file, resolvers_1.DEFAULT_RESOLVERS);
57
+ const { source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, resolvers_1.DEFAULT_RESOLVERS);
58
+ const jats = new jats_1.Jats(data, { source, log: session.log });
59
59
  session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
60
- return new jats_1.Jats(data);
60
+ return jats;
61
61
  });
62
62
  }
63
63
  function formatLongString(data, offset = 0, length = 88 - offset) {
@@ -76,6 +76,9 @@ function formatDictionary(dict, opts) {
76
76
  return Object.entries(dict)
77
77
  .map(([k, t]) => {
78
78
  var _a;
79
+ if (!t)
80
+ return null;
81
+ let wrap = typeof (opts === null || opts === void 0 ? void 0 : opts.wrap) === 'boolean' ? opts.wrap : true;
79
82
  let value = t;
80
83
  let color = chalk_1.default.yellow.bold;
81
84
  if (t && typeof t === 'object') {
@@ -83,21 +86,21 @@ function formatDictionary(dict, opts) {
83
86
  return null;
84
87
  color = (_a = t.label) !== null && _a !== void 0 ? _a : color;
85
88
  value = t.value;
89
+ wrap = typeof t.wrap === 'boolean' ? t.wrap : wrap;
86
90
  }
87
- const wrapped = (opts === null || opts === void 0 ? void 0 : opts.wrap) === false
88
- ? String(value)
89
- : formatLongString(String(value), maxLabel + 2, opts === null || opts === void 0 ? void 0 : opts.wrap);
91
+ const wrapped = wrap ? formatLongString(String(value), maxLabel + 2) : String(value);
90
92
  return `${color(k)}:${' '.repeat(maxLabel - k.length + 1)}${wrapped}`;
91
93
  })
92
94
  .filter((o) => !!o)
93
95
  .join('\n');
94
96
  }
95
97
  function jatsSummaryCLI(session, file) {
96
- var _a, _b;
98
+ var _a, _b, _c;
97
99
  return __awaiter(this, void 0, void 0, function* () {
98
100
  const jats = yield parseJats(session, file);
99
101
  const summary = {
100
- DOI: jats.doi ? doi_utils_1.default.buildUrl(jats.doi) : null,
102
+ Source: { value: jats.source, wrap: false },
103
+ DOI: jats.doi ? { value: doi_utils_1.default.buildUrl(jats.doi), wrap: false } : null,
101
104
  Title: (_a = (0, myst_common_1.toText)(jats.articleTitle)) === null || _a === void 0 ? void 0 : _a.replace(/\n/g, ' '),
102
105
  Date: (0, utils_1.formatDate)((0, utils_1.toDate)(jats.publicationDate)),
103
106
  Authors: jats.articleAuthors
@@ -105,36 +108,44 @@ function jatsSummaryCLI(session, file) {
105
108
  .join(', '),
106
109
  Abstract: (_b = (0, myst_common_1.toText)(jats.abstract)) === null || _b === void 0 ? void 0 : _b.replace(/\n/g, ' '),
107
110
  Keywords: jats.keywords.map((k) => (0, myst_common_1.toText)(k)).join(', '),
108
- License: jats.license['xlink:href'],
109
- Figures: { label: chalk_1.default.blue.bold, value: String((0, unist_util_select_1.selectAll)(types_1.Tags.fig, jats.body).length) },
110
- Equations: {
111
+ License: (_c = jats.license) === null || _c === void 0 ? void 0 : _c['xlink:href'],
112
+ };
113
+ if (jats.body) {
114
+ summary.Figures = {
115
+ label: chalk_1.default.blue.bold,
116
+ value: String((0, unist_util_select_1.selectAll)(types_1.Tags.fig, jats.body).length),
117
+ };
118
+ summary.Equations = {
111
119
  label: chalk_1.default.blue.bold,
112
120
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.dispFormula, jats.body).length),
113
- },
114
- Tables: {
121
+ };
122
+ summary.Tables = {
115
123
  label: chalk_1.default.blue.bold,
116
124
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.table, jats.body).length),
117
- },
118
- Code: {
125
+ };
126
+ summary.Code = {
119
127
  label: chalk_1.default.blue.bold,
120
128
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.code, jats.body).length),
121
- },
122
- Sections: {
129
+ };
130
+ summary.Sections = {
123
131
  label: chalk_1.default.blue.bold,
124
132
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.sec, jats.body).length),
125
- },
126
- Paragraphs: {
133
+ };
134
+ summary.Paragraphs = {
127
135
  label: chalk_1.default.blue.bold,
128
136
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.p, jats.body).length),
129
- },
130
- Citations: { label: chalk_1.default.blue.bold, value: String(jats.references.length) },
131
- 'Cross-References': {
137
+ };
138
+ summary.Citations = { label: chalk_1.default.blue.bold, value: String(jats.references.length) };
139
+ summary['Cross-References'] = {
132
140
  label: chalk_1.default.blue.bold,
133
141
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.xref, jats.body).length),
134
- },
135
- 'Sub Articles': { label: chalk_1.default.blue.bold, value: String(jats.subArticles.length) },
136
- };
142
+ };
143
+ summary['Sub Articles'] = { label: chalk_1.default.blue.bold, value: String(jats.subArticles.length) };
144
+ }
137
145
  session.log.info(formatDictionary(summary));
146
+ if (!jats.body) {
147
+ session.log.warn('\nThis is a partial JATS record that does not have <body>.');
148
+ }
138
149
  });
139
150
  }
140
151
  function jatsReferencesCLI(session, file) {
@@ -142,7 +153,7 @@ function jatsReferencesCLI(session, file) {
142
153
  const jats = yield parseJats(session, file);
143
154
  const sorted = jats.references
144
155
  .map((ref) => {
145
- const doiString = (0, utils_1.findDoi)(ref);
156
+ const doiString = (0, utils_1.findArticleId)(ref, 'doi');
146
157
  const title = (0, myst_common_1.toText)((0, unist_util_select_1.select)(types_1.Tags.articleTitle, ref));
147
158
  const year = (0, myst_common_1.toText)((0, unist_util_select_1.select)(types_1.Tags.year, ref));
148
159
  const surnames = (0, unist_util_select_1.selectAll)(types_1.Tags.surname, ref);
@@ -12,35 +12,186 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
12
12
  return (mod && mod.__esModule) ? mod : { "default": mod };
13
13
  };
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.downloadJatsFromUrl = void 0;
15
+ exports.downloadJatsFromUrl = exports.checkIfPubMedCentralHasJats = exports.convertPMID2PMCID = void 0;
16
16
  const doi_utils_1 = __importDefault(require("doi-utils"));
17
+ const chalk_1 = __importDefault(require("chalk"));
17
18
  const node_fetch_1 = __importDefault(require("node-fetch"));
18
19
  const myst_cli_utils_1 = require("myst-cli-utils");
20
+ const fair_principles_1 = require("fair-principles");
19
21
  const resolvers_1 = require("./resolvers");
22
+ function logAboutJatsFailing(session, jatsUrls) {
23
+ session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');
24
+ session.log.info(`${chalk_1.default.green(`\nThe XML ${chalk_1.default.bold('should')} be here:\n\n${jatsUrls.join('\n')}`)}\n`);
25
+ const FAIR = (0, fair_principles_1.highlightFAIR)('A', { chalk: chalk_1.default });
26
+ session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
27
+ session.log.debug((0, fair_principles_1.formatPrinciples)('A*', { chalk: chalk_1.default }));
28
+ session.log.info(`${chalk_1.default.blue('The link may work in a browser.')}\n`);
29
+ }
20
30
  function dowloadFromUrl(session, jatsUrl) {
21
31
  return __awaiter(this, void 0, void 0, function* () {
32
+ const toc = (0, myst_cli_utils_1.tic)();
22
33
  session.log.debug(`Fetching JATS from ${jatsUrl}`);
23
- const jatsResp = yield (0, node_fetch_1.default)(jatsUrl);
24
- if (!jatsResp.ok)
25
- throw new Error(`Problem fetching JATS from ${jatsUrl}`);
26
- const contentType = jatsResp.headers.get('content-type');
27
- if (!(contentType === 'application/xml' || (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
28
- throw new Error(`Expected content-type "application/xml" instead we got ${contentType} for ${jatsUrl}`);
29
- }
30
- const data = yield jatsResp.text();
34
+ const resp = yield (0, node_fetch_1.default)(jatsUrl, {
35
+ headers: [
36
+ ['accept', 'application/xml'],
37
+ [
38
+ 'user-agent',
39
+ // A bunch of publishers just show the login screen or quickly block you.
40
+ // We don't want to DDOS these publishers, they are the _good ones_ for sharing the XML!!
41
+ // But some block on the second request?!
42
+ // So we can pretend to be a random browser, I guess. How silly. 🤷‍♂️
43
+ `Mozilla/5.0 (Macintosh; Intel Mac OS X ${Math.floor(Math.random() * 100)})`,
44
+ ],
45
+ ],
46
+ });
47
+ if (!resp.ok) {
48
+ session.log.debug(`JATS failed to download from "${jatsUrl}"`);
49
+ throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
50
+ }
51
+ const contentType = resp.headers.get('content-type');
52
+ if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
53
+ (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
54
+ (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
55
+ session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}\n${chalk_1.default.dim('Things may not work, but we are going to try our best...')}`);
56
+ }
57
+ const data = yield resp.text();
58
+ session.log.debug(toc(`Fetched document with content-type "${contentType}" in %s`));
31
59
  return data;
32
60
  });
33
61
  }
62
+ /**
63
+ * There are 5.8M or so DOIs that have a full XML record:
64
+ *
65
+ * https://api.crossref.org/works?filter=full-text.type:application/xml,full-text.application:text-mining&facet=publisher-name:*&rows=0
66
+ *
67
+ * This function tries to find the correct URL for the record.
68
+ */
69
+ function checkIfDoiHasJats(session, urlOrDoi) {
70
+ var _a, _b, _c, _d;
71
+ return __awaiter(this, void 0, void 0, function* () {
72
+ if (!doi_utils_1.default.validate(urlOrDoi))
73
+ return;
74
+ const toc = (0, myst_cli_utils_1.tic)();
75
+ const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
76
+ session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
77
+ const resp = yield (0, node_fetch_1.default)(doiUrl, { headers: [['Accept', 'application/json']] });
78
+ if (!resp.ok) {
79
+ // Silently return -- other functions can try!
80
+ session.log.debug(`DOI failed to resolve: ${doiUrl}`);
81
+ return;
82
+ }
83
+ const data = (yield resp.json());
84
+ session.log.debug(toc(`DOI resolved in %s with ${(_b = (_a = data.link) === null || _a === void 0 ? void 0 : _a.length) !== null && _b !== void 0 ? _b : 0} links to content`));
85
+ if (data.link) {
86
+ session.log.debug(['', ...data.link.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '));
87
+ }
88
+ const fullXml = (_d = (_c = data.link) === null || _c === void 0 ? void 0 : _c.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _d === void 0 ? void 0 : _d.URL;
89
+ if (fullXml)
90
+ return fullXml;
91
+ session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
92
+ return undefined;
93
+ });
94
+ }
95
+ /**
96
+ * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
97
+ */
98
+ function convertPMID2PMCID(session, PMID) {
99
+ var _a, _b;
100
+ return __awaiter(this, void 0, void 0, function* () {
101
+ if (PMID.startsWith('https://')) {
102
+ const idPart = new URL(PMID).pathname.slice(1);
103
+ session.log.debug(`Extract ${PMID} to ${idPart}`);
104
+ return convertPMID2PMCID(session, idPart);
105
+ }
106
+ const toc = (0, myst_cli_utils_1.tic)();
107
+ const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
108
+ const resp = yield (0, node_fetch_1.default)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, {
109
+ headers: [['Accept', 'application/json']],
110
+ });
111
+ if (!resp.ok) {
112
+ // Silently return -- other functions can try!
113
+ session.log.debug(`Failed to convert PubMedID: ${PMID}`);
114
+ return;
115
+ }
116
+ const data = yield resp.json();
117
+ const PMCID = (_b = (_a = data === null || data === void 0 ? void 0 : data.records) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.pmcid;
118
+ session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
119
+ return PMCID;
120
+ });
121
+ }
122
+ exports.convertPMID2PMCID = convertPMID2PMCID;
123
+ function pubMedCentralJats(PMCID) {
124
+ const normalized = PMCID.replace(/^PMC:?/, '');
125
+ return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
126
+ }
127
+ function checkIfPubMedCentralHasJats(session, urlOrDoi) {
128
+ var _a, _b;
129
+ return __awaiter(this, void 0, void 0, function* () {
130
+ if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
131
+ return pubMedCentralJats(urlOrDoi);
132
+ if (!doi_utils_1.default.validate(urlOrDoi))
133
+ return;
134
+ const toc = (0, myst_cli_utils_1.tic)();
135
+ const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
136
+ session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
137
+ const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
138
+ const resp = yield (0, node_fetch_1.default)(openAlexUrl, { headers: [['Accept', 'application/json']] });
139
+ if (!resp.ok) {
140
+ // Silently return -- other functions can try!
141
+ session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
142
+ return;
143
+ }
144
+ const data = (yield resp.json());
145
+ const PMID = (_a = data === null || data === void 0 ? void 0 : data.ids) === null || _a === void 0 ? void 0 : _a.pmid;
146
+ let PMCID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmcid;
147
+ if (!PMCID && !!PMID) {
148
+ session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`));
149
+ PMCID = yield convertPMID2PMCID(session, PMID);
150
+ if (!PMCID) {
151
+ session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
152
+ return;
153
+ }
154
+ }
155
+ if (!PMCID) {
156
+ session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s, but there is no PMCID`));
157
+ return;
158
+ }
159
+ session.log.debug(toc(`OpenAlex resolved in %s, with a PMCID of ${PMCID}`));
160
+ return pubMedCentralJats(PMCID);
161
+ });
162
+ }
163
+ exports.checkIfPubMedCentralHasJats = checkIfPubMedCentralHasJats;
34
164
  function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
35
165
  return __awaiter(this, void 0, void 0, function* () {
166
+ const expectedUrls = (yield Promise.all([
167
+ checkIfPubMedCentralHasJats(session, urlOrDoi),
168
+ checkIfDoiHasJats(session, urlOrDoi),
169
+ ])).filter((u) => !!u);
170
+ if (expectedUrls.length > 0) {
171
+ session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
172
+ for (let index = 0; index < expectedUrls.length; index++) {
173
+ const url = expectedUrls[index];
174
+ try {
175
+ const data = yield dowloadFromUrl(session, url);
176
+ if (data)
177
+ return { source: url, data };
178
+ }
179
+ catch (error) {
180
+ session.log.debug(error.message);
181
+ }
182
+ }
183
+ // If there are expected URLs that don't work: see something, say something, etc.
184
+ logAboutJatsFailing(session, expectedUrls);
185
+ }
36
186
  if (doi_utils_1.default.validate(urlOrDoi)) {
37
- const jatsUrl = yield (0, resolvers_1.resolveJatsUrlFromDoi)(session, urlOrDoi, resolvers);
187
+ const jatsUrl = yield (0, resolvers_1.customResolveJatsUrlFromDoi)(session, urlOrDoi, resolvers);
38
188
  const data = yield dowloadFromUrl(session, jatsUrl);
39
- return data;
189
+ return { source: jatsUrl, data };
40
190
  }
41
191
  if ((0, myst_cli_utils_1.isUrl)(urlOrDoi)) {
192
+ session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
42
193
  const data = yield dowloadFromUrl(session, urlOrDoi);
43
- return data;
194
+ return { source: urlOrDoi, data };
44
195
  }
45
196
  throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
46
197
  });
package/dist/cjs/index.js CHANGED
@@ -17,9 +17,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
17
17
  return (mod && mod.__esModule) ? mod : { "default": mod };
18
18
  };
19
19
  Object.defineProperty(exports, "__esModule", { value: true });
20
- exports.Jats = exports.version = void 0;
20
+ exports.downloadJatsFromUrl = exports.Jats = exports.version = void 0;
21
21
  var version_1 = require("./version");
22
22
  Object.defineProperty(exports, "version", { enumerable: true, get: function () { return __importDefault(version_1).default; } });
23
23
  var jats_1 = require("./jats");
24
24
  Object.defineProperty(exports, "Jats", { enumerable: true, get: function () { return jats_1.Jats; } });
25
+ var download_1 = require("./download");
26
+ Object.defineProperty(exports, "downloadJatsFromUrl", { enumerable: true, get: function () { return download_1.downloadJatsFromUrl; } });
25
27
  __exportStar(require("./types"), exports);
package/dist/cjs/jats.js CHANGED
@@ -1,41 +1,90 @@
1
1
  "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
2
5
  Object.defineProperty(exports, "__esModule", { value: true });
3
6
  exports.Jats = void 0;
7
+ const myst_common_1 = require("myst-common");
4
8
  const xml_js_1 = require("xml-js");
9
+ const doi_utils_1 = __importDefault(require("doi-utils"));
5
10
  const utils_1 = require("./utils");
6
11
  const unist_util_select_1 = require("unist-util-select");
7
12
  const types_1 = require("./types");
13
+ const myst_cli_utils_1 = require("myst-cli-utils");
14
+ function select(selector, node) {
15
+ var _a;
16
+ return ((_a = (0, unist_util_select_1.select)(selector, node)) !== null && _a !== void 0 ? _a : undefined);
17
+ }
8
18
  class Jats {
9
- constructor(data) {
10
- this.raw = (0, xml_js_1.xml2js)(data, { compact: false });
19
+ constructor(data, opts) {
20
+ var _a;
21
+ const toc = (0, myst_cli_utils_1.tic)();
22
+ this.log = opts === null || opts === void 0 ? void 0 : opts.log;
23
+ if (opts === null || opts === void 0 ? void 0 : opts.source)
24
+ this.source = opts.source;
25
+ try {
26
+ this.raw = (0, xml_js_1.xml2js)(data, { compact: false });
27
+ }
28
+ catch (error) {
29
+ throw new Error('Problem parsing the JATS document, please ensure it is XML');
30
+ }
11
31
  const { declaration, elements } = this.raw;
12
32
  this.declaration = declaration === null || declaration === void 0 ? void 0 : declaration.attributes;
13
- if (!((elements === null || elements === void 0 ? void 0 : elements.length) === 2 && elements[0].type === 'doctype' && elements[1].name === 'article')) {
14
- throw new Error('article is not the only element of the JATS');
33
+ if (!((elements === null || elements === void 0 ? void 0 : elements.length) === 2 && elements[0].type === 'doctype' && hasSingleArticle(elements[1]))) {
34
+ throw new Error('Element <article> is not the only element of the JATS');
15
35
  }
16
36
  this.doctype = elements[0].doctype;
17
- this.tree = (0, utils_1.convertToUnist)(elements[1]);
37
+ const converted = (0, utils_1.convertToUnist)(elements[1]);
38
+ this.tree = select('article', converted);
39
+ (_a = this.log) === null || _a === void 0 ? void 0 : _a.debug(toc('Parsed and converted JATS to unist tree in %s'));
40
+ }
41
+ get frontmatter() {
42
+ var _a, _b, _c, _d;
43
+ const title = this.articleTitle;
44
+ const subtitle = this.articleSubtitle;
45
+ const date = this.publicationDate;
46
+ const authors = this.articleAuthors;
47
+ const firstSubject = select(types_1.Tags.subject, (_a = this.articleCategories) !== null && _a !== void 0 ? _a : this.front);
48
+ const journalTitle = select(types_1.Tags.journalTitle, this.front);
49
+ return {
50
+ title: title ? (0, myst_common_1.toText)(title) : undefined,
51
+ subtitle: subtitle ? (0, myst_common_1.toText)(subtitle) : undefined,
52
+ doi: (_b = this.doi) !== null && _b !== void 0 ? _b : undefined,
53
+ date: date ? (_c = (0, utils_1.toDate)(date)) === null || _c === void 0 ? void 0 : _c.toISOString() : undefined,
54
+ authors: authors === null || authors === void 0 ? void 0 : authors.map((a) => (0, utils_1.authorAndAffiliation)(a, this.tree)),
55
+ keywords: (_d = this.keywords) === null || _d === void 0 ? void 0 : _d.map((k) => (0, myst_common_1.toText)(k)),
56
+ venue: journalTitle ? { title: (0, myst_common_1.toText)(journalTitle) } : undefined,
57
+ subject: firstSubject ? (0, myst_common_1.toText)(firstSubject) : undefined,
58
+ };
18
59
  }
19
60
  get front() {
20
- return (0, unist_util_select_1.select)(types_1.Tags.front, this.tree);
61
+ return select(types_1.Tags.front, this.tree);
21
62
  }
22
63
  get premissions() {
23
- return (0, unist_util_select_1.select)(types_1.Tags.permissions, this.front);
64
+ return select(types_1.Tags.permissions, this.front);
24
65
  }
25
66
  get doi() {
26
- return (0, utils_1.findDoi)(this.front);
67
+ var _a;
68
+ return doi_utils_1.default.normalize((_a = (0, utils_1.findArticleId)(this.front, 'doi')) !== null && _a !== void 0 ? _a : '');
69
+ }
70
+ get pmc() {
71
+ var _a;
72
+ return (_a = (0, utils_1.findArticleId)(this.front, 'pmc')) === null || _a === void 0 ? void 0 : _a.replace(/^PMC:?/, '');
73
+ }
74
+ get pmid() {
75
+ return (0, utils_1.findArticleId)(this.front, 'pmid');
27
76
  }
28
77
  get publicationDates() {
29
78
  return (0, unist_util_select_1.selectAll)(types_1.Tags.pubDate, this.front);
30
79
  }
31
80
  get publicationDate() {
32
- return this.publicationDates.find((d) => !!(0, unist_util_select_1.select)(types_1.Tags.day, d));
81
+ return this.publicationDates.find((d) => !!select(types_1.Tags.day, d));
33
82
  }
34
83
  get license() {
35
- return (0, unist_util_select_1.select)(types_1.Tags.license, this.premissions);
84
+ return select(types_1.Tags.license, this.premissions);
36
85
  }
37
86
  get keywordGroup() {
38
- return (0, unist_util_select_1.select)(types_1.Tags.kwdGroup, this.front);
87
+ return select(types_1.Tags.kwdGroup, this.front);
39
88
  }
40
89
  /** The first keywords */
41
90
  get keywords() {
@@ -44,23 +93,26 @@ class Jats {
44
93
  get keywordGroups() {
45
94
  return (0, unist_util_select_1.selectAll)(types_1.Tags.kwdGroup, this.front);
46
95
  }
96
+ get articleCategories() {
97
+ return select(types_1.Tags.articleCategories, this.front);
98
+ }
47
99
  get titleGroup() {
48
- return (0, unist_util_select_1.select)(types_1.Tags.titleGroup, this.front);
100
+ return select(types_1.Tags.titleGroup, this.front);
49
101
  }
50
102
  get articleTitle() {
51
- return (0, unist_util_select_1.select)(types_1.Tags.articleTitle, this.titleGroup);
103
+ return select(types_1.Tags.articleTitle, this.titleGroup);
52
104
  }
53
105
  get articleSubtitle() {
54
- return (0, unist_util_select_1.select)(types_1.Tags.subtitle, this.titleGroup);
106
+ return select(types_1.Tags.subtitle, this.titleGroup);
55
107
  }
56
108
  get abstract() {
57
- return (0, unist_util_select_1.select)(types_1.Tags.abstract, this.front);
109
+ return select(types_1.Tags.abstract, this.front);
58
110
  }
59
111
  get abstracts() {
60
112
  return (0, unist_util_select_1.selectAll)(types_1.Tags.abstract, this.front);
61
113
  }
62
114
  get contribGroup() {
63
- return (0, unist_util_select_1.select)(types_1.Tags.contribGroup, this.front);
115
+ return select(types_1.Tags.contribGroup, this.front);
64
116
  }
65
117
  get contribGroups() {
66
118
  return (0, unist_util_select_1.selectAll)(types_1.Tags.contribGroup, this.front);
@@ -69,19 +121,31 @@ class Jats {
69
121
  return (0, unist_util_select_1.selectAll)(types_1.Tags.contrib, this.contribGroup);
70
122
  }
71
123
  get body() {
72
- return (0, unist_util_select_1.select)(types_1.Tags.body, this.tree);
124
+ return select(types_1.Tags.body, this.tree);
73
125
  }
74
126
  get back() {
75
- return (0, unist_util_select_1.select)(types_1.Tags.back, this.tree);
127
+ return select(types_1.Tags.back, this.tree);
76
128
  }
77
129
  get subArticles() {
78
130
  return (0, unist_util_select_1.selectAll)(types_1.Tags.subArticle, this.tree);
79
131
  }
80
132
  get refList() {
81
- return (0, unist_util_select_1.select)(types_1.Tags.refList, this.back);
133
+ return select(types_1.Tags.refList, this.back);
82
134
  }
83
135
  get references() {
84
136
  return (0, unist_util_select_1.selectAll)(types_1.Tags.ref, this.refList);
85
137
  }
86
138
  }
87
139
  exports.Jats = Jats;
140
+ function hasSingleArticle(element) {
141
+ var _a;
142
+ if (element.name === 'article') {
143
+ return true;
144
+ }
145
+ if (element.name === 'pmc-articleset' &&
146
+ ((_a = element.elements) === null || _a === void 0 ? void 0 : _a.length) === 1 &&
147
+ element.elements[0].name === 'article') {
148
+ return true;
149
+ }
150
+ return false;
151
+ }
@@ -12,7 +12,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
12
12
  return (mod && mod.__esModule) ? mod : { "default": mod };
13
13
  };
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.resolveJatsUrlFromDoi = exports.DEFAULT_RESOLVERS = exports.joss = exports.plos = exports.elife = void 0;
15
+ exports.customResolveJatsUrlFromDoi = exports.DEFAULT_RESOLVERS = exports.joss = exports.plos = exports.elife = void 0;
16
16
  const doi_utils_1 = __importDefault(require("doi-utils"));
17
17
  const node_fetch_1 = __importDefault(require("node-fetch"));
18
18
  exports.elife = {
@@ -44,7 +44,10 @@ exports.joss = {
44
44
  },
45
45
  };
46
46
  exports.DEFAULT_RESOLVERS = [exports.elife, exports.plos, exports.joss];
47
- function resolveJatsUrlFromDoi(session, doiString, resolvers = exports.DEFAULT_RESOLVERS) {
47
+ /**
48
+ * Use the known custom resolvers to pick where the JATS should be downloaded from.
49
+ */
50
+ function customResolveJatsUrlFromDoi(session, doiString, resolvers = exports.DEFAULT_RESOLVERS) {
48
51
  return __awaiter(this, void 0, void 0, function* () {
49
52
  if (!doi_utils_1.default.validate(doiString))
50
53
  throw new Error(`The doi ${doiString} is not valid`);
@@ -60,4 +63,4 @@ function resolveJatsUrlFromDoi(session, doiString, resolvers = exports.DEFAULT_R
60
63
  return jatsUrl;
61
64
  });
62
65
  }
63
- exports.resolveJatsUrlFromDoi = resolveJatsUrlFromDoi;
66
+ exports.customResolveJatsUrlFromDoi = customResolveJatsUrlFromDoi;
@@ -1282,4 +1282,20 @@ var Tags;
1282
1282
  * See: https://jats.nlm.nih.gov/publishing/tag-library/1.3/element/pub-date.html
1283
1283
  */
1284
1284
  Tags["pubDate"] = "pub-date";
1285
+ /**
1286
+ * Article Grouping Data
1287
+ *
1288
+ * Not available in articleauthoring!
1289
+ *
1290
+ * See: https://jats.nlm.nih.gov/archiving/tag-library/1.3/element/article-categories.html
1291
+ */
1292
+ Tags["articleCategories"] = "article-categories";
1293
+ /**
1294
+ * Journal Title
1295
+ *
1296
+ * Not available in articleauthoring!
1297
+ *
1298
+ * See: https://jats.nlm.nih.gov/archiving/tag-library/1.3/element/journal-title.html
1299
+ */
1300
+ Tags["journalTitle"] = "journal-title";
1285
1301
  })(Tags = exports.Tags || (exports.Tags = {}));
@@ -11,7 +11,7 @@ var RefType;
11
11
  /** Appendix */
12
12
  RefType["app"] = "app";
13
13
  /** Author notes */
14
- RefType["author-note"] = "author-note";
14
+ RefType["authorNote"] = "author-note";
15
15
  /** Points to the description of or identifier for a grant or award (<award-id>), also, possibly to an entire awards group (<award-group>) */
16
16
  RefType["award"] = "award";
17
17
  /** Bibliographic reference (typically to a <ref> element, but it may point to a <element-citation> or <mixed-citation> if there are multiple citations inside the <ref> element) */
@@ -19,7 +19,7 @@ var RefType;
19
19
  /** Biography (typically of a contributor) */
20
20
  RefType["bio"] = "bio";
21
21
  /** Textbox or sidebar */
22
- RefType["boxed-text"] = "boxed-text";
22
+ RefType["boxedText"] = "boxed-text";
23
23
  /** Chemical structure (to a <chem-struct> or <chem-struct-wrap> element) */
24
24
  RefType["chem"] = "chem";
25
25
  /** Collaboration */
@@ -31,7 +31,7 @@ var RefType;
31
31
  /** The value “custom” is used in versions of JATS that have a static list of values for the @ref-type attribute. To add a value to such a list, the cross reference is given the type “custom” and a separate @custom-type attribute provides the typing value. There is no need for this mechanism in Archiving, since there are no restrictions on the value of @ref-type, but “custom” and @custom-type have both been included in Archiving so that documents valid to a stricter version of the JATS Tag Set will also be valid to Archiving. */
32
32
  RefType["custom"] = "custom";
33
33
  /** Display formula */
34
- RefType["disp-formula"] = "disp-formula";
34
+ RefType["dispFormula"] = "disp-formula";
35
35
  /** Figure or group of figures (to a <fig> or <fig-group> element) */
36
36
  RefType["fig"] = "fig";
37
37
  /** Footnote */
@@ -49,9 +49,9 @@ var RefType;
49
49
  /** Statement */
50
50
  RefType["statement"] = "statement";
51
51
  /** Supplementary information */
52
- RefType["supplementary-material"] = "supplementary-material";
52
+ RefType["supplementaryMaterial"] = "supplementary-material";
53
53
  /** Table or group of tables (to a <table-wrap> or <table-wrap-group> element) */
54
54
  RefType["table"] = "table";
55
55
  /** Table footnote */
56
- RefType["table-fn"] = "table-fn";
56
+ RefType["tableFn"] = "table-fn";
57
57
  })(RefType = exports.RefType || (exports.RefType = {}));