jats-xml 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +1 -1
  2. package/dist/cjs/cli/parse.js +43 -35
  3. package/dist/cjs/download.js +163 -12
  4. package/dist/cjs/index.js +3 -1
  5. package/dist/cjs/jats.js +83 -19
  6. package/dist/cjs/resolvers.js +12 -9
  7. package/dist/cjs/session.js +0 -1
  8. package/dist/cjs/types/elementTags.js +16 -0
  9. package/dist/cjs/types/refType.js +5 -5
  10. package/dist/cjs/utils.js +55 -6
  11. package/dist/cjs/version.js +1 -1
  12. package/dist/esm/cli/parse.js +46 -38
  13. package/dist/esm/download.js +164 -15
  14. package/dist/esm/index.js +1 -0
  15. package/dist/esm/jats.js +69 -8
  16. package/dist/esm/resolvers.js +11 -8
  17. package/dist/esm/session.js +0 -1
  18. package/dist/esm/types/elementTags.js +16 -0
  19. package/dist/esm/types/refType.js +5 -5
  20. package/dist/esm/utils.js +54 -6
  21. package/dist/esm/version.js +1 -1
  22. package/dist/jats.js +3189 -2843
  23. package/dist/types/cli/parse.d.ts.map +1 -1
  24. package/dist/types/download.d.ts +9 -1
  25. package/dist/types/download.d.ts.map +1 -1
  26. package/dist/types/index.d.ts +1 -0
  27. package/dist/types/index.d.ts.map +1 -1
  28. package/dist/types/jats.d.ts +28 -15
  29. package/dist/types/jats.d.ts.map +1 -1
  30. package/dist/types/resolvers.d.ts +4 -1
  31. package/dist/types/resolvers.d.ts.map +1 -1
  32. package/dist/types/session.d.ts +0 -1
  33. package/dist/types/session.d.ts.map +1 -1
  34. package/dist/types/types/elementTags.d.ts +17 -1
  35. package/dist/types/types/elementTags.d.ts.map +1 -1
  36. package/dist/types/types/elements.d.ts +9 -0
  37. package/dist/types/types/elements.d.ts.map +1 -1
  38. package/dist/types/types/refType.d.ts +5 -5
  39. package/dist/types/types/refType.d.ts.map +1 -1
  40. package/dist/types/utils.d.ts +5 -2
  41. package/dist/types/utils.d.ts.map +1 -1
  42. package/dist/types/version.d.ts +1 -1
  43. package/package.json +10 -7
package/README.md CHANGED
@@ -41,7 +41,7 @@ jats summary /local/article.jats
41
41
 
42
42
  This will provide a summary, including a list of what the JATS file contains.
43
43
 
44
- ![Output of `jats summary`](images/jats-output.png)
44
+ ![Output of `jats summary`](/packages/jats-xml/images/jats-output.png)
45
45
 
46
46
  ## Working in Typescript
47
47
 
package/dist/cjs/cli/parse.js CHANGED
@@ -30,7 +30,7 @@ const utils_1 = require("../utils");
30
30
  function hasValidExtension(output) {
31
31
  return ['.xml', '.jats'].includes((0, path_1.extname)(output).toLowerCase());
32
32
  }
33
- function downloadJats(session, urlOrDoi, output) {
33
+ function downloadAndSaveJats(session, urlOrDoi, output) {
34
34
  return __awaiter(this, void 0, void 0, function* () {
35
35
  if (fs_1.default.existsSync(urlOrDoi)) {
36
36
  throw new Error(`File "${urlOrDoi}" is local and cannot be downloaded!`);
@@ -41,7 +41,7 @@ function downloadJats(session, urlOrDoi, output) {
41
41
  if (!hasValidExtension(output)) {
42
42
  session.log.warn(`The extension ${(0, path_1.extname)(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
43
43
  }
44
- const data = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
44
+ const { data } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, resolvers_1.DEFAULT_RESOLVERS);
45
45
  (0, myst_cli_utils_1.writeFileToFolder)(output, data);
46
46
  return data;
47
47
  });
@@ -52,15 +52,12 @@ function parseJats(session, file) {
52
52
  if (fs_1.default.existsSync(file)) {
53
53
  session.log.debug(`Found ${file} locally, parsing`);
54
54
  const data = fs_1.default.readFileSync(file).toString();
55
- session.log.debug(toc(`Parsed JATS file from disk in %s`));
56
- return new jats_1.Jats(data);
55
+ return new jats_1.Jats(data, { log: session.log });
57
56
  }
58
- if (doi_utils_1.default.validate(file) || (0, myst_cli_utils_1.isUrl)(file)) {
59
- const data = yield (0, download_1.downloadJatsFromUrl)(session, file, resolvers_1.DEFAULT_RESOLVERS);
60
- session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
61
- return new jats_1.Jats(data);
62
- }
63
- throw new Error(`Could not find ${file} locally, and it doesn't look like a URL or DOI`);
57
+ const { source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, resolvers_1.DEFAULT_RESOLVERS);
58
+ const jats = new jats_1.Jats(data, { source, log: session.log });
59
+ session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
60
+ return jats;
64
61
  });
65
62
  }
66
63
  function formatLongString(data, offset = 0, length = 88 - offset) {
@@ -79,6 +76,9 @@ function formatDictionary(dict, opts) {
79
76
  return Object.entries(dict)
80
77
  .map(([k, t]) => {
81
78
  var _a;
79
+ if (!t)
80
+ return null;
81
+ let wrap = typeof (opts === null || opts === void 0 ? void 0 : opts.wrap) === 'boolean' ? opts.wrap : true;
82
82
  let value = t;
83
83
  let color = chalk_1.default.yellow.bold;
84
84
  if (t && typeof t === 'object') {
@@ -86,21 +86,21 @@ function formatDictionary(dict, opts) {
86
86
  return null;
87
87
  color = (_a = t.label) !== null && _a !== void 0 ? _a : color;
88
88
  value = t.value;
89
+ wrap = typeof t.wrap === 'boolean' ? t.wrap : wrap;
89
90
  }
90
- const wrapped = (opts === null || opts === void 0 ? void 0 : opts.wrap) === false
91
- ? String(value)
92
- : formatLongString(String(value), maxLabel + 2, opts === null || opts === void 0 ? void 0 : opts.wrap);
91
+ const wrapped = wrap ? formatLongString(String(value), maxLabel + 2) : String(value);
93
92
  return `${color(k)}:${' '.repeat(maxLabel - k.length + 1)}${wrapped}`;
94
93
  })
95
94
  .filter((o) => !!o)
96
95
  .join('\n');
97
96
  }
98
97
  function jatsSummaryCLI(session, file) {
99
- var _a, _b;
98
+ var _a, _b, _c;
100
99
  return __awaiter(this, void 0, void 0, function* () {
101
100
  const jats = yield parseJats(session, file);
102
101
  const summary = {
103
- DOI: jats.doi ? doi_utils_1.default.buildUrl(jats.doi) : null,
102
+ Source: { value: jats.source, wrap: false },
103
+ DOI: jats.doi ? { value: doi_utils_1.default.buildUrl(jats.doi), wrap: false } : null,
104
104
  Title: (_a = (0, myst_common_1.toText)(jats.articleTitle)) === null || _a === void 0 ? void 0 : _a.replace(/\n/g, ' '),
105
105
  Date: (0, utils_1.formatDate)((0, utils_1.toDate)(jats.publicationDate)),
106
106
  Authors: jats.articleAuthors
@@ -108,36 +108,44 @@ function jatsSummaryCLI(session, file) {
108
108
  .join(', '),
109
109
  Abstract: (_b = (0, myst_common_1.toText)(jats.abstract)) === null || _b === void 0 ? void 0 : _b.replace(/\n/g, ' '),
110
110
  Keywords: jats.keywords.map((k) => (0, myst_common_1.toText)(k)).join(', '),
111
- License: jats.license['xlink:href'],
112
- Figures: { label: chalk_1.default.blue.bold, value: String((0, unist_util_select_1.selectAll)(types_1.Tags.fig, jats.body).length) },
113
- Equations: {
111
+ License: (_c = jats.license) === null || _c === void 0 ? void 0 : _c['xlink:href'],
112
+ };
113
+ if (jats.body) {
114
+ summary.Figures = {
115
+ label: chalk_1.default.blue.bold,
116
+ value: String((0, unist_util_select_1.selectAll)(types_1.Tags.fig, jats.body).length),
117
+ };
118
+ summary.Equations = {
114
119
  label: chalk_1.default.blue.bold,
115
120
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.dispFormula, jats.body).length),
116
- },
117
- Tables: {
121
+ };
122
+ summary.Tables = {
118
123
  label: chalk_1.default.blue.bold,
119
124
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.table, jats.body).length),
120
- },
121
- Code: {
125
+ };
126
+ summary.Code = {
122
127
  label: chalk_1.default.blue.bold,
123
128
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.code, jats.body).length),
124
- },
125
- Sections: {
129
+ };
130
+ summary.Sections = {
126
131
  label: chalk_1.default.blue.bold,
127
132
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.sec, jats.body).length),
128
- },
129
- Paragraphs: {
133
+ };
134
+ summary.Paragraphs = {
130
135
  label: chalk_1.default.blue.bold,
131
136
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.p, jats.body).length),
132
- },
133
- Citations: { label: chalk_1.default.blue.bold, value: String(jats.references.length) },
134
- 'Cross-References': {
137
+ };
138
+ summary.Citations = { label: chalk_1.default.blue.bold, value: String(jats.references.length) };
139
+ summary['Cross-References'] = {
135
140
  label: chalk_1.default.blue.bold,
136
141
  value: String((0, unist_util_select_1.selectAll)(types_1.Tags.xref, jats.body).length),
137
- },
138
- 'Sub Articles': { label: chalk_1.default.blue.bold, value: String(jats.subArticles.length) },
139
- };
142
+ };
143
+ summary['Sub Articles'] = { label: chalk_1.default.blue.bold, value: String(jats.subArticles.length) };
144
+ }
140
145
  session.log.info(formatDictionary(summary));
146
+ if (!jats.body) {
147
+ session.log.warn('\nThis is a partial JATS record that does not have <body>.');
148
+ }
141
149
  });
142
150
  }
143
151
  function jatsReferencesCLI(session, file) {
@@ -145,7 +153,7 @@ function jatsReferencesCLI(session, file) {
145
153
  const jats = yield parseJats(session, file);
146
154
  const sorted = jats.references
147
155
  .map((ref) => {
148
- const doi = (0, utils_1.findDoi)(ref);
156
+ const doiString = (0, utils_1.findArticleId)(ref, 'doi');
149
157
  const title = (0, myst_common_1.toText)((0, unist_util_select_1.select)(types_1.Tags.articleTitle, ref));
150
158
  const year = (0, myst_common_1.toText)((0, unist_util_select_1.select)(types_1.Tags.year, ref));
151
159
  const surnames = (0, unist_util_select_1.selectAll)(types_1.Tags.surname, ref);
@@ -158,7 +166,7 @@ function jatsReferencesCLI(session, file) {
158
166
  return {
159
167
  Citation: `${short} (${year})`,
160
168
  Title: title,
161
- DOI: doi ? doi_utils_1.default.buildUrl(doi) : null,
169
+ DOI: doiString ? doi_utils_1.default.buildUrl(doiString) : null,
162
170
  Count: s.length,
163
171
  };
164
172
  })
@@ -188,7 +196,7 @@ function makeDownloadCLI(program) {
188
196
  .description('Parse a JATS file and provide a summary')
189
197
  .argument('<url>', 'The JATS url or a DOI')
190
198
  .argument('<output>', 'The JATS url or a DOI')
191
- .action((0, myst_cli_utils_1.clirun)(downloadJats, { program, getSession: session_1.getSession }));
199
+ .action((0, myst_cli_utils_1.clirun)(downloadAndSaveJats, { program, getSession: session_1.getSession }));
192
200
  return command;
193
201
  }
194
202
  function addDownloadCLI(program) {
package/dist/cjs/download.js CHANGED
@@ -12,35 +12,186 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
12
12
  return (mod && mod.__esModule) ? mod : { "default": mod };
13
13
  };
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.downloadJatsFromUrl = void 0;
15
+ exports.downloadJatsFromUrl = exports.checkIfPubMedCentralHasJats = exports.convertPMID2PMCID = void 0;
16
16
  const doi_utils_1 = __importDefault(require("doi-utils"));
17
+ const chalk_1 = __importDefault(require("chalk"));
17
18
  const node_fetch_1 = __importDefault(require("node-fetch"));
18
19
  const myst_cli_utils_1 = require("myst-cli-utils");
20
+ const fair_principles_1 = require("fair-principles");
19
21
  const resolvers_1 = require("./resolvers");
22
+ function logAboutJatsFailing(session, jatsUrls) {
23
+ session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');
24
+ session.log.info(`${chalk_1.default.green(`\nThe XML ${chalk_1.default.bold('should')} be here:\n\n${jatsUrls.join('\n')}`)}\n`);
25
+ const FAIR = (0, fair_principles_1.highlightFAIR)('A', { chalk: chalk_1.default });
26
+ session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
27
+ session.log.debug((0, fair_principles_1.formatPrinciples)('A*', { chalk: chalk_1.default }));
28
+ session.log.info(`${chalk_1.default.blue('The link may work in a browser.')}\n`);
29
+ }
20
30
  function dowloadFromUrl(session, jatsUrl) {
21
31
  return __awaiter(this, void 0, void 0, function* () {
32
+ const toc = (0, myst_cli_utils_1.tic)();
22
33
  session.log.debug(`Fetching JATS from ${jatsUrl}`);
23
- const jatsResp = yield (0, node_fetch_1.default)(jatsUrl);
24
- if (!jatsResp.ok)
25
- throw new Error(`Problem fetching JATS from ${jatsUrl}`);
26
- const contentType = jatsResp.headers.get('content-type');
27
- if (!(contentType === 'application/xml' || (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
28
- throw new Error(`Expected content-type "application/xml" instead we got ${contentType} for ${jatsUrl}`);
29
- }
30
- const data = yield jatsResp.text();
34
+ const resp = yield (0, node_fetch_1.default)(jatsUrl, {
35
+ headers: [
36
+ ['accept', 'application/xml'],
37
+ [
38
+ 'user-agent',
39
+ // A bunch of publishers just show the login screen or quickly block you.
40
+ // We don't want to DDOS these publishers, they are the _good ones_ for sharing the XML!!
41
+ // But some block on the second request?!
42
+ // So we can pretend to be a random browser, I guess. How silly. 🤷‍♂️
43
+ `Mozilla/5.0 (Macintosh; Intel Mac OS X ${Math.floor(Math.random() * 100)})`,
44
+ ],
45
+ ],
46
+ });
47
+ if (!resp.ok) {
48
+ session.log.debug(`JATS failed to download from "${jatsUrl}"`);
49
+ throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
50
+ }
51
+ const contentType = resp.headers.get('content-type');
52
+ if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
53
+ (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
54
+ (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
55
+ session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}\n${chalk_1.default.dim('Things may not work, but we are going to try our best...')}`);
56
+ }
57
+ const data = yield resp.text();
58
+ session.log.debug(toc(`Fetched document with content-type "${contentType}" in %s`));
31
59
  return data;
32
60
  });
33
61
  }
62
+ /**
63
+ * There are 5.8M or so DOIs that have a full XML record:
64
+ *
65
+ * https://api.crossref.org/works?filter=full-text.type:application/xml,full-text.application:text-mining&facet=publisher-name:*&rows=0
66
+ *
67
+ * This function tries to find the correct URL for the record.
68
+ */
69
+ function checkIfDoiHasJats(session, urlOrDoi) {
70
+ var _a, _b, _c, _d;
71
+ return __awaiter(this, void 0, void 0, function* () {
72
+ if (!doi_utils_1.default.validate(urlOrDoi))
73
+ return;
74
+ const toc = (0, myst_cli_utils_1.tic)();
75
+ const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
76
+ session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
77
+ const resp = yield (0, node_fetch_1.default)(doiUrl, { headers: [['Accept', 'application/json']] });
78
+ if (!resp.ok) {
79
+ // Silently return -- other functions can try!
80
+ session.log.debug(`DOI failed to resolve: ${doiUrl}`);
81
+ return;
82
+ }
83
+ const data = (yield resp.json());
84
+ session.log.debug(toc(`DOI resolved in %s with ${(_b = (_a = data.link) === null || _a === void 0 ? void 0 : _a.length) !== null && _b !== void 0 ? _b : 0} links to content`));
85
+ if (data.link) {
86
+ session.log.debug(['', ...data.link.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '));
87
+ }
88
+ const fullXml = (_d = (_c = data.link) === null || _c === void 0 ? void 0 : _c.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _d === void 0 ? void 0 : _d.URL;
89
+ if (fullXml)
90
+ return fullXml;
91
+ session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
92
+ return undefined;
93
+ });
94
+ }
95
+ /**
96
+ * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
97
+ */
98
+ function convertPMID2PMCID(session, PMID) {
99
+ var _a, _b;
100
+ return __awaiter(this, void 0, void 0, function* () {
101
+ if (PMID.startsWith('https://')) {
102
+ const idPart = new URL(PMID).pathname.slice(1);
103
+ session.log.debug(`Extract ${PMID} to ${idPart}`);
104
+ return convertPMID2PMCID(session, idPart);
105
+ }
106
+ const toc = (0, myst_cli_utils_1.tic)();
107
+ const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
108
+ const resp = yield (0, node_fetch_1.default)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, {
109
+ headers: [['Accept', 'application/json']],
110
+ });
111
+ if (!resp.ok) {
112
+ // Silently return -- other functions can try!
113
+ session.log.debug(`Failed to convert PubMedID: ${PMID}`);
114
+ return;
115
+ }
116
+ const data = yield resp.json();
117
+ const PMCID = (_b = (_a = data === null || data === void 0 ? void 0 : data.records) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.pmcid;
118
+ session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
119
+ return PMCID;
120
+ });
121
+ }
122
+ exports.convertPMID2PMCID = convertPMID2PMCID;
123
+ function pubMedCentralJats(PMCID) {
124
+ const normalized = PMCID.replace(/^PMC:?/, '');
125
+ return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
126
+ }
127
+ function checkIfPubMedCentralHasJats(session, urlOrDoi) {
128
+ var _a, _b;
129
+ return __awaiter(this, void 0, void 0, function* () {
130
+ if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
131
+ return pubMedCentralJats(urlOrDoi);
132
+ if (!doi_utils_1.default.validate(urlOrDoi))
133
+ return;
134
+ const toc = (0, myst_cli_utils_1.tic)();
135
+ const doiUrl = doi_utils_1.default.buildUrl(urlOrDoi);
136
+ session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
137
+ const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
138
+ const resp = yield (0, node_fetch_1.default)(openAlexUrl, { headers: [['Accept', 'application/json']] });
139
+ if (!resp.ok) {
140
+ // Silently return -- other functions can try!
141
+ session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
142
+ return;
143
+ }
144
+ const data = (yield resp.json());
145
+ const PMID = (_a = data === null || data === void 0 ? void 0 : data.ids) === null || _a === void 0 ? void 0 : _a.pmid;
146
+ let PMCID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmcid;
147
+ if (!PMCID && !!PMID) {
148
+ session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`));
149
+ PMCID = yield convertPMID2PMCID(session, PMID);
150
+ if (!PMCID) {
151
+ session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
152
+ return;
153
+ }
154
+ }
155
+ if (!PMCID) {
156
+ session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s, but there is no PMCID`));
157
+ return;
158
+ }
159
+ session.log.debug(toc(`OpenAlex resolved in %s, with a PMCID of ${PMCID}`));
160
+ return pubMedCentralJats(PMCID);
161
+ });
162
+ }
163
+ exports.checkIfPubMedCentralHasJats = checkIfPubMedCentralHasJats;
34
164
  function downloadJatsFromUrl(session, urlOrDoi, resolvers) {
35
165
  return __awaiter(this, void 0, void 0, function* () {
166
+ const expectedUrls = (yield Promise.all([
167
+ checkIfPubMedCentralHasJats(session, urlOrDoi),
168
+ checkIfDoiHasJats(session, urlOrDoi),
169
+ ])).filter((u) => !!u);
170
+ if (expectedUrls.length > 0) {
171
+ session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
172
+ for (let index = 0; index < expectedUrls.length; index++) {
173
+ const url = expectedUrls[index];
174
+ try {
175
+ const data = yield dowloadFromUrl(session, url);
176
+ if (data)
177
+ return { source: url, data };
178
+ }
179
+ catch (error) {
180
+ session.log.debug(error.message);
181
+ }
182
+ }
183
+ // If there are expected URLs that don't work: see something, say something, etc.
184
+ logAboutJatsFailing(session, expectedUrls);
185
+ }
36
186
  if (doi_utils_1.default.validate(urlOrDoi)) {
37
- const jatsUrl = yield (0, resolvers_1.resolveJatsUrlFromDoi)(session, urlOrDoi, resolvers);
187
+ const jatsUrl = yield (0, resolvers_1.customResolveJatsUrlFromDoi)(session, urlOrDoi, resolvers);
38
188
  const data = yield dowloadFromUrl(session, jatsUrl);
39
- return data;
189
+ return { source: jatsUrl, data };
40
190
  }
41
191
  if ((0, myst_cli_utils_1.isUrl)(urlOrDoi)) {
192
+ session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
42
193
  const data = yield dowloadFromUrl(session, urlOrDoi);
43
- return data;
194
+ return { source: urlOrDoi, data };
44
195
  }
45
196
  throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
46
197
  });
package/dist/cjs/index.js CHANGED
@@ -17,9 +17,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
17
17
  return (mod && mod.__esModule) ? mod : { "default": mod };
18
18
  };
19
19
  Object.defineProperty(exports, "__esModule", { value: true });
20
- exports.Jats = exports.version = void 0;
20
+ exports.downloadJatsFromUrl = exports.Jats = exports.version = void 0;
21
21
  var version_1 = require("./version");
22
22
  Object.defineProperty(exports, "version", { enumerable: true, get: function () { return __importDefault(version_1).default; } });
23
23
  var jats_1 = require("./jats");
24
24
  Object.defineProperty(exports, "Jats", { enumerable: true, get: function () { return jats_1.Jats; } });
25
+ var download_1 = require("./download");
26
+ Object.defineProperty(exports, "downloadJatsFromUrl", { enumerable: true, get: function () { return download_1.downloadJatsFromUrl; } });
25
27
  __exportStar(require("./types"), exports);
package/dist/cjs/jats.js CHANGED
@@ -1,41 +1,90 @@
1
1
  "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
2
5
  Object.defineProperty(exports, "__esModule", { value: true });
3
6
  exports.Jats = void 0;
7
+ const myst_common_1 = require("myst-common");
4
8
  const xml_js_1 = require("xml-js");
9
+ const doi_utils_1 = __importDefault(require("doi-utils"));
5
10
  const utils_1 = require("./utils");
6
11
  const unist_util_select_1 = require("unist-util-select");
7
12
  const types_1 = require("./types");
13
+ const myst_cli_utils_1 = require("myst-cli-utils");
14
+ function select(selector, node) {
15
+ var _a;
16
+ return ((_a = (0, unist_util_select_1.select)(selector, node)) !== null && _a !== void 0 ? _a : undefined);
17
+ }
8
18
  class Jats {
9
- constructor(data) {
10
- this.raw = (0, xml_js_1.xml2js)(data, { compact: false });
19
+ constructor(data, opts) {
20
+ var _a;
21
+ const toc = (0, myst_cli_utils_1.tic)();
22
+ this.log = opts === null || opts === void 0 ? void 0 : opts.log;
23
+ if (opts === null || opts === void 0 ? void 0 : opts.source)
24
+ this.source = opts.source;
25
+ try {
26
+ this.raw = (0, xml_js_1.xml2js)(data, { compact: false });
27
+ }
28
+ catch (error) {
29
+ throw new Error('Problem parsing the JATS document, please ensure it is XML');
30
+ }
11
31
  const { declaration, elements } = this.raw;
12
32
  this.declaration = declaration === null || declaration === void 0 ? void 0 : declaration.attributes;
13
- if (!((elements === null || elements === void 0 ? void 0 : elements.length) === 2 && elements[0].type === 'doctype' && elements[1].name === 'article')) {
14
- throw new Error('article is not the only element of the JATS');
33
+ if (!((elements === null || elements === void 0 ? void 0 : elements.length) === 2 && elements[0].type === 'doctype' && hasSingleArticle(elements[1]))) {
34
+ throw new Error('Element <article> is not the only element of the JATS');
15
35
  }
16
36
  this.doctype = elements[0].doctype;
17
- this.tree = (0, utils_1.convertToUnist)(elements[1]);
37
+ const converted = (0, utils_1.convertToUnist)(elements[1]);
38
+ this.tree = select('article', converted);
39
+ (_a = this.log) === null || _a === void 0 ? void 0 : _a.debug(toc('Parsed and converted JATS to unist tree in %s'));
40
+ }
41
+ get frontmatter() {
42
+ var _a, _b, _c, _d;
43
+ const title = this.articleTitle;
44
+ const subtitle = this.articleSubtitle;
45
+ const date = this.publicationDate;
46
+ const authors = this.articleAuthors;
47
+ const firstSubject = select(types_1.Tags.subject, (_a = this.articleCategories) !== null && _a !== void 0 ? _a : this.front);
48
+ const journalTitle = select(types_1.Tags.journalTitle, this.front);
49
+ return {
50
+ title: title ? (0, myst_common_1.toText)(title) : undefined,
51
+ subtitle: subtitle ? (0, myst_common_1.toText)(subtitle) : undefined,
52
+ doi: (_b = this.doi) !== null && _b !== void 0 ? _b : undefined,
53
+ date: date ? (_c = (0, utils_1.toDate)(date)) === null || _c === void 0 ? void 0 : _c.toISOString() : undefined,
54
+ authors: authors === null || authors === void 0 ? void 0 : authors.map((a) => (0, utils_1.authorAndAffiliation)(a, this.tree)),
55
+ keywords: (_d = this.keywords) === null || _d === void 0 ? void 0 : _d.map((k) => (0, myst_common_1.toText)(k)),
56
+ venue: journalTitle ? { title: (0, myst_common_1.toText)(journalTitle) } : undefined,
57
+ subject: firstSubject ? (0, myst_common_1.toText)(firstSubject) : undefined,
58
+ };
18
59
  }
19
60
  get front() {
20
- return (0, unist_util_select_1.select)(types_1.Tags.front, this.tree);
61
+ return select(types_1.Tags.front, this.tree);
21
62
  }
22
63
  get premissions() {
23
- return (0, unist_util_select_1.select)(types_1.Tags.permissions, this.front);
64
+ return select(types_1.Tags.permissions, this.front);
24
65
  }
25
66
  get doi() {
26
- return (0, utils_1.findDoi)(this.front);
67
+ var _a;
68
+ return doi_utils_1.default.normalize((_a = (0, utils_1.findArticleId)(this.front, 'doi')) !== null && _a !== void 0 ? _a : '');
69
+ }
70
+ get pmc() {
71
+ var _a;
72
+ return (_a = (0, utils_1.findArticleId)(this.front, 'pmc')) === null || _a === void 0 ? void 0 : _a.replace(/^PMC:?/, '');
73
+ }
74
+ get pmid() {
75
+ return (0, utils_1.findArticleId)(this.front, 'pmid');
27
76
  }
28
77
  get publicationDates() {
29
78
  return (0, unist_util_select_1.selectAll)(types_1.Tags.pubDate, this.front);
30
79
  }
31
80
  get publicationDate() {
32
- return this.publicationDates.find((d) => !!(0, unist_util_select_1.select)(types_1.Tags.day, d));
81
+ return this.publicationDates.find((d) => !!select(types_1.Tags.day, d));
33
82
  }
34
83
  get license() {
35
- return (0, unist_util_select_1.select)(types_1.Tags.license, this.premissions);
84
+ return select(types_1.Tags.license, this.premissions);
36
85
  }
37
86
  get keywordGroup() {
38
- return (0, unist_util_select_1.select)(types_1.Tags.kwdGroup, this.front);
87
+ return select(types_1.Tags.kwdGroup, this.front);
39
88
  }
40
89
  /** The first keywords */
41
90
  get keywords() {
@@ -44,23 +93,26 @@ class Jats {
44
93
  get keywordGroups() {
45
94
  return (0, unist_util_select_1.selectAll)(types_1.Tags.kwdGroup, this.front);
46
95
  }
96
+ get articleCategories() {
97
+ return select(types_1.Tags.articleCategories, this.front);
98
+ }
47
99
  get titleGroup() {
48
- return (0, unist_util_select_1.select)(types_1.Tags.titleGroup, this.front);
100
+ return select(types_1.Tags.titleGroup, this.front);
49
101
  }
50
102
  get articleTitle() {
51
- return (0, unist_util_select_1.select)(types_1.Tags.articleTitle, this.titleGroup);
103
+ return select(types_1.Tags.articleTitle, this.titleGroup);
52
104
  }
53
105
  get articleSubtitle() {
54
- return (0, unist_util_select_1.select)(types_1.Tags.subtitle, this.titleGroup);
106
+ return select(types_1.Tags.subtitle, this.titleGroup);
55
107
  }
56
108
  get abstract() {
57
- return (0, unist_util_select_1.select)(types_1.Tags.abstract, this.front);
109
+ return select(types_1.Tags.abstract, this.front);
58
110
  }
59
111
  get abstracts() {
60
112
  return (0, unist_util_select_1.selectAll)(types_1.Tags.abstract, this.front);
61
113
  }
62
114
  get contribGroup() {
63
- return (0, unist_util_select_1.select)(types_1.Tags.contribGroup, this.front);
115
+ return select(types_1.Tags.contribGroup, this.front);
64
116
  }
65
117
  get contribGroups() {
66
118
  return (0, unist_util_select_1.selectAll)(types_1.Tags.contribGroup, this.front);
@@ -69,19 +121,31 @@ class Jats {
69
121
  return (0, unist_util_select_1.selectAll)(types_1.Tags.contrib, this.contribGroup);
70
122
  }
71
123
  get body() {
72
- return (0, unist_util_select_1.select)(types_1.Tags.body, this.tree);
124
+ return select(types_1.Tags.body, this.tree);
73
125
  }
74
126
  get back() {
75
- return (0, unist_util_select_1.select)(types_1.Tags.back, this.tree);
127
+ return select(types_1.Tags.back, this.tree);
76
128
  }
77
129
  get subArticles() {
78
130
  return (0, unist_util_select_1.selectAll)(types_1.Tags.subArticle, this.tree);
79
131
  }
80
132
  get refList() {
81
- return (0, unist_util_select_1.select)(types_1.Tags.refList, this.back);
133
+ return select(types_1.Tags.refList, this.back);
82
134
  }
83
135
  get references() {
84
136
  return (0, unist_util_select_1.selectAll)(types_1.Tags.ref, this.refList);
85
137
  }
86
138
  }
87
139
  exports.Jats = Jats;
140
+ function hasSingleArticle(element) {
141
+ var _a;
142
+ if (element.name === 'article') {
143
+ return true;
144
+ }
145
+ if (element.name === 'pmc-articleset' &&
146
+ ((_a = element.elements) === null || _a === void 0 ? void 0 : _a.length) === 1 &&
147
+ element.elements[0].name === 'article') {
148
+ return true;
149
+ }
150
+ return false;
151
+ }
package/dist/cjs/resolvers.js CHANGED
@@ -12,7 +12,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
12
12
  return (mod && mod.__esModule) ? mod : { "default": mod };
13
13
  };
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.resolveJatsUrlFromDoi = exports.DEFAULT_RESOLVERS = exports.joss = exports.plos = exports.elife = void 0;
15
+ exports.customResolveJatsUrlFromDoi = exports.DEFAULT_RESOLVERS = exports.joss = exports.plos = exports.elife = void 0;
16
16
  const doi_utils_1 = __importDefault(require("doi-utils"));
17
17
  const node_fetch_1 = __importDefault(require("node-fetch"));
18
18
  exports.elife = {
@@ -33,22 +33,25 @@ exports.plos = {
33
33
  };
34
34
  exports.joss = {
35
35
  test(url) {
36
- return new URL(url).hostname === 'joss.theoj.org';
36
+ return new URL(url).hostname === 'joss.theoj.org' && doi_utils_1.default.validate(url);
37
37
  },
38
38
  jatsUrl(url) {
39
39
  // Probably a better way to do this, the joss papers on on github!
40
- const doi = new URL(url).pathname.replace('/papers/', '');
41
- const [org, jossId] = doi.split('/');
40
+ const doiString = doi_utils_1.default.normalize(url);
41
+ const [org, jossId] = doiString.split('/');
42
42
  const id = jossId.split('.')[1];
43
43
  return `https://raw.githubusercontent.com/openjournals/joss-papers/master/joss.${id}/${org}.${jossId}.jats`;
44
44
  },
45
45
  };
46
46
  exports.DEFAULT_RESOLVERS = [exports.elife, exports.plos, exports.joss];
47
- function resolveJatsUrlFromDoi(session, doi, resolvers = exports.DEFAULT_RESOLVERS) {
47
+ /**
48
+ * Use the known custom resolvers to pick where the JATS should be downloaded from.
49
+ */
50
+ function customResolveJatsUrlFromDoi(session, doiString, resolvers = exports.DEFAULT_RESOLVERS) {
48
51
  return __awaiter(this, void 0, void 0, function* () {
49
- if (!doi_utils_1.default.validate(doi))
50
- throw new Error(`The doi ${doi} is not valid`);
51
- const doiUrl = doi_utils_1.default.buildUrl(doi);
52
+ if (!doi_utils_1.default.validate(doiString))
53
+ throw new Error(`The doi ${doiString} is not valid`);
54
+ const doiUrl = doi_utils_1.default.buildUrl(doiString);
52
55
  session.log.debug(`Resolving DOI ${doiUrl}`);
53
56
  const resp = yield (0, node_fetch_1.default)(doiUrl);
54
57
  const articleUrl = resp.url;
@@ -60,4 +63,4 @@ function resolveJatsUrlFromDoi(session, doi, resolvers = exports.DEFAULT_RESOLVE
60
63
  return jatsUrl;
61
64
  });
62
65
  }
63
- exports.resolveJatsUrlFromDoi = resolveJatsUrlFromDoi;
66
+ exports.customResolveJatsUrlFromDoi = customResolveJatsUrlFromDoi;
package/dist/cjs/session.js CHANGED
@@ -5,7 +5,6 @@ const myst_cli_utils_1 = require("myst-cli-utils");
5
5
  class Session {
6
6
  constructor(opts) {
7
7
  var _a;
8
- this.API_URL = 'https://api.myst.tools';
9
8
  this.log = (_a = opts === null || opts === void 0 ? void 0 : opts.logger) !== null && _a !== void 0 ? _a : (0, myst_cli_utils_1.chalkLogger)(myst_cli_utils_1.LogLevel.debug);
10
9
  }
11
10
  }
package/dist/cjs/types/elementTags.js CHANGED
@@ -1282,4 +1282,20 @@ var Tags;
1282
1282
  * See: https://jats.nlm.nih.gov/publishing/tag-library/1.3/element/pub-date.html
1283
1283
  */
1284
1284
  Tags["pubDate"] = "pub-date";
1285
+ /**
1286
+ * Article Grouping Data
1287
+ *
1288
+ * Not available in articleauthoring!
1289
+ *
1290
+ * See: https://jats.nlm.nih.gov/archiving/tag-library/1.3/element/article-categories.html
1291
+ */
1292
+ Tags["articleCategories"] = "article-categories";
1293
+ /**
1294
+ * Journal Title
1295
+ *
1296
+ * Not available in articleauthoring!
1297
+ *
1298
+ * See: https://jats.nlm.nih.gov/archiving/tag-library/1.3/element/journal-title.html
1299
+ */
1300
+ Tags["journalTitle"] = "journal-title";
1285
1301
  })(Tags = exports.Tags || (exports.Tags = {}));