jats-xml 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +13 -0
  2. package/dist/{types/cli → cli}/download.d.ts +1 -2
  3. package/dist/cli/download.js +158 -0
  4. package/dist/cli/index.d.ts +2 -0
  5. package/dist/{esm/cli → cli}/index.js +6 -6
  6. package/dist/{types/cli → cli}/jats-test.d.ts +1 -2
  7. package/dist/cli/jats-test.js +106 -0
  8. package/dist/{types/cli → cli}/parse.d.ts +2 -3
  9. package/dist/cli/parse.js +200 -0
  10. package/dist/{types/cli → cli}/validate.d.ts +0 -1
  11. package/dist/{esm/cli → cli}/validate.js +3 -3
  12. package/dist/index.d.ts +4 -0
  13. package/dist/index.js +4 -0
  14. package/dist/jats.cjs +34871 -0
  15. package/dist/{types/jats.d.ts → jats.d.ts} +0 -1
  16. package/dist/jats.js +110 -30703
  17. package/dist/{types/resolvers.d.ts → resolvers.d.ts} +1 -2
  18. package/dist/resolvers.js +49 -0
  19. package/dist/{types/session.d.ts → session.d.ts} +1 -2
  20. package/dist/{types/types.d.ts → types.d.ts} +0 -1
  21. package/dist/{types/utils.d.ts → utils.d.ts} +0 -1
  22. package/dist/{esm/utils.js → utils.js} +13 -5
  23. package/dist/{types/validate → validate}/dtd.d.ts +1 -2
  24. package/dist/{esm/validate → validate}/dtd.js +50 -69
  25. package/dist/validate/index.d.ts +1 -0
  26. package/dist/validate/index.js +1 -0
  27. package/dist/version.d.ts +2 -0
  28. package/dist/version.js +2 -0
  29. package/package.json +29 -39
  30. package/dist/cjs/cli/download.js +0 -186
  31. package/dist/cjs/cli/index.js +0 -18
  32. package/dist/cjs/cli/jats-test.js +0 -125
  33. package/dist/cjs/cli/parse.js +0 -225
  34. package/dist/cjs/cli/validate.js +0 -29
  35. package/dist/cjs/index.js +0 -26
  36. package/dist/cjs/jats.js +0 -151
  37. package/dist/cjs/resolvers.js +0 -67
  38. package/dist/cjs/session.js +0 -15
  39. package/dist/cjs/types.js +0 -2
  40. package/dist/cjs/utils.js +0 -152
  41. package/dist/cjs/validate/dtd.js +0 -320
  42. package/dist/cjs/validate/index.js +0 -17
  43. package/dist/cjs/version.js +0 -4
  44. package/dist/esm/cli/download.js +0 -177
  45. package/dist/esm/cli/jats-test.js +0 -117
  46. package/dist/esm/cli/parse.js +0 -217
  47. package/dist/esm/index.js +0 -4
  48. package/dist/esm/jats.js +0 -144
  49. package/dist/esm/resolvers.js +0 -60
  50. package/dist/esm/validate/index.js +0 -1
  51. package/dist/esm/version.js +0 -2
  52. package/dist/types/cli/download.d.ts.map +0 -1
  53. package/dist/types/cli/index.d.ts +0 -3
  54. package/dist/types/cli/index.d.ts.map +0 -1
  55. package/dist/types/cli/jats-test.d.ts.map +0 -1
  56. package/dist/types/cli/parse.d.ts.map +0 -1
  57. package/dist/types/cli/validate.d.ts.map +0 -1
  58. package/dist/types/index.d.ts +0 -5
  59. package/dist/types/index.d.ts.map +0 -1
  60. package/dist/types/jats.d.ts.map +0 -1
  61. package/dist/types/resolvers.d.ts.map +0 -1
  62. package/dist/types/session.d.ts.map +0 -1
  63. package/dist/types/types.d.ts.map +0 -1
  64. package/dist/types/utils.d.ts.map +0 -1
  65. package/dist/types/validate/dtd.d.ts.map +0 -1
  66. package/dist/types/validate/index.d.ts +0 -2
  67. package/dist/types/validate/index.d.ts.map +0 -1
  68. package/dist/types/version.d.ts +0 -3
  69. package/dist/types/version.d.ts.map +0 -1
  70. package/dist/{esm/session.js → session.js} +0 -0
  71. package/dist/{esm/types.js → types.js} +0 -0
package/README.md CHANGED
@@ -106,3 +106,16 @@ selectAll('fig', jats.body) // Or selectAll figures
106
106
  ## Write JATS in Node
107
107
 
108
108
  TODO!
109
+
110
+ ---
111
+
112
+ As of v1.0.0 this package is [ESM only](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c).
113
+
114
+ ---
115
+
116
+ <p style="text-align: center; color: #aaa; padding-top: 50px">
117
+ Made with love by
118
+ <a href="https://curvenote.com" target="_blank" style="color: #aaa">
119
+ <img src="https://curvenote.dev/images/icon.png" style="height: 1em" /> Curvenote
120
+ </a>
121
+ </p>
package/dist/{types/cli → cli}/download.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { ISession, Options } from '../types';
1
+ import type { ISession, Options } from '../types.js';
2
2
  /**
3
3
  * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
4
4
  */
@@ -9,4 +9,3 @@ export declare function downloadJatsFromUrl(session: ISession, urlOrDoi: string,
9
9
  source: string;
10
10
  data?: string;
11
11
  }>;
12
- //# sourceMappingURL=download.d.ts.map
package/dist/cli/download.js ADDED
@@ -0,0 +1,158 @@
1
+ import { doi } from 'doi-utils';
2
+ import fetch from 'node-fetch';
3
+ import { isUrl, tic } from 'myst-cli-utils';
4
+ import { customResolveJatsUrlFromDoi } from '../resolvers.js';
5
+ async function downloadFromUrl(session, jatsUrl, opts) {
6
+ var _a, _b;
7
+ const toc = tic();
8
+ session.log.debug(`Fetching JATS from ${jatsUrl}`);
9
+ const resp = await ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(jatsUrl, 'xml');
10
+ if (!resp.ok) {
11
+ session.log.debug(`JATS failed to download from "${jatsUrl}"`);
12
+ throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
13
+ }
14
+ const contentType = (_b = resp.headers) === null || _b === void 0 ? void 0 : _b.get('content-type');
15
+ if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
16
+ (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
17
+ (contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
18
+ session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}`);
19
+ }
20
+ const data = await resp.text();
21
+ session.log.debug(toc(`Fetched document with content-type "${contentType}" in %s`));
22
+ return data;
23
+ }
24
+ function defaultFetcher(url, kind) {
25
+ switch (kind) {
26
+ case 'json':
27
+ return fetch(url, { headers: [['Accept', 'application/json']] });
28
+ case 'xml':
29
+ return fetch(url, { headers: [['Accept', 'application/xml']] });
30
+ default:
31
+ return fetch(url);
32
+ }
33
+ }
34
+ /**
35
+ * There are 5.8M or so DOIs that have a full XML record:
36
+ *
37
+ * https://api.crossref.org/works?filter=full-text.type:application/xml,full-text.application:text-mining&facet=publisher-name:*&rows=0
38
+ *
39
+ * This function tries to find the correct URL for the record.
40
+ */
41
+ async function checkIfDoiHasJats(session, urlOrDoi, opts) {
42
+ var _a, _b, _c, _d, _e;
43
+ if (!doi.validate(urlOrDoi))
44
+ return;
45
+ const toc = tic();
46
+ const doiUrl = doi.buildUrl(urlOrDoi);
47
+ session.log.debug(`Attempting to resolving full XML from DOI ${doiUrl}`);
48
+ const resp = await ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(doiUrl, 'json');
49
+ if (!resp.ok) {
50
+ // Silently return -- other functions can try!
51
+ session.log.debug(`DOI failed to resolve: ${doiUrl}`);
52
+ return;
53
+ }
54
+ const data = (await resp.json());
55
+ session.log.debug(toc(`DOI resolved in %s with ${(_c = (_b = data.link) === null || _b === void 0 ? void 0 : _b.length) !== null && _c !== void 0 ? _c : 0} links to content`));
56
+ if (data.link) {
57
+ session.log.debug(['', ...data.link.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '));
58
+ }
59
+ const fullXml = (_e = (_d = data.link) === null || _d === void 0 ? void 0 : _d.find((link) => { var _a; return ['text/xml', 'application/xml'].includes((_a = link['content-type']) !== null && _a !== void 0 ? _a : ''); })) === null || _e === void 0 ? void 0 : _e.URL;
60
+ if (fullXml)
61
+ return fullXml;
62
+ session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
63
+ return undefined;
64
+ }
65
+ /**
66
+ * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
67
+ */
68
+ export async function convertPMID2PMCID(session, PMID, opts) {
69
+ var _a, _b, _c;
70
+ if (PMID.startsWith('https://')) {
71
+ const idPart = new URL(PMID).pathname.slice(1);
72
+ session.log.debug(`Extract ${PMID} to ${idPart}`);
73
+ return convertPMID2PMCID(session, idPart, opts);
74
+ }
75
+ const toc = tic();
76
+ const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
77
+ const resp = await ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(`${converter}?tool=jats-xml&format=json&ids=${PMID}`, 'json');
78
+ if (!resp.ok) {
79
+ // Silently return -- other functions can try!
80
+ session.log.debug(`Failed to convert PubMedID: ${PMID}`);
81
+ return;
82
+ }
83
+ const data = await resp.json();
84
+ const PMCID = (_c = (_b = data === null || data === void 0 ? void 0 : data.records) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.pmcid;
85
+ session.log.debug(toc(`Used nih.gov to transform ${PMID} to ${PMCID} in %s.`));
86
+ return PMCID;
87
+ }
88
+ function pubMedCentralJats(PMCID) {
89
+ const normalized = PMCID.replace(/^PMC:?/, '');
90
+ return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
91
+ }
92
+ export async function checkIfPubMedCentralHasJats(session, urlOrDoi, opts) {
93
+ var _a, _b, _c;
94
+ if (urlOrDoi.match(/^PMC:?([0-9]+)$/))
95
+ return pubMedCentralJats(urlOrDoi);
96
+ if (!doi.validate(urlOrDoi))
97
+ return;
98
+ const toc = tic();
99
+ const doiUrl = doi.buildUrl(urlOrDoi);
100
+ session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
101
+ const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
102
+ const resp = await ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(openAlexUrl, 'json');
103
+ if (!resp.ok) {
104
+ // Silently return -- other functions can try!
105
+ session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
106
+ return;
107
+ }
108
+ const data = (await resp.json());
109
+ const PMID = (_b = data === null || data === void 0 ? void 0 : data.ids) === null || _b === void 0 ? void 0 : _b.pmid;
110
+ let PMCID = (_c = data === null || data === void 0 ? void 0 : data.ids) === null || _c === void 0 ? void 0 : _c.pmcid;
111
+ if (!PMCID && !!PMID) {
112
+ session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s. There is no PMCID, but there is a PMID`));
113
+ PMCID = await convertPMID2PMCID(session, PMID, opts);
114
+ if (!PMCID) {
115
+ session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
116
+ return;
117
+ }
118
+ }
119
+ if (!PMCID) {
120
+ session.log.debug(toc(`OpenAlex resolved ${data === null || data === void 0 ? void 0 : data.ids.openalex} in %s, but there is no PMCID`));
121
+ return;
122
+ }
123
+ session.log.debug(toc(`OpenAlex resolved in %s, with a PMCID of ${PMCID}`));
124
+ return pubMedCentralJats(PMCID);
125
+ }
126
+ export async function downloadJatsFromUrl(session, urlOrDoi, opts = {}) {
127
+ const expectedUrls = (await Promise.all([
128
+ checkIfPubMedCentralHasJats(session, urlOrDoi, opts),
129
+ checkIfDoiHasJats(session, urlOrDoi, opts),
130
+ ])).filter((u) => !!u);
131
+ if (expectedUrls.length > 0) {
132
+ session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
133
+ for (let index = 0; index < expectedUrls.length; index++) {
134
+ const url = expectedUrls[index];
135
+ try {
136
+ const data = await downloadFromUrl(session, url, opts);
137
+ if (data)
138
+ return { success: true, source: url, data };
139
+ }
140
+ catch (error) {
141
+ session.log.debug(error.message);
142
+ }
143
+ }
144
+ // If there are expected URLs that don't work: see something, say something, etc.
145
+ return { success: false, source: expectedUrls[0] };
146
+ }
147
+ if (doi.validate(urlOrDoi)) {
148
+ const jatsUrl = await customResolveJatsUrlFromDoi(session, urlOrDoi, opts);
149
+ const data = await downloadFromUrl(session, jatsUrl, opts);
150
+ return { success: true, source: jatsUrl, data };
151
+ }
152
+ if (isUrl(urlOrDoi)) {
153
+ session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
154
+ const data = await downloadFromUrl(session, urlOrDoi, opts);
155
+ return { success: true, source: urlOrDoi, data };
156
+ }
157
+ throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
158
+ }
package/dist/cli/index.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/{esm/cli → cli}/index.js CHANGED
@@ -1,10 +1,10 @@
1
1
  #!/usr/bin/env node
2
- import commander from 'commander';
3
- import version from '../version';
4
- import { addDownloadCLI } from './parse';
5
- import { addValidateCLI } from './validate';
6
- import { addTestCLI } from './jats-test';
7
- const program = new commander.Command();
2
+ import { Command } from 'commander';
3
+ import version from '../version.js';
4
+ import { addDownloadCLI } from './parse.js';
5
+ import { addValidateCLI } from './validate.js';
6
+ import { addTestCLI } from './jats-test.js';
7
+ const program = new Command();
8
8
  addDownloadCLI(program);
9
9
  addValidateCLI(program);
10
10
  addTestCLI(program);
package/dist/{types/cli → cli}/jats-test.d.ts CHANGED
@@ -1,9 +1,8 @@
1
1
  import { Command } from 'commander';
2
- import type { ISession } from '../types';
2
+ import type { ISession } from '../types.js';
3
3
  type Options = {
4
4
  cases: string;
5
5
  };
6
6
  export declare function testJatsFile(session: ISession, file: string, opts: Options): Promise<boolean>;
7
7
  export declare function addTestCLI(program: Command): void;
8
8
  export {};
9
- //# sourceMappingURL=jats-test.d.ts.map
package/dist/cli/jats-test.js ADDED
@@ -0,0 +1,106 @@
1
+ import { Command, Option } from 'commander';
2
+ import { clirun, tic } from 'myst-cli-utils';
3
+ import yaml from 'js-yaml';
4
+ import fs from 'fs';
5
+ import { select, selectAll } from 'unist-util-select';
6
+ import { is } from 'unist-util-is';
7
+ import chalk from 'chalk';
8
+ import { getSession } from '../session.js';
9
+ import { parseJats } from './parse.js';
10
+ const INDENT = ' ';
11
+ function printNodes(expected, received) {
12
+ return chalk.reset(`\n${INDENT}${chalk.greenBright('Expected node containing')}:\n${INDENT} ${yaml
13
+ .dump(expected)
14
+ .replace(/\n/g, `\n${INDENT} `)}\n${INDENT}${chalk.redBright('Received node')}:\n${INDENT} ${yaml.dump(received).replace(/\n/g, `\n${INDENT} `)}`);
15
+ }
16
+ export async function testJatsFile(session, file, opts) {
17
+ const toc = tic();
18
+ const jats = await parseJats(session, file);
19
+ const tests = yaml.load(fs.readFileSync(opts.cases).toString());
20
+ const results = tests.cases.map((testCase, index) => {
21
+ if (!testCase.title) {
22
+ return [`Test Case ${index}`, null, 'Test must include a title'];
23
+ }
24
+ if (testCase.equals === undefined) {
25
+ return [testCase.title, null, 'Test must have an equals statement'];
26
+ }
27
+ if (testCase.select) {
28
+ const node = select(testCase.select, jats.tree);
29
+ const pass = is(node, testCase.equals);
30
+ if (testCase.equals == null && node) {
31
+ return [testCase.title, false, 'Expected no node to be present'];
32
+ }
33
+ if (!node && testCase.equals == null)
34
+ return [testCase.title, true];
35
+ if (!node)
36
+ return [testCase.title, false];
37
+ let failed = false;
38
+ const messages = [];
39
+ if (!pass) {
40
+ failed = failed || true;
41
+ messages.push(`Failed to validate node\n${printNodes(testCase.equals, node)}`);
42
+ }
43
+ return [testCase.title, !failed, messages.join('\n')];
44
+ }
45
+ else if (testCase.selectAll) {
46
+ const testNodes = selectAll(testCase.selectAll, jats.tree);
47
+ if (!testNodes && testCase.equals == null)
48
+ return [testCase.title, true];
49
+ if (!testNodes)
50
+ return [testCase.title, false, 'Node not found'];
51
+ let equals = testCase.equals;
52
+ if (!Array.isArray(testCase.equals)) {
53
+ equals = Array(testNodes.length).fill(testCase.equals);
54
+ }
55
+ let failed = false;
56
+ const messages = [];
57
+ if (equals.length !== testNodes.length) {
58
+ failed = failed || true;
59
+ messages.push(`Expected ${equals.length} nodes, got ${testNodes.length}\n${printNodes(equals, testNodes)}`);
60
+ }
61
+ else {
62
+ equals.forEach((node, ii) => {
63
+ const pass = is(testNodes[ii], node);
64
+ if (!pass) {
65
+ failed = failed || true;
66
+ messages.push(`Failed to validate node ${ii}\n${printNodes(node, testNodes[ii])}`);
67
+ }
68
+ });
69
+ }
70
+ return [testCase.title, !failed, messages.join('\n')];
71
+ }
72
+ else {
73
+ return [testCase.title, false, 'Test must have either `select` or `selectAll`'];
74
+ }
75
+ });
76
+ results.forEach((result) => {
77
+ const [title, pass, message] = result;
78
+ if (pass === null)
79
+ session.log.info(`${chalk.redBright.bold(`ERROR`)} - ${title}\n ${chalk.blueBright(message)}`);
80
+ else if (pass)
81
+ session.log.info(`${chalk.green(`PASS`)} - ${title}`);
82
+ else
83
+ session.log.info(`${chalk.red(`FAIL`)} - ${title}\n\n${INDENT}${chalk.blueBright(message)}\n`);
84
+ }, true);
85
+ const passed = results.reduce((num, [, pass]) => num + (pass ? 1 : 0), 0);
86
+ const failed = results.length - passed;
87
+ if (failed > 0 && passed === 0) {
88
+ throw new Error(toc(`${chalk.red(`Failed ${failed} tests in %s`)} 👎`));
89
+ }
90
+ if (failed > 0) {
91
+ throw new Error(toc(`${chalk.green(`Passed ${passed}/${results.length} tests in %s`)}\n${chalk.red(`Failed ${failed} tests`)} 👎`));
92
+ }
93
+ session.log.info(chalk.green(toc(`Passed ${passed} tests in %s 🚀`)));
94
+ return true;
95
+ }
96
+ function makeTestCLI(program) {
97
+ const command = new Command('test')
98
+ .description('Test JATS file against a list of cases')
99
+ .argument('<file>', 'JATS file to test')
100
+ .addOption(new Option('--cases <value>', 'The YAML file of unit tests to test against'))
101
+ .action(clirun(testJatsFile, { program, getSession }));
102
+ return command;
103
+ }
104
+ export function addTestCLI(program) {
105
+ program.addCommand(makeTestCLI(program));
106
+ }
package/dist/{types/cli → cli}/parse.d.ts CHANGED
@@ -1,6 +1,5 @@
1
1
  import { Command } from 'commander';
2
- import type { ISession, Options } from '../types';
3
- import { Jats } from '../jats';
2
+ import type { ISession, Options } from '../types.js';
3
+ import { Jats } from '../jats.js';
4
4
  export declare function parseJats(session: ISession, file: string, opts?: Options): Promise<Jats>;
5
5
  export declare function addDownloadCLI(program: Command): void;
6
- //# sourceMappingURL=parse.d.ts.map
package/dist/cli/parse.js ADDED
@@ -0,0 +1,200 @@
1
+ import { Command } from 'commander';
2
+ import fs from 'fs';
3
+ import { extname } from 'path';
4
+ import { clirun, isUrl, tic, writeFileToFolder } from 'myst-cli-utils';
5
+ import { doi } from 'doi-utils';
6
+ import chalk from 'chalk';
7
+ import { formatPrinciples, highlightFAIR } from 'fair-principles';
8
+ import { Tags } from 'jats-tags';
9
+ import { toText } from 'myst-common';
10
+ import { select, selectAll } from 'unist-util-select';
11
+ import { getSession } from '../session.js';
12
+ import { Jats } from '../jats.js';
13
+ import { downloadJatsFromUrl } from './download.js';
14
+ import { DEFAULT_RESOLVERS } from '../resolvers.js';
15
+ import { findArticleId, formatDate, toDate } from '../utils.js';
16
+ function hasValidExtension(output) {
17
+ return ['.xml', '.jats'].includes(extname(output).toLowerCase());
18
+ }
19
+ async function downloadAndSaveJats(session, urlOrDoi, output, opts = { resolvers: DEFAULT_RESOLVERS }) {
20
+ if (fs.existsSync(urlOrDoi)) {
21
+ throw new Error(`File "${urlOrDoi}" is local and cannot be downloaded!`);
22
+ }
23
+ if (!(doi.validate(urlOrDoi) || isUrl(urlOrDoi))) {
24
+ throw new Error(`Path must be a URL or DOI, not "${urlOrDoi}"`);
25
+ }
26
+ if (!hasValidExtension(output)) {
27
+ session.log.warn(`The extension ${extname(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
28
+ }
29
+ const { success, data, source } = await downloadJatsFromUrl(session, urlOrDoi, opts);
30
+ if (!success || !data) {
31
+ logAboutJatsFailing(session, [source]);
32
+ process.exit(1);
33
+ }
34
+ writeFileToFolder(output, data);
35
+ return data;
36
+ }
37
+ function logAboutJatsFailing(session, jatsUrls) {
38
+ session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');
39
+ session.log.info(`${chalk.green(`\nThe XML ${chalk.bold('should')} be here:\n\n${jatsUrls.join('\n')}`)}\n`);
40
+ const FAIR = highlightFAIR('A', { chalk });
41
+ session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
42
+ session.log.debug(formatPrinciples('A*', { chalk }));
43
+ session.log.info(`\n${chalk.blue('The link may work in a browser.')}\n`);
44
+ }
45
+ export async function parseJats(session, file, opts = { resolvers: DEFAULT_RESOLVERS }) {
46
+ const toc = tic();
47
+ if (fs.existsSync(file)) {
48
+ session.log.debug(`Found ${file} locally, parsing`);
49
+ const data = fs.readFileSync(file).toString();
50
+ return new Jats(data, { log: session.log });
51
+ }
52
+ const { success, source, data } = await downloadJatsFromUrl(session, file, opts);
53
+ if (!success || !data) {
54
+ logAboutJatsFailing(session, [source]);
55
+ process.exit(1);
56
+ }
57
+ const jats = new Jats(data, { source, log: session.log });
58
+ session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
59
+ return jats;
60
+ }
61
+ function formatLongString(data, offset = 0, length = 88 - offset) {
62
+ const out = [data.slice(0, length)];
63
+ let left = data.slice(length);
64
+ while (left.length > length) {
65
+ out.push(left.slice(0, length).trim());
66
+ left = left.slice(length);
67
+ }
68
+ if (left)
69
+ out.push(left.trim());
70
+ return out.join(`\n${' '.repeat(offset)}`);
71
+ }
72
+ function formatDictionary(dict, opts) {
73
+ const maxLabel = Object.keys(dict).reduce((a, b) => Math.max(a, b.length), 0);
74
+ return Object.entries(dict)
75
+ .map(([k, t]) => {
76
+ var _a;
77
+ if (!t)
78
+ return null;
79
+ let wrap = typeof (opts === null || opts === void 0 ? void 0 : opts.wrap) === 'boolean' ? opts.wrap : true;
80
+ let value = t;
81
+ let color = chalk.yellow.bold;
82
+ if (t && typeof t === 'object') {
83
+ if (!t.value)
84
+ return null;
85
+ color = (_a = t.label) !== null && _a !== void 0 ? _a : color;
86
+ value = t.value;
87
+ wrap = typeof t.wrap === 'boolean' ? t.wrap : wrap;
88
+ }
89
+ const wrapped = wrap ? formatLongString(String(value), maxLabel + 2) : String(value);
90
+ return `${color(k)}:${' '.repeat(maxLabel - k.length + 1)}${wrapped}`;
91
+ })
92
+ .filter((o) => !!o)
93
+ .join('\n');
94
+ }
95
+ async function jatsSummaryCLI(session, file) {
96
+ var _a, _b, _c;
97
+ const jats = await parseJats(session, file);
98
+ const summary = {
99
+ Source: { value: jats.source, wrap: false },
100
+ DOI: jats.doi ? { value: doi.buildUrl(jats.doi), wrap: false } : null,
101
+ Title: (_a = toText(jats.articleTitle)) === null || _a === void 0 ? void 0 : _a.replace(/\n/g, ' '),
102
+ Date: formatDate(toDate(jats.publicationDate)),
103
+ Authors: jats.articleAuthors
104
+ .map((a) => `${toText(select(Tags.givenNames, a))} ${toText(select(Tags.surname, a))}`)
105
+ .join(', '),
106
+ Abstract: (_b = toText(jats.abstract)) === null || _b === void 0 ? void 0 : _b.replace(/\n/g, ' '),
107
+ Keywords: jats.keywords.map((k) => toText(k)).join(', '),
108
+ License: (_c = jats.license) === null || _c === void 0 ? void 0 : _c['xlink:href'],
109
+ };
110
+ if (jats.body) {
111
+ summary.Figures = {
112
+ label: chalk.blue.bold,
113
+ value: String(selectAll(Tags.fig, jats.body).length),
114
+ };
115
+ summary.Equations = {
116
+ label: chalk.blue.bold,
117
+ value: String(selectAll(Tags.dispFormula, jats.body).length),
118
+ };
119
+ summary.Tables = {
120
+ label: chalk.blue.bold,
121
+ value: String(selectAll(Tags.table, jats.body).length),
122
+ };
123
+ summary.Code = {
124
+ label: chalk.blue.bold,
125
+ value: String(selectAll(Tags.code, jats.body).length),
126
+ };
127
+ summary.Sections = {
128
+ label: chalk.blue.bold,
129
+ value: String(selectAll(Tags.sec, jats.body).length),
130
+ };
131
+ summary.Paragraphs = {
132
+ label: chalk.blue.bold,
133
+ value: String(selectAll(Tags.p, jats.body).length),
134
+ };
135
+ summary.Citations = { label: chalk.blue.bold, value: String(jats.references.length) };
136
+ summary['Cross-References'] = {
137
+ label: chalk.blue.bold,
138
+ value: String(selectAll(Tags.xref, jats.body).length),
139
+ };
140
+ summary['Sub Articles'] = { label: chalk.blue.bold, value: String(jats.subArticles.length) };
141
+ }
142
+ session.log.info(formatDictionary(summary));
143
+ if (!jats.body) {
144
+ session.log.warn('\nThis is a partial JATS record that does not have <body>.');
145
+ }
146
+ }
147
+ async function jatsReferencesCLI(session, file) {
148
+ const jats = await parseJats(session, file);
149
+ const sorted = jats.references
150
+ .map((ref) => {
151
+ const doiString = findArticleId(ref, 'doi');
152
+ const title = toText(select(Tags.articleTitle, ref));
153
+ const year = toText(select(Tags.year, ref));
154
+ const surnames = selectAll(Tags.surname, ref);
155
+ const short = surnames.length > 2
156
+ ? toText(surnames[0]) + ' et al.'
157
+ : surnames.length === 2
158
+ ? toText(surnames[0]) + ' and ' + toText(surnames[1])
159
+ : toText(surnames[0]);
160
+ const s = selectAll(`[rid=${ref.id}]`, jats.body);
161
+ return {
162
+ Citation: `${short} (${year})`,
163
+ Title: title,
164
+ DOI: doiString ? doi.buildUrl(doiString) : null,
165
+ Count: s.length,
166
+ };
167
+ })
168
+ .sort((a, b) => b.Count - a.Count);
169
+ sorted.forEach((r) => {
170
+ session.log.info(formatDictionary(r, { wrap: false }) + '\n');
171
+ });
172
+ }
173
+ function makeSummaryCLI(program) {
174
+ const command = new Command('summary')
175
+ .description('Parse a JATS file and provide a summary')
176
+ .argument('<jats>', 'The JATS file or remote URL to be parsed')
177
+ .action(clirun(jatsSummaryCLI, { program, getSession }));
178
+ return command;
179
+ }
180
+ function makeReferencesCLI(program) {
181
+ const command = new Command('refs')
182
+ .alias('references')
183
+ .description('Parse a JATS file and provide a summary')
184
+ .argument('<jats>', 'The JATS file or remote URL to be parsed')
185
+ .action(clirun(jatsReferencesCLI, { program, getSession }));
186
+ return command;
187
+ }
188
+ function makeDownloadCLI(program) {
189
+ const command = new Command('download')
190
+ .description('Parse a JATS file and provide a summary')
191
+ .argument('<url>', 'The JATS url or a DOI')
192
+ .argument('<output>', 'The JATS output file')
193
+ .action(clirun(downloadAndSaveJats, { program, getSession }));
194
+ return command;
195
+ }
196
+ export function addDownloadCLI(program) {
197
+ program.addCommand(makeDownloadCLI(program));
198
+ program.addCommand(makeSummaryCLI(program));
199
+ program.addCommand(makeReferencesCLI(program));
200
+ }
package/dist/{types/cli → cli}/validate.d.ts CHANGED
@@ -1,3 +1,2 @@
1
1
  import { Command } from 'commander';
2
2
  export declare function addValidateCLI(program: Command): void;
3
- //# sourceMappingURL=validate.d.ts.map
package/dist/{esm/cli → cli}/validate.js CHANGED
@@ -1,12 +1,12 @@
1
1
  import { Command, Option } from 'commander';
2
2
  import { clirun } from 'myst-cli-utils';
3
- import { getSession } from '../session';
4
- import { validateJatsAgainstDtdWrapper } from '../validate';
3
+ import { getSession } from '../session.js';
4
+ import { validateJatsAgainstDtdWrapper } from '../validate/index.js';
5
5
  function makeValidateCLI(program) {
6
6
  const command = new Command('validate')
7
7
  .description(`
8
8
  Validate JATS file against DTD schema.
9
-
9
+
10
10
  The JATS DTD schema file is fetched from nih.gov ftp server if not available locally.
11
11
  This will attempt to infer the specific JATS DTD version, library, etc from the file header,
12
12
  but options are available to override the inferred values.
package/dist/index.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ export { default as version } from './version.js';
2
+ export { Jats } from './jats.js';
3
+ export * from './types.js';
4
+ export * from './validate/index.js';
package/dist/index.js ADDED
@@ -0,0 +1,4 @@
1
+ export { default as version } from './version.js';
2
+ export { Jats } from './jats.js';
3
+ export * from './types.js';
4
+ export * from './validate/index.js';