jats-xml 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -0
- package/dist/{types/cli → cli}/download.d.ts +1 -2
- package/dist/cli/download.js +158 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/{esm/cli → cli}/index.js +6 -6
- package/dist/{types/cli → cli}/jats-test.d.ts +1 -2
- package/dist/cli/jats-test.js +106 -0
- package/dist/{types/cli → cli}/parse.d.ts +2 -3
- package/dist/cli/parse.js +200 -0
- package/dist/{types/cli → cli}/validate.d.ts +0 -1
- package/dist/{esm/cli → cli}/validate.js +3 -3
- package/dist/index.d.ts +4 -0
- package/dist/index.js +4 -0
- package/dist/jats.cjs +34871 -0
- package/dist/{types/jats.d.ts → jats.d.ts} +0 -1
- package/dist/jats.js +110 -30703
- package/dist/{types/resolvers.d.ts → resolvers.d.ts} +1 -2
- package/dist/resolvers.js +49 -0
- package/dist/{types/session.d.ts → session.d.ts} +1 -2
- package/dist/{types/types.d.ts → types.d.ts} +0 -1
- package/dist/{types/utils.d.ts → utils.d.ts} +0 -1
- package/dist/{esm/utils.js → utils.js} +13 -5
- package/dist/{types/validate → validate}/dtd.d.ts +1 -2
- package/dist/{esm/validate → validate}/dtd.js +50 -69
- package/dist/validate/index.d.ts +1 -0
- package/dist/validate/index.js +1 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.js +2 -0
- package/package.json +29 -39
- package/dist/cjs/cli/download.js +0 -186
- package/dist/cjs/cli/index.js +0 -18
- package/dist/cjs/cli/jats-test.js +0 -125
- package/dist/cjs/cli/parse.js +0 -225
- package/dist/cjs/cli/validate.js +0 -29
- package/dist/cjs/index.js +0 -26
- package/dist/cjs/jats.js +0 -151
- package/dist/cjs/resolvers.js +0 -67
- package/dist/cjs/session.js +0 -15
- package/dist/cjs/types.js +0 -2
- package/dist/cjs/utils.js +0 -152
- package/dist/cjs/validate/dtd.js +0 -320
- package/dist/cjs/validate/index.js +0 -17
- package/dist/cjs/version.js +0 -4
- package/dist/esm/cli/download.js +0 -177
- package/dist/esm/cli/jats-test.js +0 -117
- package/dist/esm/cli/parse.js +0 -217
- package/dist/esm/index.js +0 -4
- package/dist/esm/jats.js +0 -144
- package/dist/esm/resolvers.js +0 -60
- package/dist/esm/validate/index.js +0 -1
- package/dist/esm/version.js +0 -2
- package/dist/types/cli/download.d.ts.map +0 -1
- package/dist/types/cli/index.d.ts +0 -3
- package/dist/types/cli/index.d.ts.map +0 -1
- package/dist/types/cli/jats-test.d.ts.map +0 -1
- package/dist/types/cli/parse.d.ts.map +0 -1
- package/dist/types/cli/validate.d.ts.map +0 -1
- package/dist/types/index.d.ts +0 -5
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/jats.d.ts.map +0 -1
- package/dist/types/resolvers.d.ts.map +0 -1
- package/dist/types/session.d.ts.map +0 -1
- package/dist/types/types.d.ts.map +0 -1
- package/dist/types/utils.d.ts.map +0 -1
- package/dist/types/validate/dtd.d.ts.map +0 -1
- package/dist/types/validate/index.d.ts +0 -2
- package/dist/types/validate/index.d.ts.map +0 -1
- package/dist/types/version.d.ts +0 -3
- package/dist/types/version.d.ts.map +0 -1
- /package/dist/{esm/session.js → session.js} +0 -0
- /package/dist/{esm/types.js → types.js} +0 -0
package/README.md
CHANGED
|
@@ -106,3 +106,16 @@ selectAll('fig', jats.body) // Or selectAll figures
|
|
|
106
106
|
## Write JATS in Node
|
|
107
107
|
|
|
108
108
|
TODO!
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
As of v1.0.0 this package is [ESM only](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c).
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
<p style="text-align: center; color: #aaa; padding-top: 50px">
|
|
117
|
+
Made with love by
|
|
118
|
+
<a href="https://curvenote.com" target="_blank" style="color: #aaa">
|
|
119
|
+
<img src="https://curvenote.dev/images/icon.png" style="height: 1em" /> Curvenote
|
|
120
|
+
</a>
|
|
121
|
+
</p>
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ISession, Options } from '../types';
|
|
1
|
+
import type { ISession, Options } from '../types.js';
|
|
2
2
|
/**
|
|
3
3
|
* https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
|
|
4
4
|
*/
|
|
@@ -9,4 +9,3 @@ export declare function downloadJatsFromUrl(session: ISession, urlOrDoi: string,
|
|
|
9
9
|
source: string;
|
|
10
10
|
data?: string;
|
|
11
11
|
}>;
|
|
12
|
-
//# sourceMappingURL=download.d.ts.map
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { doi } from 'doi-utils';
|
|
2
|
+
import fetch from 'node-fetch';
|
|
3
|
+
import { isUrl, tic } from 'myst-cli-utils';
|
|
4
|
+
import { customResolveJatsUrlFromDoi } from '../resolvers.js';
|
|
5
|
+
/**
 * Fetch the document at `jatsUrl` and return the response body as text.
 *
 * The request is made with `opts.fetcher` when one is supplied, otherwise with
 * `defaultFetcher`, asking for the `'xml'` kind. An unexpected content-type is
 * only logged as a warning; the body is still returned.
 *
 * @param session - object whose `log` is used for debug/warn output
 * @param jatsUrl - the URL to download
 * @param opts - optional settings; only `opts.fetcher` is read here
 * @returns the response body as a string
 * @throws Error carrying the HTTP status and status text when the response is not ok
 */
async function downloadFromUrl(session, jatsUrl, opts) {
  const toc = tic();
  session.log.debug(`Fetching JATS from ${jatsUrl}`);

  const customFetcher = opts === null || opts === undefined ? undefined : opts.fetcher;
  const fetcher =
    customFetcher === null || customFetcher === undefined ? defaultFetcher : customFetcher;
  const resp = await fetcher(jatsUrl, 'xml');
  if (!resp.ok) {
    session.log.debug(`JATS failed to download from "${jatsUrl}"`);
    throw new Error(`STATUS ${resp.status}: ${resp.statusText}`);
  }

  const headers = resp.headers;
  const contentType =
    headers === null || headers === undefined ? undefined : headers.get('content-type');
  // Plain text is tolerated alongside the two XML types.
  const acceptedTypes = ['application/xml', 'text/xml', 'text/plain'];
  const hasAcceptedType =
    contentType !== null &&
    contentType !== undefined &&
    acceptedTypes.some((type) => contentType.includes(type));
  if (!hasAcceptedType) {
    session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}`);
  }

  const data = await resp.text();
  session.log.debug(toc(`Fetched document with content-type "${contentType}" in %s`));
  return data;
}
|
|
24
|
+
/**
 * Fetch `url`, adding an `Accept` header that matches the requested `kind`.
 *
 * @param url - the URL to request
 * @param kind - `'json'` or `'xml'`; any other value sends no `Accept` header
 * @returns whatever `fetch` returns for the request
 */
function defaultFetcher(url, kind) {
  if (kind === 'json') {
    return fetch(url, { headers: [['Accept', 'application/json']] });
  }
  if (kind === 'xml') {
    return fetch(url, { headers: [['Accept', 'application/xml']] });
  }
  return fetch(url);
}
|
|
34
|
+
/**
 * There are 5.8M or so DOIs that have a full XML record:
 *
 * https://api.crossref.org/works?filter=full-text.type:application/xml,full-text.application:text-mining&facet=publisher-name:*&rows=0
 *
 * This function tries to find the correct URL for the record.
 *
 * The DOI URL is requested as JSON (with `opts.fetcher`, or `defaultFetcher`
 * when none is given) and the first entry of the response's `link` list whose
 * `content-type` is `text/xml` or `application/xml` is used.
 *
 * @param session - object whose `log` is used for debug output
 * @param urlOrDoi - candidate DOI; anything `doi.validate` rejects is ignored
 * @param opts - optional settings; only `opts.fetcher` is read here
 * @returns the XML link URL, or `undefined` when the input is not a DOI, the
 *   lookup fails, or the record lists no XML link
 */
async function checkIfDoiHasJats(session, urlOrDoi, opts) {
  if (!doi.validate(urlOrDoi)) return undefined;
  const toc = tic();
  const doiUrl = doi.buildUrl(urlOrDoi);
  session.log.debug(`Attempting to resolve full XML from DOI ${doiUrl}`);

  const fetcher = opts != null && opts.fetcher != null ? opts.fetcher : defaultFetcher;
  const resp = await fetcher(doiUrl, 'json');
  if (!resp.ok) {
    // Silently return -- other functions can try!
    session.log.debug(`DOI failed to resolve: ${doiUrl}`);
    return undefined;
  }

  const data = await resp.json();
  // The payload may be `null` or carry no usable `link` list; treat both as "no links"
  // instead of throwing while reading `data.link`.
  const hasLinkList = data != null && Array.isArray(data.link);
  const links = hasLinkList ? data.link : [];
  session.log.debug(toc(`DOI resolved in %s with ${links.length} links to content`));
  if (hasLinkList) {
    session.log.debug(
      ['', ...links.map((link) => `content-type: ${link['content-type']}, ${link.URL}\n`)].join(' - '),
    );
  }

  const xmlTypes = ['text/xml', 'application/xml'];
  const xmlLink = links.find((link) => {
    const type = link['content-type'];
    return xmlTypes.includes(type !== null && type !== undefined ? type : '');
  });
  const fullXml = xmlLink === null || xmlLink === undefined ? undefined : xmlLink.URL;
  if (fullXml) return fullXml;

  session.log.debug(`Could not find XML in DOI record ${doiUrl}`);
  return undefined;
}
|
|
65
|
+
/**
 * Convert a PubMed ID (PMID) into a PubMed Central ID (PMCID) with the NIH ID converter.
 *
 * https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
 *
 * `PMID` may be a bare identifier or an `https://` URL whose path is the
 * identifier; for a URL, the path (without surrounding slashes) is used.
 *
 * @param session - object whose `log` is used for debug output
 * @param PMID - the identifier, or an `https://` URL ending in the identifier
 * @param opts - optional settings; only `opts.fetcher` is read here
 * @returns the `pmcid` of the first record in the response, or `undefined`
 *   when the request fails or no such record exists
 */
export async function convertPMID2PMCID(session, PMID, opts) {
  let id = PMID;
  if (PMID.startsWith('https://')) {
    // Drop leading AND trailing slashes so ".../12345/" gives "12345" rather than "12345/".
    id = new URL(PMID).pathname.replace(/^\/+|\/+$/g, '');
    session.log.debug(`Extract ${PMID} to ${id}`);
  }

  const toc = tic();
  const converter = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/';
  const fetcher = opts != null && opts.fetcher != null ? opts.fetcher : defaultFetcher;
  // Encode the id so characters with a meaning in a query string cannot alter the request.
  const resp = await fetcher(
    `${converter}?tool=jats-xml&format=json&ids=${encodeURIComponent(id)}`,
    'json',
  );
  if (!resp.ok) {
    // Silently return -- other functions can try!
    session.log.debug(`Failed to convert PubMedID: ${id}`);
    return undefined;
  }

  const data = await resp.json();
  const records = data != null ? data.records : undefined;
  const firstRecord = records != null ? records[0] : undefined;
  const PMCID = firstRecord != null ? firstRecord.pmcid : undefined;
  session.log.debug(toc(`Used nih.gov to transform ${id} to ${PMCID} in %s.`));
  return PMCID;
}
|
|
88
|
+
/**
 * Build the NCBI E-utilities `efetch` URL for a PubMed Central article.
 *
 * Accepts the identifier as `PMC123`, `PMC:123` or `123`. It also accepts an
 * `http(s)` article URL, in which case the last path segment is taken as the
 * identifier (presumably how some upstream sources such as OpenAlex report a
 * PMCID -- verify against the caller's data).
 *
 * @param PMCID - the PubMed Central identifier, or an article URL ending in it
 * @returns the `efetch.fcgi` URL with the numeric id in the `id` parameter
 */
function pubMedCentralJats(PMCID) {
  let id = PMCID;
  if (/^https?:\/\//i.test(PMCID)) {
    const segments = new URL(PMCID).pathname.split('/').filter((part) => part.length > 0);
    // Keep the input untouched when the URL has no path to take an id from.
    if (segments.length > 0) id = segments[segments.length - 1];
  }
  const normalized = id.replace(/^PMC:?/, '');
  return `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${normalized}`;
}
|
|
92
|
+
/**
 * Work out the PubMed Central download URL for an article, if it has one.
 *
 * A `PMC123` / `PMC:123` input is turned into a URL directly. A DOI is looked
 * up on OpenAlex; when the record has a PMID but no PMCID, the PMID is
 * converted with `convertPMID2PMCID`.
 *
 * @param session - object whose `log` is used for debug output
 * @param urlOrDoi - a PMC identifier or a DOI; anything else is ignored
 * @param opts - optional settings; `opts.fetcher` is read here and `opts` is
 *   passed on to `convertPMID2PMCID`
 * @returns the URL built by `pubMedCentralJats`, or `undefined` when the input
 *   is neither form, the lookup fails, or no PMCID can be found
 */
export async function checkIfPubMedCentralHasJats(session, urlOrDoi, opts) {
  if (urlOrDoi.match(/^PMC:?([0-9]+)$/)) return pubMedCentralJats(urlOrDoi);
  if (!doi.validate(urlOrDoi)) return undefined;

  const toc = tic();
  const doiUrl = doi.buildUrl(urlOrDoi);
  session.log.debug(`Attempting to resolve PMCID using OpenAlex from ${doiUrl}`);
  const openAlexUrl = `https://api.openalex.org/works/${doiUrl}`;
  const fetcher = opts != null && opts.fetcher != null ? opts.fetcher : defaultFetcher;
  const resp = await fetcher(openAlexUrl, 'json');
  if (!resp.ok) {
    // Silently return -- other functions can try!
    session.log.debug(`Failed to lookup on OpenAlex: ${openAlexUrl}`);
    return undefined;
  }

  const data = await resp.json();
  // A record without `ids` must end in the "no PMCID" branch below, not in a
  // TypeError while formatting the log message.
  const ids = data != null && data.ids != null ? data.ids : {};
  const PMID = ids.pmid;
  let PMCID = ids.pmcid;

  if (!PMCID && PMID) {
    session.log.debug(
      toc(`OpenAlex resolved ${ids.openalex} in %s. There is no PMCID, but there is a PMID`),
    );
    PMCID = await convertPMID2PMCID(session, PMID, opts);
    if (!PMCID) {
      session.log.debug(toc(`PubMed does not have a record of ${PMID}`));
      return undefined;
    }
  }
  if (!PMCID) {
    session.log.debug(toc(`OpenAlex resolved ${ids.openalex} in %s, but there is no PMCID`));
    return undefined;
  }

  session.log.debug(toc(`OpenAlex resolved in %s, with a PMCID of ${PMCID}`));
  return pubMedCentralJats(PMCID);
}
|
|
126
|
+
/**
 * Download the JATS XML for a URL, DOI or PubMed Central identifier.
 *
 * Resolution order:
 *  1. URLs proposed by `checkIfPubMedCentralHasJats` and `checkIfDoiHasJats`
 *     (both run concurrently) are tried in that order; the first one that
 *     yields a non-empty body wins. If none works, a failure result naming the
 *     first proposed URL is returned.
 *  2. Otherwise a DOI is resolved through `customResolveJatsUrlFromDoi`.
 *  3. Otherwise a plain URL is downloaded directly.
 *
 * @param session - object whose `log` is used for debug output
 * @param urlOrDoi - the URL, DOI or PMC identifier to download
 * @param opts - settings passed through to the resolvers and the downloader
 * @returns `{ success, source, data? }`; `data` is absent when `success` is false
 * @throws Error when the input is neither resolvable nor a URL or DOI; download
 *   errors in steps 2 and 3 also propagate to the caller
 */
export async function downloadJatsFromUrl(session, urlOrDoi, opts = {}) {
  const candidates = await Promise.all([
    checkIfPubMedCentralHasJats(session, urlOrDoi, opts),
    checkIfDoiHasJats(session, urlOrDoi, opts),
  ]);
  const expectedUrls = candidates.filter(Boolean);

  if (expectedUrls.length > 0) {
    const listing = ['Trying URLs:\n'].concat(expectedUrls.map((url) => ` ${url}\n`));
    session.log.debug(listing.join(' - '));
    for (const url of expectedUrls) {
      try {
        const data = await downloadFromUrl(session, url, opts);
        if (data) return { success: true, source: url, data };
      } catch (error) {
        // A failing candidate is only logged so the remaining ones still get a turn.
        session.log.debug(error.message);
      }
    }
    // If there are expected URLs that don't work: see something, say something, etc.
    return { success: false, source: expectedUrls[0] };
  }

  if (doi.validate(urlOrDoi)) {
    const jatsUrl = await customResolveJatsUrlFromDoi(session, urlOrDoi, opts);
    const data = await downloadFromUrl(session, jatsUrl, opts);
    return { success: true, source: jatsUrl, data };
  }

  if (isUrl(urlOrDoi)) {
    session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
    const data = await downloadFromUrl(session, urlOrDoi, opts);
    return { success: true, source: urlOrDoi, data };
  }

  throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import
|
|
3
|
-
import version from '../version';
|
|
4
|
-
import { addDownloadCLI } from './parse';
|
|
5
|
-
import { addValidateCLI } from './validate';
|
|
6
|
-
import { addTestCLI } from './jats-test';
|
|
7
|
-
const program = new
|
|
2
|
+
import { Command } from 'commander';
|
|
3
|
+
import version from '../version.js';
|
|
4
|
+
import { addDownloadCLI } from './parse.js';
|
|
5
|
+
import { addValidateCLI } from './validate.js';
|
|
6
|
+
import { addTestCLI } from './jats-test.js';
|
|
7
|
+
const program = new Command();
|
|
8
8
|
addDownloadCLI(program);
|
|
9
9
|
addValidateCLI(program);
|
|
10
10
|
addTestCLI(program);
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import { Command } from 'commander';
|
|
2
|
-
import type { ISession } from '../types';
|
|
2
|
+
import type { ISession } from '../types.js';
|
|
3
3
|
type Options = {
|
|
4
4
|
cases: string;
|
|
5
5
|
};
|
|
6
6
|
export declare function testJatsFile(session: ISession, file: string, opts: Options): Promise<boolean>;
|
|
7
7
|
export declare function addTestCLI(program: Command): void;
|
|
8
8
|
export {};
|
|
9
|
-
//# sourceMappingURL=jats-test.d.ts.map
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { Command, Option } from 'commander';
|
|
2
|
+
import { clirun, tic } from 'myst-cli-utils';
|
|
3
|
+
import yaml from 'js-yaml';
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
import { select, selectAll } from 'unist-util-select';
|
|
6
|
+
import { is } from 'unist-util-is';
|
|
7
|
+
import chalk from 'chalk';
|
|
8
|
+
import { getSession } from '../session.js';
|
|
9
|
+
import { parseJats } from './parse.js';
|
|
10
|
+
// Prefix placed in front of the expected/received node dumps and failure messages printed by this module.
const INDENT = ' ';
|
|
11
|
+
/**
 * Render an "expected vs received" report for a failed node comparison.
 *
 * Both values are dumped as YAML, with every line break followed by the
 * module indent so the dump lines up under its heading.
 *
 * @param expected - the value the test case asked for
 * @param received - the node (or nodes) actually selected
 * @returns the formatted report, passed through `chalk.reset`
 */
function printNodes(expected, received) {
  const continuation = `\n${INDENT} `;
  const expectedTitle = chalk.greenBright('Expected node containing');
  const expectedDump = yaml.dump(expected).replace(/\n/g, continuation);
  const receivedTitle = chalk.redBright('Received node');
  const receivedDump = yaml.dump(received).replace(/\n/g, continuation);
  return chalk.reset(
    `\n${INDENT}${expectedTitle}:\n${INDENT} ${expectedDump}\n${INDENT}${receivedTitle}:\n${INDENT} ${receivedDump}`,
  );
}
|
|
16
|
+
/**
 * Run a YAML file of test cases against a parsed JATS document and log the outcome.
 *
 * Each case needs a `title`, an `equals` value and either a `select` or a
 * `selectAll` selector:
 *  - `select`: the single selected node is compared with `equals`; an `equals`
 *    of `null` asserts that no node is selected.
 *  - `selectAll`: `equals` is either a list matched pairwise against the
 *    selected nodes, or a single value every selected node is compared with.
 *
 * A case missing its title or `equals` is reported as an ERROR and counts as failed.
 *
 * @param session - object whose `log` is used for the report
 * @param file - the JATS file (or remote location) handed to `parseJats`
 * @param opts - options; `opts.cases` is the path of the YAML test file
 * @returns `true` when every case passes
 * @throws Error when the cases file is missing or has no `cases` list, or when
 *   at least one case fails
 */
export async function testJatsFile(session, file, opts) {
  const toc = tic();
  const jats = await parseJats(session, file);

  if (opts == null || !opts.cases) {
    throw new Error('A YAML file of test cases is required, use --cases <file>');
  }
  const tests = yaml.load(fs.readFileSync(opts.cases).toString());
  if (tests == null || !Array.isArray(tests.cases)) {
    throw new Error(`The test file "${opts.cases}" must contain a "cases" list`);
  }

  // Every result is a [title, pass, message] triple; `pass === null` marks a malformed case.
  const results = tests.cases.map((testCase, index) => {
    if (testCase == null || !testCase.title) {
      return [`Test Case ${index}`, null, 'Test must include a title'];
    }
    const title = testCase.title;
    if (testCase.equals === undefined) {
      return [title, null, 'Test must have an equals statement'];
    }

    if (testCase.select) {
      const node = select(testCase.select, jats.tree);
      if (testCase.equals == null) {
        return node ? [title, false, 'Expected no node to be present'] : [title, true];
      }
      if (!node) return [title, false, 'Node not found'];
      if (is(node, testCase.equals)) return [title, true, ''];
      return [title, false, `Failed to validate node\n${printNodes(testCase.equals, node)}`];
    }

    if (testCase.selectAll) {
      const testNodes = selectAll(testCase.selectAll, jats.tree);
      if (!testNodes && testCase.equals == null) return [title, true];
      if (!testNodes) return [title, false, 'Node not found'];
      // A single expectation applies to every selected node.
      // NOTE(review): with `equals: null` each found node is compared against `null`;
      // confirm whether this should require zero matches, as the `select` branch does.
      const equals = Array.isArray(testCase.equals)
        ? testCase.equals
        : Array(testNodes.length).fill(testCase.equals);
      if (equals.length !== testNodes.length) {
        return [
          title,
          false,
          `Expected ${equals.length} nodes, got ${testNodes.length}\n${printNodes(equals, testNodes)}`,
        ];
      }
      const messages = [];
      equals.forEach((expected, ii) => {
        if (!is(testNodes[ii], expected)) {
          messages.push(`Failed to validate node ${ii}\n${printNodes(expected, testNodes[ii])}`);
        }
      });
      return [title, messages.length === 0, messages.join('\n')];
    }

    return [title, false, 'Test must have either `select` or `selectAll`'];
  });

  for (const [title, pass, message] of results) {
    if (pass === null) {
      session.log.info(`${chalk.redBright.bold(`ERROR`)} - ${title}\n ${chalk.blueBright(message)}`);
    } else if (pass) {
      session.log.info(`${chalk.green(`PASS`)} - ${title}`);
    } else {
      session.log.info(`${chalk.red(`FAIL`)} - ${title}\n\n${INDENT}${chalk.blueBright(message)}\n`);
    }
  }

  // Malformed cases (pass === null) are not passes, so they land in `failed`.
  const passed = results.reduce((num, [, pass]) => num + (pass ? 1 : 0), 0);
  const failed = results.length - passed;
  if (failed > 0 && passed === 0) {
    throw new Error(toc(`${chalk.red(`Failed ${failed} tests in %s`)} 👎`));
  }
  if (failed > 0) {
    throw new Error(toc(`${chalk.green(`Passed ${passed}/${results.length} tests in %s`)}\n${chalk.red(`Failed ${failed} tests`)} 👎`));
  }
  session.log.info(chalk.green(toc(`Passed ${passed} tests in %s 🚀`)));
  return true;
}
|
|
96
|
+
/**
 * Build the `test` sub-command, which runs `testJatsFile` on a JATS file.
 *
 * @param program - the root command, handed to `clirun` together with `getSession`
 * @returns the configured `test` command
 */
function makeTestCLI(program) {
  return new Command('test')
    .description('Test JATS file against a list of cases')
    .argument('<file>', 'JATS file to test')
    .addOption(new Option('--cases <value>', 'The YAML file of unit tests to test against'))
    .action(clirun(testJatsFile, { program, getSession }));
}
|
|
104
|
+
/**
 * Register the `test` sub-command on `program`.
 *
 * @param program - the root command to extend
 */
export function addTestCLI(program) {
  const testCommand = makeTestCLI(program);
  program.addCommand(testCommand);
}
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { Command } from 'commander';
|
|
2
|
-
import type { ISession, Options } from '../types';
|
|
3
|
-
import { Jats } from '../jats';
|
|
2
|
+
import type { ISession, Options } from '../types.js';
|
|
3
|
+
import { Jats } from '../jats.js';
|
|
4
4
|
export declare function parseJats(session: ISession, file: string, opts?: Options): Promise<Jats>;
|
|
5
5
|
export declare function addDownloadCLI(program: Command): void;
|
|
6
|
-
//# sourceMappingURL=parse.d.ts.map
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import { extname } from 'path';
|
|
4
|
+
import { clirun, isUrl, tic, writeFileToFolder } from 'myst-cli-utils';
|
|
5
|
+
import { doi } from 'doi-utils';
|
|
6
|
+
import chalk from 'chalk';
|
|
7
|
+
import { formatPrinciples, highlightFAIR } from 'fair-principles';
|
|
8
|
+
import { Tags } from 'jats-tags';
|
|
9
|
+
import { toText } from 'myst-common';
|
|
10
|
+
import { select, selectAll } from 'unist-util-select';
|
|
11
|
+
import { getSession } from '../session.js';
|
|
12
|
+
import { Jats } from '../jats.js';
|
|
13
|
+
import { downloadJatsFromUrl } from './download.js';
|
|
14
|
+
import { DEFAULT_RESOLVERS } from '../resolvers.js';
|
|
15
|
+
import { findArticleId, formatDate, toDate } from '../utils.js';
|
|
16
|
+
/**
 * Tell whether `output` ends in an extension used for JATS files.
 *
 * @param output - the output file path
 * @returns `true` for `.xml` or `.jats`, compared case-insensitively
 */
function hasValidExtension(output) {
  const extension = extname(output).toLowerCase();
  return extension === '.xml' || extension === '.jats';
}
|
|
19
|
+
/**
 * Download JATS XML from a URL or DOI and write it to `output`.
 *
 * An unusual output extension only produces a warning. When the download does
 * not succeed, the failure is explained through `logAboutJatsFailing` and the
 * process exits with status 1.
 *
 * @param session - object whose `log` is used for output
 * @param urlOrDoi - the remote location; must not be an existing local path
 * @param output - the path the XML is written to
 * @param opts - download settings; defaults to the built-in resolvers
 * @returns the downloaded XML text
 * @throws Error when `urlOrDoi` is a local file or is neither a URL nor a DOI
 */
async function downloadAndSaveJats(session, urlOrDoi, output, opts = { resolvers: DEFAULT_RESOLVERS }) {
  if (fs.existsSync(urlOrDoi)) {
    throw new Error(`File "${urlOrDoi}" is local and cannot be downloaded!`);
  }
  const looksRemote = doi.validate(urlOrDoi) || isUrl(urlOrDoi);
  if (!looksRemote) {
    throw new Error(`Path must be a URL or DOI, not "${urlOrDoi}"`);
  }
  if (!hasValidExtension(output)) {
    session.log.warn(`The extension ${extname(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
  }

  const result = await downloadJatsFromUrl(session, urlOrDoi, opts);
  if (!(result.success && result.data)) {
    logAboutJatsFailing(session, [result.source]);
    process.exit(1);
  }
  writeFileToFolder(output, result.data);
  return result.data;
}
|
|
37
|
+
/**
 * Explain to the user that the JATS XML could not be downloaded.
 *
 * Logs a warning, the URLs where the XML was expected, a note about
 * publishers blocking programmatic access, and a hint to try a browser.
 *
 * @param session - object whose `log` receives the messages
 * @param jatsUrls - the URLs that were expected to serve the XML
 */
function logAboutJatsFailing(session, jatsUrls) {
  session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');

  const urlList = jatsUrls.join('\n');
  const whereItShouldBe = chalk.green(`\nThe XML ${chalk.bold('should')} be here:\n\n${urlList}`);
  session.log.info(`${whereItShouldBe}\n`);

  const FAIR = highlightFAIR('A', { chalk });
  session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
  session.log.debug(formatPrinciples('A*', { chalk }));

  const browserHint = chalk.blue('The link may work in a browser.');
  session.log.info(`\n${browserHint}\n`);
}
|
|
45
|
+
/**
 * Load a JATS document from a local file or, failing that, from a remote location.
 *
 * An existing local path is read and parsed directly. Anything else is handed
 * to `downloadJatsFromUrl`; when that does not succeed, the failure is
 * explained through `logAboutJatsFailing` and the process exits with status 1.
 *
 * @param session - object whose `log` is used for output and given to `Jats`
 * @param file - a local path, URL, DOI or other identifier the downloader accepts
 * @param opts - download settings; defaults to the built-in resolvers
 * @returns the parsed `Jats` instance; `source` is only set for downloads
 */
export async function parseJats(session, file, opts = { resolvers: DEFAULT_RESOLVERS }) {
  const toc = tic();

  const isLocal = fs.existsSync(file);
  if (isLocal) {
    session.log.debug(`Found ${file} locally, parsing`);
    const contents = fs.readFileSync(file).toString();
    return new Jats(contents, { log: session.log });
  }

  const downloaded = await downloadJatsFromUrl(session, file, opts);
  if (!(downloaded.success && downloaded.data)) {
    logAboutJatsFailing(session, [downloaded.source]);
    process.exit(1);
  }
  const jats = new Jats(downloaded.data, { source: downloaded.source, log: session.log });
  session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
  return jats;
}
|
|
61
|
+
/**
 * Hard-wrap `data` into chunks of `length` characters, one chunk per line.
 *
 * The first chunk is kept as is; each following chunk is trimmed and indented
 * by `offset` spaces so continuation lines sit under the first one.
 *
 * @param data - the text to wrap
 * @param offset - number of spaces put in front of every continuation line
 * @param length - characters per line; defaults to what is left of 88 columns
 *   after `offset`
 * @returns the wrapped text, or `data` unchanged when `length` leaves no room
 *   to wrap (zero, negative or not a number)
 */
function formatLongString(data, offset = 0, length = 88 - offset) {
  // With no positive width the chunking below could never advance through the
  // text, so hand the text back unwrapped instead of looping forever.
  if (!(length > 0)) return data;

  const lines = [data.slice(0, length)];
  for (let start = length; start < data.length; start += length) {
    lines.push(data.slice(start, start + length).trim());
  }
  return lines.join(`\n${' '.repeat(offset)}`);
}
|
|
72
|
+
/**
 * Format a dictionary as aligned `label: value` lines.
 *
 * Entries with a falsy value are left out. A value may be a plain value or an
 * object `{ value, label?, wrap? }`, where `label` is the function used to
 * colour the key and `wrap` overrides the wrapping choice for that entry.
 *
 * @param dict - the entries to print, in insertion order
 * @param opts - optional settings; a boolean `opts.wrap` sets the default
 *   wrapping (on when absent)
 * @returns the lines joined with line breaks
 */
function formatDictionary(dict, opts) {
  const labelWidth = Math.max(0, ...Object.keys(dict).map((key) => key.length));
  const defaultWrap = opts != null && typeof opts.wrap === 'boolean' ? opts.wrap : true;

  const lines = [];
  for (const [key, entry] of Object.entries(dict)) {
    if (!entry) continue;
    let color = chalk.yellow.bold;
    let value = entry;
    let wrap = defaultWrap;
    if (typeof entry === 'object') {
      if (!entry.value) continue;
      if (entry.label !== null && entry.label !== undefined) color = entry.label;
      value = entry.value;
      if (typeof entry.wrap === 'boolean') wrap = entry.wrap;
    }
    const text = String(value);
    // Continuation lines are indented past the label, the colon and one space.
    const shown = wrap ? formatLongString(text, labelWidth + 2) : text;
    const padding = ' '.repeat(labelWidth - key.length + 1);
    lines.push(`${color(key)}:${padding}${shown}`);
  }
  return lines.join('\n');
}
|
|
95
|
+
/**
 * Print a summary of a JATS document: metadata first, then element counts.
 *
 * The counts are only included when the document has a body; without one a
 * warning about the partial record is logged after the summary.
 *
 * @param session - object whose `log` receives the summary
 * @param file - the JATS file or remote location handed to `parseJats`
 */
async function jatsSummaryCLI(session, file) {
  const jats = await parseJats(session, file);

  // Text of a node on a single line, or undefined when there is no text.
  const oneLine = (node) => {
    const text = toText(node);
    return text === null || text === undefined ? undefined : text.replace(/\n/g, ' ');
  };
  const fullName = (author) =>
    `${toText(select(Tags.givenNames, author))} ${toText(select(Tags.surname, author))}`;
  // Counts are shown with a blue label to set them apart from the metadata.
  const stat = (count) => ({ label: chalk.blue.bold, value: String(count) });
  const countInBody = (tag) => stat(selectAll(tag, jats.body).length);

  const summary = {
    Source: { value: jats.source, wrap: false },
    DOI: jats.doi ? { value: doi.buildUrl(jats.doi), wrap: false } : null,
    Title: oneLine(jats.articleTitle),
    Date: formatDate(toDate(jats.publicationDate)),
    Authors: jats.articleAuthors.map(fullName).join(', '),
    Abstract: oneLine(jats.abstract),
    Keywords: jats.keywords.map((k) => toText(k)).join(', '),
    License:
      jats.license === null || jats.license === undefined ? undefined : jats.license['xlink:href'],
  };

  if (jats.body) {
    summary.Figures = countInBody(Tags.fig);
    summary.Equations = countInBody(Tags.dispFormula);
    summary.Tables = countInBody(Tags.table);
    summary.Code = countInBody(Tags.code);
    summary.Sections = countInBody(Tags.sec);
    summary.Paragraphs = countInBody(Tags.p);
    summary.Citations = stat(jats.references.length);
    summary['Cross-References'] = countInBody(Tags.xref);
    summary['Sub Articles'] = stat(jats.subArticles.length);
  }

  session.log.info(formatDictionary(summary));
  if (!jats.body) {
    session.log.warn('\nThis is a partial JATS record that does not have <body>.');
  }
}
|
|
147
|
+
/**
 * Print the references of a JATS document, most cited first.
 *
 * For each reference a short citation ("Surname (year)", "A and B (year)" or
 * "A et al. (year)"), the title, the DOI URL and the number of nodes in the
 * body whose `rid` is the reference id are printed.
 *
 * @param session - object whose `log` receives the list
 * @param file - the JATS file or remote location handed to `parseJats`
 */
async function jatsReferencesCLI(session, file) {
  const jats = await parseJats(session, file);

  const sorted = jats.references
    .map((ref) => {
      const doiString = findArticleId(ref, 'doi');
      const title = toText(select(Tags.articleTitle, ref));
      const year = toText(select(Tags.year, ref));
      const surnames = selectAll(Tags.surname, ref);
      let short;
      if (surnames.length > 2) {
        short = toText(surnames[0]) + ' et al.';
      } else if (surnames.length === 2) {
        short = toText(surnames[0]) + ' and ' + toText(surnames[1]);
      } else {
        short = toText(surnames[0]);
      }

      // The id is quoted (and escaped) because reference ids may hold characters,
      // such as ".", that are not allowed in an unquoted attribute value.
      // A record without <body>, or a reference without an id, has no citations to count.
      let count = 0;
      if (jats.body && ref.id !== null && ref.id !== undefined) {
        const quotedId = String(ref.id).replace(/(["\\])/g, '\\$1');
        count = selectAll(`[rid="${quotedId}"]`, jats.body).length;
      }

      return {
        Citation: `${short} (${year})`,
        Title: title,
        DOI: doiString ? doi.buildUrl(doiString) : null,
        Count: count,
      };
    })
    .sort((a, b) => b.Count - a.Count);

  sorted.forEach((r) => {
    session.log.info(formatDictionary(r, { wrap: false }) + '\n');
  });
}
|
|
173
|
+
/**
 * Build the `summary` sub-command, which runs `jatsSummaryCLI`.
 *
 * @param program - the root command, handed to `clirun` together with `getSession`
 * @returns the configured `summary` command
 */
function makeSummaryCLI(program) {
  return new Command('summary')
    .description('Parse a JATS file and provide a summary')
    .argument('<jats>', 'The JATS file or remote URL to be parsed')
    .action(clirun(jatsSummaryCLI, { program, getSession }));
}
|
|
180
|
+
/**
 * Build the `refs` sub-command (alias `references`), which runs `jatsReferencesCLI`.
 *
 * @param program - the root command, handed to `clirun` together with `getSession`
 * @returns the configured `refs` command
 */
function makeReferencesCLI(program) {
  const command = new Command('refs')
    .alias('references')
    // The help text used to repeat the `summary` description; describe this command instead.
    .description('Parse a JATS file and list its references, most cited first')
    .argument('<jats>', 'The JATS file or remote URL to be parsed')
    .action(clirun(jatsReferencesCLI, { program, getSession }));
  return command;
}
|
|
188
|
+
/**
 * Build the `download` sub-command, which runs `downloadAndSaveJats`.
 *
 * @param program - the root command, handed to `clirun` together with `getSession`
 * @returns the configured `download` command
 */
function makeDownloadCLI(program) {
  const command = new Command('download')
    // The help text used to repeat the `summary` description; describe this command instead.
    .description('Download a JATS file from a URL or DOI and save it to the output file')
    .argument('<url>', 'The JATS url or a DOI')
    .argument('<output>', 'The JATS output file')
    .action(clirun(downloadAndSaveJats, { program, getSession }));
  return command;
}
|
|
196
|
+
/**
 * Register the `download`, `summary` and `refs` sub-commands on `program`, in that order.
 *
 * @param program - the root command to extend
 */
export function addDownloadCLI(program) {
  const factories = [makeDownloadCLI, makeSummaryCLI, makeReferencesCLI];
  for (const makeCommand of factories) {
    program.addCommand(makeCommand(program));
  }
}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { Command, Option } from 'commander';
|
|
2
2
|
import { clirun } from 'myst-cli-utils';
|
|
3
|
-
import { getSession } from '../session';
|
|
4
|
-
import { validateJatsAgainstDtdWrapper } from '../validate';
|
|
3
|
+
import { getSession } from '../session.js';
|
|
4
|
+
import { validateJatsAgainstDtdWrapper } from '../validate/index.js';
|
|
5
5
|
function makeValidateCLI(program) {
|
|
6
6
|
const command = new Command('validate')
|
|
7
7
|
.description(`
|
|
8
8
|
Validate JATS file against DTD schema.
|
|
9
|
-
|
|
9
|
+
|
|
10
10
|
The JATS DTD schema file is fetched from nih.gov ftp server if not available locally.
|
|
11
11
|
This will attempt to infer the specific JATS DTD version, library, etc from the file header,
|
|
12
12
|
but options are available to override the inferred values.
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED