jats-xml 0.0.13 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/dist/cjs/{download.js → cli/download.js} +10 -20
- package/dist/cjs/cli/index.js +2 -0
- package/dist/cjs/cli/parse.js +21 -4
- package/dist/cjs/cli/validate.js +22 -0
- package/dist/cjs/index.js +2 -3
- package/dist/cjs/validate/dtd.js +170 -0
- package/dist/cjs/validate/index.js +17 -0
- package/dist/cjs/version.js +1 -1
- package/dist/esm/{download.js → cli/download.js} +10 -20
- package/dist/esm/cli/index.js +2 -0
- package/dist/esm/cli/parse.js +21 -4
- package/dist/esm/cli/validate.js +18 -0
- package/dist/esm/index.js +1 -1
- package/dist/esm/validate/dtd.js +139 -0
- package/dist/esm/validate/index.js +1 -0
- package/dist/esm/version.js +1 -1
- package/dist/jats.js +16876 -523
- package/dist/types/{download.d.ts → cli/download.d.ts} +3 -2
- package/dist/types/cli/download.d.ts.map +1 -0
- package/dist/types/cli/parse.d.ts.map +1 -1
- package/dist/types/cli/validate.d.ts +3 -0
- package/dist/types/cli/validate.d.ts.map +1 -0
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/jats.d.ts +2 -2
- package/dist/types/jats.d.ts.map +1 -1
- package/dist/types/types/elements.d.ts +2 -2
- package/dist/types/types/elements.d.ts.map +1 -1
- package/dist/types/validate/dtd.d.ts +11 -0
- package/dist/types/validate/dtd.d.ts.map +1 -0
- package/dist/types/validate/index.d.ts +2 -0
- package/dist/types/validate/index.d.ts.map +1 -0
- package/dist/types/version.d.ts +1 -1
- package/package.json +5 -1
- package/dist/types/download.d.ts.map +0 -1
package/README.md
CHANGED
|
@@ -35,7 +35,7 @@ jats download https://elifesciences.org/articles/81952 article.jats
|
|
|
35
35
|
|
|
36
36
|
Note, currently this just downloads the XML, **not** the associated files.
|
|
37
37
|
|
|
38
|
-
`
|
|
38
|
+
`summary`: summarize the contents of the JATS, given a URL, DOI, or local file
|
|
39
39
|
|
|
40
40
|
```bash
|
|
41
41
|
jats summary https://elifesciences.org/articles/81952
|
|
@@ -47,6 +47,12 @@ This will provide a summary, including a list of what the JATS file contains.
|
|
|
47
47
|
|
|
48
48
|

|
|
49
49
|
|
|
50
|
+
`validate`: validate a local file against the JATS Archiving DTD schema. By default, this uses JATS 1.3 with MathML 3.
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
jats validate article.jats --jats 1.2 --mathml 2
|
|
54
|
+
```
|
|
55
|
+
|
|
50
56
|
## Working in Typescript
|
|
51
57
|
|
|
52
58
|
All tags are accessible as types/enums. There is also documentation from each node-type
|
|
@@ -14,20 +14,10 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
14
14
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
15
|
exports.downloadJatsFromUrl = exports.checkIfPubMedCentralHasJats = exports.convertPMID2PMCID = void 0;
|
|
16
16
|
const doi_utils_1 = __importDefault(require("doi-utils"));
|
|
17
|
-
const chalk_1 = __importDefault(require("chalk"));
|
|
18
17
|
const node_fetch_1 = __importDefault(require("node-fetch"));
|
|
19
18
|
const myst_cli_utils_1 = require("myst-cli-utils");
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
function logAboutJatsFailing(session, jatsUrls) {
|
|
23
|
-
session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');
|
|
24
|
-
session.log.info(`${chalk_1.default.green(`\nThe XML ${chalk_1.default.bold('should')} be here:\n\n${jatsUrls.join('\n')}`)}\n`);
|
|
25
|
-
const FAIR = (0, fair_principles_1.highlightFAIR)('A', { chalk: chalk_1.default });
|
|
26
|
-
session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
|
|
27
|
-
session.log.debug((0, fair_principles_1.formatPrinciples)('A*', { chalk: chalk_1.default }));
|
|
28
|
-
session.log.info(`${chalk_1.default.blue('The link may work in a browser.')}\n`);
|
|
29
|
-
}
|
|
30
|
-
function dowloadFromUrl(session, jatsUrl, opts) {
|
|
19
|
+
const resolvers_1 = require("../resolvers");
|
|
20
|
+
function downloadFromUrl(session, jatsUrl, opts) {
|
|
31
21
|
var _a, _b;
|
|
32
22
|
return __awaiter(this, void 0, void 0, function* () {
|
|
33
23
|
const toc = (0, myst_cli_utils_1.tic)();
|
|
@@ -41,7 +31,7 @@ function dowloadFromUrl(session, jatsUrl, opts) {
|
|
|
41
31
|
if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
|
|
42
32
|
(contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
|
|
43
33
|
(contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
|
|
44
|
-
session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}
|
|
34
|
+
session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}`);
|
|
45
35
|
}
|
|
46
36
|
const data = yield resp.text();
|
|
47
37
|
session.log.debug(toc(`Fetched document with content-type "${contentType}" in %s`));
|
|
@@ -169,26 +159,26 @@ function downloadJatsFromUrl(session, urlOrDoi, opts = {}) {
|
|
|
169
159
|
for (let index = 0; index < expectedUrls.length; index++) {
|
|
170
160
|
const url = expectedUrls[index];
|
|
171
161
|
try {
|
|
172
|
-
const data = yield
|
|
162
|
+
const data = yield downloadFromUrl(session, url, opts);
|
|
173
163
|
if (data)
|
|
174
|
-
return { source: url, data };
|
|
164
|
+
return { success: true, source: url, data };
|
|
175
165
|
}
|
|
176
166
|
catch (error) {
|
|
177
167
|
session.log.debug(error.message);
|
|
178
168
|
}
|
|
179
169
|
}
|
|
180
170
|
// If there are expected URLs that don't work: see something, say something, etc.
|
|
181
|
-
|
|
171
|
+
return { success: false, source: expectedUrls[0] };
|
|
182
172
|
}
|
|
183
173
|
if (doi_utils_1.default.validate(urlOrDoi)) {
|
|
184
174
|
const jatsUrl = yield (0, resolvers_1.customResolveJatsUrlFromDoi)(session, urlOrDoi, opts);
|
|
185
|
-
const data = yield
|
|
186
|
-
return { source: jatsUrl, data };
|
|
175
|
+
const data = yield downloadFromUrl(session, jatsUrl, opts);
|
|
176
|
+
return { success: true, source: jatsUrl, data };
|
|
187
177
|
}
|
|
188
178
|
if ((0, myst_cli_utils_1.isUrl)(urlOrDoi)) {
|
|
189
179
|
session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
|
|
190
|
-
const data = yield
|
|
191
|
-
return { source: urlOrDoi, data };
|
|
180
|
+
const data = yield downloadFromUrl(session, urlOrDoi, opts);
|
|
181
|
+
return { success: true, source: urlOrDoi, data };
|
|
192
182
|
}
|
|
193
183
|
throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
|
|
194
184
|
});
|
package/dist/cjs/cli/index.js
CHANGED
|
@@ -7,8 +7,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
7
7
|
const commander_1 = __importDefault(require("commander"));
|
|
8
8
|
const version_1 = __importDefault(require("../version"));
|
|
9
9
|
const parse_1 = require("./parse");
|
|
10
|
+
const validate_1 = require("./validate");
|
|
10
11
|
const program = new commander_1.default.Command();
|
|
11
12
|
(0, parse_1.addDownloadCLI)(program);
|
|
13
|
+
(0, validate_1.addValidateCLI)(program);
|
|
12
14
|
program.version(`v${version_1.default}`, '-v, --version', 'Print the current version of jats-xml');
|
|
13
15
|
program.option('-d, --debug', 'Log out any errors to the console.');
|
|
14
16
|
program.parse(process.argv);
|
package/dist/cjs/cli/parse.js
CHANGED
|
@@ -19,12 +19,13 @@ const path_1 = require("path");
|
|
|
19
19
|
const myst_cli_utils_1 = require("myst-cli-utils");
|
|
20
20
|
const doi_utils_1 = __importDefault(require("doi-utils"));
|
|
21
21
|
const chalk_1 = __importDefault(require("chalk"));
|
|
22
|
+
const fair_principles_1 = require("fair-principles");
|
|
22
23
|
const session_1 = require("../session");
|
|
23
24
|
const types_1 = require("../types");
|
|
24
25
|
const jats_1 = require("../jats");
|
|
25
26
|
const myst_common_1 = require("myst-common");
|
|
26
27
|
const unist_util_select_1 = require("unist-util-select");
|
|
27
|
-
const download_1 = require("
|
|
28
|
+
const download_1 = require("./download");
|
|
28
29
|
const resolvers_1 = require("../resolvers");
|
|
29
30
|
const utils_1 = require("../utils");
|
|
30
31
|
function hasValidExtension(output) {
|
|
@@ -41,11 +42,23 @@ function downloadAndSaveJats(session, urlOrDoi, output, opts = { resolvers: reso
|
|
|
41
42
|
if (!hasValidExtension(output)) {
|
|
42
43
|
session.log.warn(`The extension ${(0, path_1.extname)(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
|
|
43
44
|
}
|
|
44
|
-
const { data } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, opts);
|
|
45
|
+
const { success, data, source } = yield (0, download_1.downloadJatsFromUrl)(session, urlOrDoi, opts);
|
|
46
|
+
if (!success || !data) {
|
|
47
|
+
logAboutJatsFailing(session, [source]);
|
|
48
|
+
process.exit(1);
|
|
49
|
+
}
|
|
45
50
|
(0, myst_cli_utils_1.writeFileToFolder)(output, data);
|
|
46
51
|
return data;
|
|
47
52
|
});
|
|
48
53
|
}
|
|
54
|
+
function logAboutJatsFailing(session, jatsUrls) {
|
|
55
|
+
session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');
|
|
56
|
+
session.log.info(`${chalk_1.default.green(`\nThe XML ${chalk_1.default.bold('should')} be here:\n\n${jatsUrls.join('\n')}`)}\n`);
|
|
57
|
+
const FAIR = (0, fair_principles_1.highlightFAIR)('A', { chalk: chalk_1.default });
|
|
58
|
+
session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
|
|
59
|
+
session.log.debug((0, fair_principles_1.formatPrinciples)('A*', { chalk: chalk_1.default }));
|
|
60
|
+
session.log.info(`\n${chalk_1.default.blue('The link may work in a browser.')}\n`);
|
|
61
|
+
}
|
|
49
62
|
function parseJats(session, file, opts = { resolvers: resolvers_1.DEFAULT_RESOLVERS }) {
|
|
50
63
|
return __awaiter(this, void 0, void 0, function* () {
|
|
51
64
|
const toc = (0, myst_cli_utils_1.tic)();
|
|
@@ -54,7 +67,11 @@ function parseJats(session, file, opts = { resolvers: resolvers_1.DEFAULT_RESOLV
|
|
|
54
67
|
const data = fs_1.default.readFileSync(file).toString();
|
|
55
68
|
return new jats_1.Jats(data, { log: session.log });
|
|
56
69
|
}
|
|
57
|
-
const { source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, opts);
|
|
70
|
+
const { success, source, data } = yield (0, download_1.downloadJatsFromUrl)(session, file, opts);
|
|
71
|
+
if (!success || !data) {
|
|
72
|
+
logAboutJatsFailing(session, [source]);
|
|
73
|
+
process.exit(1);
|
|
74
|
+
}
|
|
58
75
|
const jats = new jats_1.Jats(data, { source, log: session.log });
|
|
59
76
|
session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
|
|
60
77
|
return jats;
|
|
@@ -195,7 +212,7 @@ function makeDownloadCLI(program) {
|
|
|
195
212
|
const command = new commander_1.Command('download')
|
|
196
213
|
.description('Parse a JATS file and provide a summary')
|
|
197
214
|
.argument('<url>', 'The JATS url or a DOI')
|
|
198
|
-
.argument('<output>', 'The JATS
|
|
215
|
+
.argument('<output>', 'The JATS output file')
|
|
199
216
|
.action((0, myst_cli_utils_1.clirun)(downloadAndSaveJats, { program, getSession: session_1.getSession }));
|
|
200
217
|
return command;
|
|
201
218
|
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.addValidateCLI = void 0;
|
|
4
|
+
const commander_1 = require("commander");
|
|
5
|
+
const myst_cli_utils_1 = require("myst-cli-utils");
|
|
6
|
+
const session_1 = require("../session");
|
|
7
|
+
const validate_1 = require("../validate");
|
|
8
|
+
function makeValidateCLI(program) {
|
|
9
|
+
const command = new commander_1.Command('validate')
|
|
10
|
+
.description('Fetch JATS DTD schema file from nih.gov ftp server')
|
|
11
|
+
.argument('<file>', 'JATS file to validate')
|
|
12
|
+
.addOption(new commander_1.Option('--jats <version>', 'JATS version, must be 1.2 or later').default('1.3'))
|
|
13
|
+
.addOption(new commander_1.Option('--mathml <version>', 'MathML version, 2 or 3').default('3'))
|
|
14
|
+
.addOption(new commander_1.Option('--oasis', 'Use OASIS table model').default(false))
|
|
15
|
+
.addOption(new commander_1.Option('--directory <value>', 'Directory to save DTD file'))
|
|
16
|
+
.action((0, myst_cli_utils_1.clirun)(validate_1.validateJatsAgainstDtdWrapper, { program, getSession: session_1.getSession }));
|
|
17
|
+
return command;
|
|
18
|
+
}
|
|
19
|
+
function addValidateCLI(program) {
|
|
20
|
+
program.addCommand(makeValidateCLI(program));
|
|
21
|
+
}
|
|
22
|
+
exports.addValidateCLI = addValidateCLI;
|
package/dist/cjs/index.js
CHANGED
|
@@ -17,11 +17,10 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
17
17
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
18
18
|
};
|
|
19
19
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
-
exports.
|
|
20
|
+
exports.Jats = exports.version = void 0;
|
|
21
21
|
var version_1 = require("./version");
|
|
22
22
|
Object.defineProperty(exports, "version", { enumerable: true, get: function () { return __importDefault(version_1).default; } });
|
|
23
23
|
var jats_1 = require("./jats");
|
|
24
24
|
Object.defineProperty(exports, "Jats", { enumerable: true, get: function () { return jats_1.Jats; } });
|
|
25
|
-
var download_1 = require("./download");
|
|
26
|
-
Object.defineProperty(exports, "downloadJatsFromUrl", { enumerable: true, get: function () { return download_1.downloadJatsFromUrl; } });
|
|
27
25
|
__exportStar(require("./types"), exports);
|
|
26
|
+
__exportStar(require("./validate"), exports);
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
+
if (mod && mod.__esModule) return mod;
|
|
20
|
+
var result = {};
|
|
21
|
+
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
+
__setModuleDefault(result, mod);
|
|
23
|
+
return result;
|
|
24
|
+
};
|
|
25
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
26
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
27
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
28
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
29
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
30
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
31
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
32
|
+
});
|
|
33
|
+
};
|
|
34
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
35
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
36
|
+
};
|
|
37
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
38
|
+
exports.validateJatsAgainstDtdWrapper = exports.validateJatsAgainstDtd = void 0;
|
|
39
|
+
const fs_1 = __importStar(require("fs"));
|
|
40
|
+
const path_1 = __importDefault(require("path"));
|
|
41
|
+
const node_fetch_1 = __importDefault(require("node-fetch"));
|
|
42
|
+
const unzipper_1 = __importDefault(require("unzipper"));
|
|
43
|
+
const which_1 = require("which");
|
|
44
|
+
const myst_cli_utils_1 = require("myst-cli-utils");
|
|
45
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
46
|
+
const JATS_VERSIONS = ['1.2', '1.2d1', '1.2d2', '1.3', '1.3d1', '1.3d2'];
|
|
47
|
+
const DEFAULT_JATS_VERSION = '1.3';
|
|
48
|
+
const MATHML_VERSIONS = ['2', '3'];
|
|
49
|
+
const DEFAULT_MATHML_VERSION = '3';
|
|
50
|
+
function validateOptions(opts) {
|
|
51
|
+
var _a;
|
|
52
|
+
let jats;
|
|
53
|
+
if (!opts.jats) {
|
|
54
|
+
jats = DEFAULT_JATS_VERSION;
|
|
55
|
+
}
|
|
56
|
+
else if (!JATS_VERSIONS.includes(opts.jats)) {
|
|
57
|
+
throw new Error(`Invalid JATS version "${opts.jats}" - must be one of [${JATS_VERSIONS.join(', ')}]`);
|
|
58
|
+
}
|
|
59
|
+
else {
|
|
60
|
+
jats = opts.jats;
|
|
61
|
+
}
|
|
62
|
+
let mathml;
|
|
63
|
+
if (!opts.mathml) {
|
|
64
|
+
mathml = DEFAULT_MATHML_VERSION;
|
|
65
|
+
}
|
|
66
|
+
else if (!MATHML_VERSIONS.includes(opts.mathml)) {
|
|
67
|
+
throw new Error(`Invalid MathML version "${opts.mathml}" - must be one of [${MATHML_VERSIONS.join(', ')}]`);
|
|
68
|
+
}
|
|
69
|
+
else {
|
|
70
|
+
mathml = opts.mathml;
|
|
71
|
+
}
|
|
72
|
+
const out = {
|
|
73
|
+
jats,
|
|
74
|
+
mathml,
|
|
75
|
+
oasis: !!opts.oasis,
|
|
76
|
+
directory: (_a = opts.directory) !== null && _a !== void 0 ? _a : defaultDirectory(),
|
|
77
|
+
};
|
|
78
|
+
return out;
|
|
79
|
+
}
|
|
80
|
+
function dtdFolder(opts) {
|
|
81
|
+
const version = opts.jats.replace('.', '-');
|
|
82
|
+
const oasis = opts.oasis ? '-OASIS' : '';
|
|
83
|
+
const mathml = `MathML${opts.mathml}`;
|
|
84
|
+
return `JATS-Archiving-${version}${oasis}-${mathml}-DTD`;
|
|
85
|
+
}
|
|
86
|
+
function dtdZipFile(opts) {
|
|
87
|
+
return `${dtdFolder(opts)}.zip`;
|
|
88
|
+
}
|
|
89
|
+
function localDtdZipFile(opts) {
|
|
90
|
+
return path_1.default.join(opts.directory, dtdZipFile(opts));
|
|
91
|
+
}
|
|
92
|
+
function dtdFile(opts) {
|
|
93
|
+
const version = opts.jats.startsWith('1.2') ? '1' : opts.jats.replace('.', '-');
|
|
94
|
+
const article = opts.oasis ? 'archive-oasis-article' : 'archivearticle';
|
|
95
|
+
const mathml = opts.mathml === '3' ? '-mathml3' : '';
|
|
96
|
+
return `JATS-${article}${version}${mathml}.dtd`;
|
|
97
|
+
}
|
|
98
|
+
function localDtdFile(opts) {
|
|
99
|
+
return path_1.default.join(opts.directory, dtdFolder(opts), dtdFile(opts));
|
|
100
|
+
}
|
|
101
|
+
function ftpUrl(opts) {
|
|
102
|
+
return `https://ftp.ncbi.nih.gov/pub/jats/archiving/${opts.jats}/${dtdZipFile(opts)}`;
|
|
103
|
+
}
|
|
104
|
+
function defaultDirectory() {
|
|
105
|
+
return path_1.default.join(__dirname, 'static');
|
|
106
|
+
}
|
|
107
|
+
function dtdDownload(session, opts) {
|
|
108
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
109
|
+
if (!fs_1.default.existsSync(opts.directory)) {
|
|
110
|
+
fs_1.default.mkdirSync(opts.directory, { recursive: true });
|
|
111
|
+
}
|
|
112
|
+
session.log.info(`🌎 Downloading: ${ftpUrl(opts)}`);
|
|
113
|
+
session.log.debug(`Saving to ${localDtdZipFile(opts)}`);
|
|
114
|
+
const resp = yield (0, node_fetch_1.default)(ftpUrl(opts));
|
|
115
|
+
(0, myst_cli_utils_1.writeFileToFolder)(localDtdZipFile(opts), yield resp.buffer());
|
|
116
|
+
});
|
|
117
|
+
}
|
|
118
|
+
function ensureDtdZipExists(session, opts) {
|
|
119
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
120
|
+
if (!fs_1.default.existsSync(path_1.default.join(opts.directory, dtdZipFile(opts)))) {
|
|
121
|
+
yield dtdDownload(session, opts);
|
|
122
|
+
}
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
function ensureDtdExists(session, opts) {
|
|
126
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
127
|
+
if (!fs_1.default.existsSync(localDtdFile(opts))) {
|
|
128
|
+
yield ensureDtdZipExists(session, opts);
|
|
129
|
+
const zipFile = localDtdZipFile(opts);
|
|
130
|
+
session.log.info(`🤐 Unzipping template on disk ${zipFile}`);
|
|
131
|
+
yield (0, fs_1.createReadStream)(zipFile)
|
|
132
|
+
.pipe(unzipper_1.default.Extract({ path: opts.directory }))
|
|
133
|
+
.promise();
|
|
134
|
+
}
|
|
135
|
+
session.log.debug(`Validating against ${localDtdFile(opts)}`);
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
function isXmllintAvailable() {
|
|
139
|
+
return (0, which_1.sync)('xmllint', { nothrow: true });
|
|
140
|
+
}
|
|
141
|
+
function validateJatsAgainstDtd(session, file, opts) {
|
|
142
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
143
|
+
if (!isXmllintAvailable()) {
|
|
144
|
+
session.log.error(`JATS validation against DTD requires xmllint\n\n${chalk_1.default.dim('To install:\n mac: brew install xmlstarlet\n debian: apt install libxml2-utils')}`);
|
|
145
|
+
return;
|
|
146
|
+
}
|
|
147
|
+
const validatedOpts = validateOptions(opts !== null && opts !== void 0 ? opts : {});
|
|
148
|
+
yield ensureDtdExists(session, validatedOpts);
|
|
149
|
+
try {
|
|
150
|
+
yield (0, myst_cli_utils_1.makeExecutable)(`xmllint --dtdvalid ${localDtdFile(validatedOpts)} --noout ${file}`, session.log)();
|
|
151
|
+
}
|
|
152
|
+
catch (_a) {
|
|
153
|
+
return false;
|
|
154
|
+
}
|
|
155
|
+
return true;
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
exports.validateJatsAgainstDtd = validateJatsAgainstDtd;
|
|
159
|
+
function validateJatsAgainstDtdWrapper(session, file, opts) {
|
|
160
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
161
|
+
const success = yield validateJatsAgainstDtd(session, file, opts);
|
|
162
|
+
if (success) {
|
|
163
|
+
session.log.info(chalk_1.default.greenBright('JATS validation passed!'));
|
|
164
|
+
}
|
|
165
|
+
else {
|
|
166
|
+
throw new Error('JATS validation failed.');
|
|
167
|
+
}
|
|
168
|
+
});
|
|
169
|
+
}
|
|
170
|
+
exports.validateJatsAgainstDtdWrapper = validateJatsAgainstDtdWrapper;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
__exportStar(require("./dtd"), exports);
|
package/dist/cjs/version.js
CHANGED
|
@@ -8,20 +8,10 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
8
8
|
});
|
|
9
9
|
};
|
|
10
10
|
import doi from 'doi-utils';
|
|
11
|
-
import chalk from 'chalk';
|
|
12
11
|
import fetch from 'node-fetch';
|
|
13
12
|
import { isUrl, tic } from 'myst-cli-utils';
|
|
14
|
-
import {
|
|
15
|
-
|
|
16
|
-
function logAboutJatsFailing(session, jatsUrls) {
|
|
17
|
-
session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');
|
|
18
|
-
session.log.info(`${chalk.green(`\nThe XML ${chalk.bold('should')} be here:\n\n${jatsUrls.join('\n')}`)}\n`);
|
|
19
|
-
const FAIR = highlightFAIR('A', { chalk });
|
|
20
|
-
session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
|
|
21
|
-
session.log.debug(formatPrinciples('A*', { chalk }));
|
|
22
|
-
session.log.info(`${chalk.blue('The link may work in a browser.')}\n`);
|
|
23
|
-
}
|
|
24
|
-
function dowloadFromUrl(session, jatsUrl, opts) {
|
|
13
|
+
import { customResolveJatsUrlFromDoi } from '../resolvers';
|
|
14
|
+
function downloadFromUrl(session, jatsUrl, opts) {
|
|
25
15
|
var _a, _b;
|
|
26
16
|
return __awaiter(this, void 0, void 0, function* () {
|
|
27
17
|
const toc = tic();
|
|
@@ -35,7 +25,7 @@ function dowloadFromUrl(session, jatsUrl, opts) {
|
|
|
35
25
|
if (!((contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/xml')) ||
|
|
36
26
|
(contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/xml')) ||
|
|
37
27
|
(contentType === null || contentType === void 0 ? void 0 : contentType.includes('text/plain')))) {
|
|
38
|
-
session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}
|
|
28
|
+
session.log.warn(`Expected content-type "application/xml" instead we got "${contentType}" for ${jatsUrl}`);
|
|
39
29
|
}
|
|
40
30
|
const data = yield resp.text();
|
|
41
31
|
session.log.debug(toc(`Fetched document with content-type "${contentType}" in %s`));
|
|
@@ -161,26 +151,26 @@ export function downloadJatsFromUrl(session, urlOrDoi, opts = {}) {
|
|
|
161
151
|
for (let index = 0; index < expectedUrls.length; index++) {
|
|
162
152
|
const url = expectedUrls[index];
|
|
163
153
|
try {
|
|
164
|
-
const data = yield
|
|
154
|
+
const data = yield downloadFromUrl(session, url, opts);
|
|
165
155
|
if (data)
|
|
166
|
-
return { source: url, data };
|
|
156
|
+
return { success: true, source: url, data };
|
|
167
157
|
}
|
|
168
158
|
catch (error) {
|
|
169
159
|
session.log.debug(error.message);
|
|
170
160
|
}
|
|
171
161
|
}
|
|
172
162
|
// If there are expected URLs that don't work: see something, say something, etc.
|
|
173
|
-
|
|
163
|
+
return { success: false, source: expectedUrls[0] };
|
|
174
164
|
}
|
|
175
165
|
if (doi.validate(urlOrDoi)) {
|
|
176
166
|
const jatsUrl = yield customResolveJatsUrlFromDoi(session, urlOrDoi, opts);
|
|
177
|
-
const data = yield
|
|
178
|
-
return { source: jatsUrl, data };
|
|
167
|
+
const data = yield downloadFromUrl(session, jatsUrl, opts);
|
|
168
|
+
return { success: true, source: jatsUrl, data };
|
|
179
169
|
}
|
|
180
170
|
if (isUrl(urlOrDoi)) {
|
|
181
171
|
session.log.debug("No resolver matched, and the URL doesn't look like a DOI. We will attempt to download it directly.");
|
|
182
|
-
const data = yield
|
|
183
|
-
return { source: urlOrDoi, data };
|
|
172
|
+
const data = yield downloadFromUrl(session, urlOrDoi, opts);
|
|
173
|
+
return { success: true, source: urlOrDoi, data };
|
|
184
174
|
}
|
|
185
175
|
throw new Error(`Could not find ${urlOrDoi} locally, and it doesn't look like a URL or DOI`);
|
|
186
176
|
});
|
package/dist/esm/cli/index.js
CHANGED
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
import commander from 'commander';
|
|
3
3
|
import version from '../version';
|
|
4
4
|
import { addDownloadCLI } from './parse';
|
|
5
|
+
import { addValidateCLI } from './validate';
|
|
5
6
|
const program = new commander.Command();
|
|
6
7
|
addDownloadCLI(program);
|
|
8
|
+
addValidateCLI(program);
|
|
7
9
|
program.version(`v${version}`, '-v, --version', 'Print the current version of jats-xml');
|
|
8
10
|
program.option('-d, --debug', 'Log out any errors to the console.');
|
|
9
11
|
program.parse(process.argv);
|
package/dist/esm/cli/parse.js
CHANGED
|
@@ -13,12 +13,13 @@ import { extname } from 'path';
|
|
|
13
13
|
import { clirun, isUrl, tic, writeFileToFolder } from 'myst-cli-utils';
|
|
14
14
|
import doi from 'doi-utils';
|
|
15
15
|
import chalk from 'chalk';
|
|
16
|
+
import { formatPrinciples, highlightFAIR } from 'fair-principles';
|
|
16
17
|
import { getSession } from '../session';
|
|
17
18
|
import { Tags } from '../types';
|
|
18
19
|
import { Jats } from '../jats';
|
|
19
20
|
import { toText } from 'myst-common';
|
|
20
21
|
import { select, selectAll } from 'unist-util-select';
|
|
21
|
-
import { downloadJatsFromUrl } from '
|
|
22
|
+
import { downloadJatsFromUrl } from './download';
|
|
22
23
|
import { DEFAULT_RESOLVERS } from '../resolvers';
|
|
23
24
|
import { findArticleId, formatDate, toDate } from '../utils';
|
|
24
25
|
function hasValidExtension(output) {
|
|
@@ -35,11 +36,23 @@ function downloadAndSaveJats(session, urlOrDoi, output, opts = { resolvers: DEFA
|
|
|
35
36
|
if (!hasValidExtension(output)) {
|
|
36
37
|
session.log.warn(`The extension ${extname(output)} is not a valid extension for JATS, try using ".xml" or ".jats"`);
|
|
37
38
|
}
|
|
38
|
-
const { data } = yield downloadJatsFromUrl(session, urlOrDoi, opts);
|
|
39
|
+
const { success, data, source } = yield downloadJatsFromUrl(session, urlOrDoi, opts);
|
|
40
|
+
if (!success || !data) {
|
|
41
|
+
logAboutJatsFailing(session, [source]);
|
|
42
|
+
process.exit(1);
|
|
43
|
+
}
|
|
39
44
|
writeFileToFolder(output, data);
|
|
40
45
|
return data;
|
|
41
46
|
});
|
|
42
47
|
}
|
|
48
|
+
function logAboutJatsFailing(session, jatsUrls) {
|
|
49
|
+
session.log.warn('⛔️ JATS may not be Open Access 😭, you should petition your local representative 🪧');
|
|
50
|
+
session.log.info(`${chalk.green(`\nThe XML ${chalk.bold('should')} be here:\n\n${jatsUrls.join('\n')}`)}\n`);
|
|
51
|
+
const FAIR = highlightFAIR('A', { chalk });
|
|
52
|
+
session.log.info(`Some publishers aggressively block programmatic access, which isn't ${FAIR}.`);
|
|
53
|
+
session.log.debug(formatPrinciples('A*', { chalk }));
|
|
54
|
+
session.log.info(`\n${chalk.blue('The link may work in a browser.')}\n`);
|
|
55
|
+
}
|
|
43
56
|
function parseJats(session, file, opts = { resolvers: DEFAULT_RESOLVERS }) {
|
|
44
57
|
return __awaiter(this, void 0, void 0, function* () {
|
|
45
58
|
const toc = tic();
|
|
@@ -48,7 +61,11 @@ function parseJats(session, file, opts = { resolvers: DEFAULT_RESOLVERS }) {
|
|
|
48
61
|
const data = fs.readFileSync(file).toString();
|
|
49
62
|
return new Jats(data, { log: session.log });
|
|
50
63
|
}
|
|
51
|
-
const { source, data } = yield downloadJatsFromUrl(session, file, opts);
|
|
64
|
+
const { success, source, data } = yield downloadJatsFromUrl(session, file, opts);
|
|
65
|
+
if (!success || !data) {
|
|
66
|
+
logAboutJatsFailing(session, [source]);
|
|
67
|
+
process.exit(1);
|
|
68
|
+
}
|
|
52
69
|
const jats = new Jats(data, { source, log: session.log });
|
|
53
70
|
session.log.debug(toc(`Downloaded and parsed JATS file in %s`));
|
|
54
71
|
return jats;
|
|
@@ -189,7 +206,7 @@ function makeDownloadCLI(program) {
|
|
|
189
206
|
const command = new Command('download')
|
|
190
207
|
.description('Parse a JATS file and provide a summary')
|
|
191
208
|
.argument('<url>', 'The JATS url or a DOI')
|
|
192
|
-
.argument('<output>', 'The JATS
|
|
209
|
+
.argument('<output>', 'The JATS output file')
|
|
193
210
|
.action(clirun(downloadAndSaveJats, { program, getSession }));
|
|
194
211
|
return command;
|
|
195
212
|
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { Command, Option } from 'commander';
|
|
2
|
+
import { clirun } from 'myst-cli-utils';
|
|
3
|
+
import { getSession } from '../session';
|
|
4
|
+
import { validateJatsAgainstDtdWrapper } from '../validate';
|
|
5
|
+
function makeValidateCLI(program) {
|
|
6
|
+
const command = new Command('validate')
|
|
7
|
+
.description('Fetch JATS DTD schema file from nih.gov ftp server')
|
|
8
|
+
.argument('<file>', 'JATS file to validate')
|
|
9
|
+
.addOption(new Option('--jats <version>', 'JATS version, must be 1.2 or later').default('1.3'))
|
|
10
|
+
.addOption(new Option('--mathml <version>', 'MathML version, 2 or 3').default('3'))
|
|
11
|
+
.addOption(new Option('--oasis', 'Use OASIS table model').default(false))
|
|
12
|
+
.addOption(new Option('--directory <value>', 'Directory to save DTD file'))
|
|
13
|
+
.action(clirun(validateJatsAgainstDtdWrapper, { program, getSession }));
|
|
14
|
+
return command;
|
|
15
|
+
}
|
|
16
|
+
export function addValidateCLI(program) {
|
|
17
|
+
program.addCommand(makeValidateCLI(program));
|
|
18
|
+
}
|