jats-xml 0.0.14 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -35,7 +35,7 @@ jats download https://elifesciences.org/articles/81952 article.jats
35
35
 
36
36
  Note, currently this just downloads the XML, **not** the associated files.
37
37
 
38
- `sumamry`: summarize the contents of the JATS, given a URL, DOI, or local file
38
+ `summary`: summarize the contents of the JATS, given a URL, DOI, or local file
39
39
 
40
40
  ```bash
41
41
  jats summary https://elifesciences.org/articles/81952
@@ -47,6 +47,12 @@ This will provide a summary, including a list of what the JATS file contains.
47
47
 
48
48
  ![Output of `jats summary`](/images/jats-output.png)
49
49
 
50
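+ `validate`: validate a local file against the JATS DTD schema. The JATS version, library, and MathML version are inferred from the file when possible; otherwise this defaults to the JATS 1.3 Archiving library with MathML 3.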
51
+
52
+ ```bash
53
+ jats validate article.jats --jats 1.2 --mathml 2
54
+ ```
55
+
50
56
  ## Working in Typescript
51
57
 
52
58
  All tags are accessible as types/enums. There is also documentation from each node-type
@@ -7,8 +7,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
7
7
  const commander_1 = __importDefault(require("commander"));
8
8
  const version_1 = __importDefault(require("../version"));
9
9
  const parse_1 = require("./parse");
10
+ const validate_1 = require("./validate");
10
11
  const program = new commander_1.default.Command();
11
12
  (0, parse_1.addDownloadCLI)(program);
13
+ (0, validate_1.addValidateCLI)(program);
12
14
  program.version(`v${version_1.default}`, '-v, --version', 'Print the current version of jats-xml');
13
15
  program.option('-d, --debug', 'Log out any errors to the console.');
14
16
  program.parse(process.argv);
@@ -212,7 +212,7 @@ function makeDownloadCLI(program) {
212
212
  const command = new commander_1.Command('download')
213
213
  .description('Parse a JATS file and provide a summary')
214
214
  .argument('<url>', 'The JATS url or a DOI')
215
- .argument('<output>', 'The JATS url or a DOI')
215
+ .argument('<output>', 'The JATS output file')
216
216
  .action((0, myst_cli_utils_1.clirun)(downloadAndSaveJats, { program, getSession: session_1.getSession }));
217
217
  return command;
218
218
  }
@@ -0,0 +1,29 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.addValidateCLI = void 0;
4
+ const commander_1 = require("commander");
5
+ const myst_cli_utils_1 = require("myst-cli-utils");
6
+ const session_1 = require("../session");
7
+ const validate_1 = require("../validate");
8
+ function makeValidateCLI(program) {
9
+ const command = new commander_1.Command('validate')
10
+ .description(`
11
+ Validate JATS file against DTD schema.
12
+
13
+ The JATS DTD schema file is fetched from nih.gov ftp server if not available locally.
14
+ This will attempt to infer the specific JATS DTD version, library, etc from the file header,
15
+ but options are available to override the inferred values.
16
+ `)
17
+ .argument('<file>', 'JATS file to validate')
18
+ .addOption(new commander_1.Option('--library <value>', 'JATS library - archiving, publishing, or authoring (default: archiving, if value cannot be inferred from file)'))
19
+ .addOption(new commander_1.Option('--jats <version>', 'JATS version, must be 1.1 or later (default: 1.3, if value cannot be inferred from file)'))
20
+ .addOption(new commander_1.Option('--mathml <version>', 'MathML version, 2 or 3 (default: 3, if value cannot be inferred from file)'))
21
+ .addOption(new commander_1.Option('--oasis', 'Use OASIS table model (default: false, if value cannot be inferred from file)'))
22
+ .addOption(new commander_1.Option('--directory <value>', 'Directory to save DTD file'))
23
+ .action((0, myst_cli_utils_1.clirun)(validate_1.validateJatsAgainstDtdWrapper, { program, getSession: session_1.getSession }));
24
+ return command;
25
+ }
26
+ function addValidateCLI(program) {
27
+ program.addCommand(makeValidateCLI(program));
28
+ }
29
+ exports.addValidateCLI = addValidateCLI;
package/dist/cjs/index.js CHANGED
@@ -23,3 +23,4 @@ Object.defineProperty(exports, "version", { enumerable: true, get: function () {
23
23
  var jats_1 = require("./jats");
24
24
  Object.defineProperty(exports, "Jats", { enumerable: true, get: function () { return jats_1.Jats; } });
25
25
  __exportStar(require("./types"), exports);
26
+ __exportStar(require("./validate"), exports);
@@ -0,0 +1,320 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || function (mod) {
19
+ if (mod && mod.__esModule) return mod;
20
+ var result = {};
21
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
+ __setModuleDefault(result, mod);
23
+ return result;
24
+ };
25
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
26
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
27
+ return new (P || (P = Promise))(function (resolve, reject) {
28
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
29
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
30
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
31
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
32
+ });
33
+ };
34
+ var __importDefault = (this && this.__importDefault) || function (mod) {
35
+ return (mod && mod.__esModule) ? mod : { "default": mod };
36
+ };
37
+ Object.defineProperty(exports, "__esModule", { value: true });
38
+ exports.validateJatsAgainstDtdWrapper = exports.validateJatsAgainstDtd = exports.inferOptions = void 0;
39
+ const fs_1 = __importStar(require("fs"));
40
+ const path_1 = __importDefault(require("path"));
41
+ const node_fetch_1 = __importDefault(require("node-fetch"));
42
+ const unzipper_1 = __importDefault(require("unzipper"));
43
+ const which_1 = require("which");
44
+ const myst_cli_utils_1 = require("myst-cli-utils");
45
+ const chalk_1 = __importDefault(require("chalk"));
46
+ const JATS_VERSIONS = [
47
+ '1.1',
48
+ '1.1d1',
49
+ '1.1d2',
50
+ '1.1d3',
51
+ '1.2',
52
+ '1.2d1',
53
+ '1.2d2',
54
+ '1.3',
55
+ '1.3d1',
56
+ '1.3d2',
57
+ ];
58
+ const DEFAULT_JATS_VERSION = '1.3';
59
+ const MATHML_VERSIONS = ['2', '3'];
60
+ const DEFAULT_MATHML_VERSION = '3';
61
+ const JATS_LIBRARIES = ['authoring', 'publishing', 'archiving'];
62
+ const DEFAULT_JATS_LIBRARY = 'archiving';
63
+ /**
64
+ * Return static/ directory adjacent to the code
65
+ *
66
+ * This provides a standard location to cache DTD files, minimizing re-downloading.
67
+ */
68
+ function defaultDirectory() {
69
+ return path_1.default.join(__dirname, 'static');
70
+ }
71
+ function warnOnOptionsMismatch(session, opts, inferredOpts) {
72
+ if (opts.jats && inferredOpts.jats && opts.jats !== inferredOpts.jats) {
73
+ session.log.warn(`Using JATS version ${opts.jats}; does not match version inferred from file ${inferredOpts.jats}`);
74
+ }
75
+ if (opts.library && inferredOpts.library && opts.library !== inferredOpts.library) {
76
+ session.log.warn(`Using JATS library ${opts.library}; does not match library inferred from file ${inferredOpts.library}`);
77
+ }
78
+ if (opts.mathml && inferredOpts.mathml && opts.mathml !== inferredOpts.mathml) {
79
+ session.log.warn(`Using MathML version ${opts.mathml}; does not match version inferred from file ${inferredOpts.mathml}`);
80
+ }
81
+ if (opts.oasis && !inferredOpts.oasis) {
82
+ session.log.warn('Using OASIS table model; does not match non-OASIS inferred from file');
83
+ }
84
+ }
85
+ /**
86
+ * Validate input value as JATS options and fill in defaults
87
+ */
88
+ function validateOptions(session, opts, inferredOpts) {
89
+ var _a, _b, _c, _d, _e;
90
+ warnOnOptionsMismatch(session, opts, inferredOpts);
91
+ let jats;
92
+ if (!opts.jats) {
93
+ jats = (_a = inferredOpts.jats) !== null && _a !== void 0 ? _a : DEFAULT_JATS_VERSION;
94
+ }
95
+ else if (!JATS_VERSIONS.includes(opts.jats)) {
96
+ throw new Error(`Invalid JATS version "${opts.jats}" - must be one of [${JATS_VERSIONS.join(', ')}]`);
97
+ }
98
+ else {
99
+ jats = opts.jats;
100
+ }
101
+ let mathml;
102
+ if (!opts.mathml) {
103
+ mathml = (_b = inferredOpts.mathml) !== null && _b !== void 0 ? _b : DEFAULT_MATHML_VERSION;
104
+ }
105
+ else if (!MATHML_VERSIONS.includes(opts.mathml)) {
106
+ throw new Error(`Invalid MathML version "${opts.mathml}" - must be one of [${MATHML_VERSIONS.join(', ')}]`);
107
+ }
108
+ else {
109
+ mathml = opts.mathml;
110
+ }
111
+ let library;
112
+ if (!opts.library) {
113
+ library = (_c = inferredOpts.library) !== null && _c !== void 0 ? _c : DEFAULT_JATS_LIBRARY;
114
+ }
115
+ else if (typeof opts.library !== 'string' ||
116
+ !JATS_LIBRARIES.includes(opts.library.toLowerCase())) {
117
+ throw new Error(`Invalid JATS library "${opts.library}" - must be one of [${JATS_LIBRARIES.join(', ')}]`);
118
+ }
119
+ else {
120
+ library = opts.library.toLowerCase();
121
+ }
122
+ const oasis = (_d = inferredOpts.oasis) !== null && _d !== void 0 ? _d : !!opts.oasis;
123
+ if (library === 'authoring' && oasis) {
124
+ throw new Error('JATS article authoring library cannot use OASIS table model');
125
+ }
126
+ const out = {
127
+ library,
128
+ jats,
129
+ mathml,
130
+ oasis,
131
+ directory: (_e = opts.directory) !== null && _e !== void 0 ? _e : defaultDirectory(),
132
+ };
133
+ return out;
134
+ }
135
+ /**
136
+ * DTD folder name
137
+ */
138
+ function dtdFolder(opts) {
139
+ const version = opts.jats.replace('.', '-');
140
+ const oasis = opts.oasis ? '-OASIS' : '';
141
+ const mathml = `MathML${opts.mathml}`;
142
+ const library = opts.library.charAt(0).toUpperCase() + opts.library.slice(1);
143
+ return `JATS-${library}-${version}${oasis}-${mathml}-DTD`;
144
+ }
145
+ /**
146
+ * DTD zip file name on FTP server
147
+ */
148
+ function dtdZipFile(opts) {
149
+ return `${dtdFolder(opts)}.zip`;
150
+ }
151
+ /**
152
+ * Local location of DTD zip file
153
+ */
154
+ function localDtdZipFile(opts) {
155
+ return path_1.default.join(opts.directory, dtdZipFile(opts));
156
+ }
157
+ /**
158
+ * Extracted DTD file name
159
+ */
160
+ function dtdFile(opts) {
161
+ const version = opts.jats.startsWith('1.3') ? opts.jats.replace('.', '-') : '1';
162
+ let article;
163
+ if (opts.library === 'archiving') {
164
+ article = opts.oasis ? 'archive-oasis-article' : 'archivearticle';
165
+ }
166
+ else if (opts.library === 'publishing') {
167
+ article = opts.oasis ? 'journalpublishing-oasis-article' : 'journalpublishing';
168
+ }
169
+ else {
170
+ article = 'articleauthoring';
171
+ }
172
+ const mathml = opts.mathml === '3' ? '-mathml3' : '';
173
+ return `JATS-${article}${version}${mathml}.dtd`;
174
+ }
175
+ /**
176
+ * Local location of extracted DTD file
177
+ */
178
+ function localDtdFile(opts) {
179
+ return path_1.default.join(opts.directory, dtdFolder(opts), dtdFile(opts));
180
+ }
181
+ /**
182
+ * NIH FTP server and path for downloading JATS DTD files
183
+ *
184
+ * This is accessed by node-fetch over https.
185
+ */
186
+ function ftpUrl(opts) {
187
+ const library = opts.library === 'authoring' ? 'articleauthoring' : opts.library;
188
+ return `https://ftp.ncbi.nih.gov/pub/jats/${library}/${opts.jats}/${dtdZipFile(opts)}`;
189
+ }
190
+ /**
191
+ * Create a DTD-filename-to-options lookup for implicitly setting options based on JATS header content
192
+ */
193
+ function buildDtdFileLookup() {
194
+ const lookup = {};
195
+ JATS_VERSIONS.filter((jats) => jats === '1.2' || jats.startsWith('1.3')).forEach((jats) => {
196
+ MATHML_VERSIONS.forEach((mathml) => {
197
+ JATS_LIBRARIES.forEach((library) => {
198
+ (library === 'authoring' ? [false] : [true, false]).forEach((oasis) => {
199
+ const opts = { jats, mathml, library, oasis };
200
+ lookup[dtdFile(opts)] = opts;
201
+ });
202
+ });
203
+ });
204
+ });
205
+ return lookup;
206
+ }
207
+ /**
208
+ * Infer DTD options from file content
209
+ *
210
+ * This looks at DTD file name in DOCTYPE as well as dtd-version in article element
211
+ */
212
+ function inferOptions(file) {
213
+ var _a, _b;
214
+ const data = fs_1.default.readFileSync(file).toString();
215
+ const doctype = (_a = data.match(/<!DOCTYPE [\s\S]+?">/g)) === null || _a === void 0 ? void 0 : _a[0];
216
+ const lookup = buildDtdFileLookup();
217
+ let opts = {};
218
+ Object.entries(lookup).forEach(([key, value]) => {
219
+ if (doctype === null || doctype === void 0 ? void 0 : doctype.includes(key))
220
+ opts = Object.assign({}, value);
221
+ });
222
+ const article = (_b = data.match(/<article [\s\S]+?>/g)) === null || _b === void 0 ? void 0 : _b[0];
223
+ JATS_VERSIONS.forEach((jats) => {
224
+ if (article === null || article === void 0 ? void 0 : article.includes(`dtd-version="${jats}"`))
225
+ opts.jats = jats;
226
+ });
227
+ return opts;
228
+ }
229
+ exports.inferOptions = inferOptions;
230
+ /**
231
+ * Download DTD zip file from NIH FTP server
232
+ */
233
+ function dtdDownload(session, opts) {
234
+ return __awaiter(this, void 0, void 0, function* () {
235
+ if (!fs_1.default.existsSync(opts.directory)) {
236
+ fs_1.default.mkdirSync(opts.directory, { recursive: true });
237
+ }
238
+ session.log.info(`🌎 Downloading: ${ftpUrl(opts)}`);
239
+ session.log.debug(`Saving to ${localDtdZipFile(opts)}`);
240
+ const resp = yield (0, node_fetch_1.default)(ftpUrl(opts));
241
+ (0, myst_cli_utils_1.writeFileToFolder)(localDtdZipFile(opts), yield resp.buffer());
242
+ });
243
+ }
244
+ /**
245
+ * Download DTD zip file from NIH FTP server if it does not yet exist
246
+ */
247
+ function ensureDtdZipExists(session, opts) {
248
+ return __awaiter(this, void 0, void 0, function* () {
249
+ if (!fs_1.default.existsSync(path_1.default.join(opts.directory, dtdZipFile(opts)))) {
250
+ yield dtdDownload(session, opts);
251
+ }
252
+ });
253
+ }
254
+ /**
255
+ * Download and extract DTD file if it does not yet exist
256
+ */
257
+ function ensureDtdExists(session, opts) {
258
+ return __awaiter(this, void 0, void 0, function* () {
259
+ if (!fs_1.default.existsSync(localDtdFile(opts))) {
260
+ yield ensureDtdZipExists(session, opts);
261
+ const zipFile = localDtdZipFile(opts);
262
+ session.log.info(`🤐 Unzipping template: ${zipFile}`);
263
+ yield (0, fs_1.createReadStream)(zipFile)
264
+ .pipe(unzipper_1.default.Extract({ path: opts.directory }))
265
+ .promise();
266
+ }
267
+ });
268
+ }
269
+ /**
270
+ * Test if xmllint is available as a cli command
271
+ */
272
+ function isXmllintAvailable() {
273
+ return (0, which_1.sync)('xmllint', { nothrow: true });
274
+ }
275
+ /**
276
+ * Check if JATS file is valid based on JATS version/library/etc.
277
+ *
278
+ * Returns true if valid and false if invalid; returns undefined (after logging an error) if xmllint is not available.
279
+ */
280
+ function validateJatsAgainstDtd(session, file, opts) {
281
+ return __awaiter(this, void 0, void 0, function* () {
282
+ if (!isXmllintAvailable()) {
283
+ session.log.error(`JATS validation against DTD requires xmllint\n\n${chalk_1.default.dim('To install:\n mac: brew install xmlstarlet\n debian: apt install libxml2-utils')}`);
284
+ return;
285
+ }
286
+ const inferredOpts = inferOptions(file);
287
+ const validatedOpts = validateOptions(session, opts !== null && opts !== void 0 ? opts : {}, inferredOpts);
288
+ yield ensureDtdExists(session, validatedOpts);
289
+ session.log.debug(`Validating against: ${localDtdFile(validatedOpts)}`);
290
+ session.log.info(`🧐 Validating against: ${dtdFolder(validatedOpts)}`);
291
+ try {
292
+ // First drop DOCTYPE with DTD in it - we have already fetched the DTD
293
+ const dropDtdCommand = `xmllint --dropdtd`;
294
+ const validateCommand = `xmllint --noout --dtdvalid ${localDtdFile(validatedOpts)}`;
295
+ yield (0, myst_cli_utils_1.makeExecutable)(`${dropDtdCommand} ${file} | ${validateCommand} -`, session.log)();
296
+ }
297
+ catch (_a) {
298
+ return false;
299
+ }
300
+ return true;
301
+ });
302
+ }
303
+ exports.validateJatsAgainstDtd = validateJatsAgainstDtd;
304
+ /**
305
+ * Check if JATS file is valid based on JATS version/library/etc.
306
+ *
307
+ * Logs a confirmation message if valid; throws an error if invalid or if validation could not be run.
308
+ */
309
+ function validateJatsAgainstDtdWrapper(session, file, opts) {
310
+ return __awaiter(this, void 0, void 0, function* () {
311
+ const success = yield validateJatsAgainstDtd(session, file, opts);
312
+ if (success) {
313
+ session.log.info(chalk_1.default.greenBright('JATS validation passed!'));
314
+ }
315
+ else {
316
+ throw new Error('JATS validation failed.');
317
+ }
318
+ });
319
+ }
320
+ exports.validateJatsAgainstDtdWrapper = validateJatsAgainstDtdWrapper;
@@ -0,0 +1,17 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./dtd"), exports);
@@ -1,4 +1,4 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- const version = '0.0.13';
3
+ const version = '0.0.16';
4
4
  exports.default = version;
@@ -2,8 +2,10 @@
2
2
  import commander from 'commander';
3
3
  import version from '../version';
4
4
  import { addDownloadCLI } from './parse';
5
+ import { addValidateCLI } from './validate';
5
6
  const program = new commander.Command();
6
7
  addDownloadCLI(program);
8
+ addValidateCLI(program);
7
9
  program.version(`v${version}`, '-v, --version', 'Print the current version of jats-xml');
8
10
  program.option('-d, --debug', 'Log out any errors to the console.');
9
11
  program.parse(process.argv);
@@ -206,7 +206,7 @@ function makeDownloadCLI(program) {
206
206
  const command = new Command('download')
207
207
  .description('Parse a JATS file and provide a summary')
208
208
  .argument('<url>', 'The JATS url or a DOI')
209
- .argument('<output>', 'The JATS url or a DOI')
209
+ .argument('<output>', 'The JATS output file')
210
210
  .action(clirun(downloadAndSaveJats, { program, getSession }));
211
211
  return command;
212
212
  }
@@ -0,0 +1,25 @@
1
+ import { Command, Option } from 'commander';
2
+ import { clirun } from 'myst-cli-utils';
3
+ import { getSession } from '../session';
4
+ import { validateJatsAgainstDtdWrapper } from '../validate';
5
+ function makeValidateCLI(program) {
6
+ const command = new Command('validate')
7
+ .description(`
8
+ Validate JATS file against DTD schema.
9
+
10
+ The JATS DTD schema file is fetched from nih.gov ftp server if not available locally.
11
+ This will attempt to infer the specific JATS DTD version, library, etc from the file header,
12
+ but options are available to override the inferred values.
13
+ `)
14
+ .argument('<file>', 'JATS file to validate')
15
+ .addOption(new Option('--library <value>', 'JATS library - archiving, publishing, or authoring (default: archiving, if value cannot be inferred from file)'))
16
+ .addOption(new Option('--jats <version>', 'JATS version, must be 1.1 or later (default: 1.3, if value cannot be inferred from file)'))
17
+ .addOption(new Option('--mathml <version>', 'MathML version, 2 or 3 (default: 3, if value cannot be inferred from file)'))
18
+ .addOption(new Option('--oasis', 'Use OASIS table model (default: false, if value cannot be inferred from file)'))
19
+ .addOption(new Option('--directory <value>', 'Directory to save DTD file'))
20
+ .action(clirun(validateJatsAgainstDtdWrapper, { program, getSession }));
21
+ return command;
22
+ }
23
+ export function addValidateCLI(program) {
24
+ program.addCommand(makeValidateCLI(program));
25
+ }
package/dist/esm/index.js CHANGED
@@ -1,3 +1,4 @@
1
1
  export { default as version } from './version';
2
2
  export { Jats } from './jats';
3
3
  export * from './types';
4
+ export * from './validate';