jats-cli 1.0.12 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/jats.cjs CHANGED
@@ -6561,7 +6561,7 @@ var require_xml2js = __commonJS({
6561
6561
  var require_xml2json = __commonJS({
6562
6562
  "../../node_modules/xml-js/lib/xml2json.js"(exports, module2) {
6563
6563
  var helper = require_options_helper();
6564
- var xml2js2 = require_xml2js();
6564
+ var xml2js3 = require_xml2js();
6565
6565
  function validateOptions2(userOptions) {
6566
6566
  var options = helper.copyOptions(userOptions);
6567
6567
  helper.ensureSpacesExists(options);
@@ -6570,7 +6570,7 @@ var require_xml2json = __commonJS({
6570
6570
  module2.exports = function(xml, userOptions) {
6571
6571
  var options, js, json2, parentKey;
6572
6572
  options = validateOptions2(userOptions);
6573
- js = xml2js2(xml, options);
6573
+ js = xml2js3(xml, options);
6574
6574
  parentKey = "compact" in options && options.compact ? "_parent" : "parent";
6575
6575
  if ("addParent" in options && options.addParent) {
6576
6576
  json2 = JSON.stringify(js, function(k, v) {
@@ -6942,12 +6942,12 @@ var require_json2xml = __commonJS({
6942
6942
  // ../../node_modules/xml-js/lib/index.js
6943
6943
  var require_lib2 = __commonJS({
6944
6944
  "../../node_modules/xml-js/lib/index.js"(exports, module2) {
6945
- var xml2js2 = require_xml2js();
6945
+ var xml2js3 = require_xml2js();
6946
6946
  var xml2json = require_xml2json();
6947
6947
  var js2xml3 = require_js2xml();
6948
6948
  var json2xml = require_json2xml();
6949
6949
  module2.exports = {
6950
- xml2js: xml2js2,
6950
+ xml2js: xml2js3,
6951
6951
  xml2json,
6952
6952
  js2xml: js2xml3,
6953
6953
  json2xml
@@ -48999,7 +48999,7 @@ var {
48999
48999
  } = import_index.default;
49000
49000
 
49001
49001
  // src/version.ts
49002
- var version = "1.0.12";
49002
+ var version = "1.0.13";
49003
49003
  var version_default = version;
49004
49004
 
49005
49005
  // src/parse.ts
@@ -56436,6 +56436,7 @@ var import_node_fs4 = __toESM(require("node:fs"), 1);
56436
56436
  var import_node_path3 = __toESM(require("node:path"), 1);
56437
56437
  var import_node_readline = __toESM(require("node:readline"), 1);
56438
56438
  var import_client_s32 = __toESM(require_dist_cjs71(), 1);
56439
+ var import_xml_js2 = __toESM(require_lib2(), 1);
56439
56440
 
56440
56441
  // ../jats-fetch/dist/utils.js
56441
56442
  var import_node_fs3 = __toESM(require("node:fs"), 1);
@@ -57789,6 +57790,7 @@ async function streamToFile(url, dest, fetcher) {
57789
57790
  // ../jats-fetch/dist/pubmed.js
57790
57791
  var EFETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";
57791
57792
  var ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi";
57793
+ var OA_URL = "https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi";
57792
57794
  var IDCONV_URL = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/";
57793
57795
  var LISTING_BASE_URL = "https://ftp.ncbi.nlm.nih.gov/pub/pmc/";
57794
57796
  var LISTING_URL = `${LISTING_BASE_URL}oa_file_list.csv`;
@@ -57811,42 +57813,52 @@ function normalizePMID(session, pmid) {
57811
57813
  }
57812
57814
  return pmid;
57813
57815
  }
57814
- async function convertPMID2PMCID(session, pmid, opts) {
57816
+ async function convertId(session, id, from, to, opts) {
57815
57817
  var _a, _b, _c;
57816
- pmid = normalizePMID(session, pmid);
57817
57818
  const toc = tic();
57818
- const resp = await ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(`${IDCONV_URL}?tool=jats-xml&format=json&ids=${pmid}`, "json");
57819
+ const resp = await ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(`${IDCONV_URL}?tool=jats-xml&format=json&ids=${id}`, "json");
57819
57820
  if (!resp.ok) {
57820
- session.log.debug(`Failed to convert PubMedID: ${pmid}`);
57821
+ session.log.debug(`Failed to convert ${from} ID: ${id}`);
57821
57822
  return;
57822
57823
  }
57823
57824
  const data = await resp.json();
57824
- const pmcid = (_c = (_b = data === null || data === void 0 ? void 0 : data.records) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.pmcid;
57825
- session.log.debug(toc(`Used nih.gov to transform ${pmid} to ${pmcid} in %s.`));
57825
+ const newId = (_c = (_b = data === null || data === void 0 ? void 0 : data.records) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c[to];
57826
+ if (newId) {
57827
+ session.log.debug(toc(`Used nih.gov to transform ${id} to ${newId} in %s.`));
57828
+ }
57829
+ return newId;
57830
+ }
57831
+ async function convertIds(session, ids, from, to, opts) {
57832
+ var _a, _b, _c;
57833
+ const toc = tic();
57834
+ const resp = await ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(`${IDCONV_URL}?tool=jats-xml&format=json&ids=${ids.join(",")}`, "json");
57835
+ if (!resp.ok) {
57836
+ session.log.debug(`Failed to convert ${from} ${ids.length} IDs`);
57837
+ return;
57838
+ }
57839
+ const data = await resp.json();
57840
+ const entries = (_b = data === null || data === void 0 ? void 0 : data.records) === null || _b === void 0 ? void 0 : _b.filter((record) => !!record[from] && !!record[to]).map((record) => [record[from], record[to]]);
57841
+ const newIds = entries ? Object.fromEntries(entries) : {};
57842
+ session.log.debug(toc(`Used nih.gov to transform ${(_c = entries === null || entries === void 0 ? void 0 : entries.length) !== null && _c !== void 0 ? _c : 0}/${ids.length} ${from} to ${to} in %s.`));
57843
+ return newIds;
57844
+ }
57845
+ async function convertPMID2PMCID(session, pmid, opts) {
57846
+ pmid = normalizePMID(session, pmid);
57847
+ const pmcid = await convertId(session, pmid, "pmid", "pmcid", opts);
57826
57848
  return pmcid;
57827
57849
  }
57828
57850
  async function convertPMIDs2DOIs(session, pmids, opts) {
57829
- var _a, _b, _c, _d;
57851
+ var _a, _b, _c;
57830
57852
  pmids = [...new Set(pmids.map((pmid) => normalizePMID(session, pmid)))];
57831
- const pmDois = {};
57832
57853
  const toc = tic();
57833
- const idconvResp = await ((_a = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _a !== void 0 ? _a : defaultFetcher)(`${IDCONV_URL}?tool=jats-xml&format=json&ids=${pmids.join(",")}`, "json");
57834
- if (idconvResp.ok) {
57835
- const data = await idconvResp.json();
57836
- (_b = data === null || data === void 0 ? void 0 : data.records) === null || _b === void 0 ? void 0 : _b.forEach((record) => {
57837
- if (record.doi)
57838
- pmDois[record.pmid] = record.doi;
57839
- });
57840
- const pmDoiCount = Object.keys(pmDois).length;
57841
- if (pmDoiCount === pmids.length) {
57842
- session.log.debug(toc(`Used nih.gov to convert ${pmDoiCount} PMIDs to DOIs in %s.`));
57843
- return pmDois;
57844
- }
57854
+ const pmDois = (_a = await convertIds(session, pmids, "pmid", "doi", opts)) !== null && _a !== void 0 ? _a : {};
57855
+ if (Object.keys(pmDois).length === pmids.length) {
57856
+ return pmDois;
57845
57857
  }
57846
- const esummaryResp = await ((_c = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _c !== void 0 ? _c : defaultFetcher)(`${ESUMMARY_URL}?db=pubmed&format=json&id=${pmids.filter((pmid) => !pmDois[pmid]).join(",")}`, "json");
57858
+ const esummaryResp = await ((_b = opts === null || opts === void 0 ? void 0 : opts.fetcher) !== null && _b !== void 0 ? _b : defaultFetcher)(`${ESUMMARY_URL}?db=pubmed&format=json&id=${pmids.filter((pmid) => !pmDois[pmid]).join(",")}`, "json");
57847
57859
  if (esummaryResp.ok) {
57848
57860
  const data = await esummaryResp.json();
57849
- Object.entries((_d = data === null || data === void 0 ? void 0 : data.result) !== null && _d !== void 0 ? _d : {}).filter(([pmid]) => pmid !== "uids").forEach(([pmid, record]) => {
57861
+ Object.entries((_c = data === null || data === void 0 ? void 0 : data.result) !== null && _c !== void 0 ? _c : {}).filter(([pmid]) => pmid !== "uids").forEach(([pmid, record]) => {
57850
57862
  var _a2, _b2;
57851
57863
  const pmDoi = (_b2 = (_a2 = record.articleids) === null || _a2 === void 0 ? void 0 : _a2.find((articleid) => {
57852
57864
  return articleid.idtype === "doi";
@@ -57917,8 +57929,6 @@ async function getPubMedJatsFromS3(session, pmcid) {
57917
57929
  }
57918
57930
  }
57919
57931
  async function getListingsFile(session, dest, fetcher) {
57920
- if (!dest)
57921
- dest = __dirname;
57922
57932
  if (!import_node_path3.default.extname(dest))
57923
57933
  dest = import_node_path3.default.join(dest, LISTING_FILENAME);
57924
57934
  if (import_node_path3.default.extname(dest) !== ".csv") {
@@ -57963,19 +57973,37 @@ async function searchListingForPMC(listingFile, pmcid) {
57963
57973
  }
57964
57974
  throw new Error(`Article ${pmcid} not found in ${listingFile}`);
57965
57975
  }
57966
- async function downloadAndUnzipPMC(session, entry, outputDir, fetcher) {
57967
- const urlParts = entry.url.split("/");
57976
+ async function getDownloadMetadata(pmcid, fetcher) {
57977
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
57978
+ const resp = await (fetcher !== null && fetcher !== void 0 ? fetcher : defaultFetcher)(`${OA_URL}?format=tgz&id=${pmcid}`, "xml");
57979
+ if (!resp.ok) {
57980
+ throw new Error(`Bad response from ${OA_URL}`);
57981
+ }
57982
+ const oaMeta = (0, import_xml_js2.xml2js)(await resp.text(), { compact: true });
57983
+ if (((_c = (_b = (_a = oaMeta.OA) === null || _a === void 0 ? void 0 : _a.records) === null || _b === void 0 ? void 0 : _b._attributes) === null || _c === void 0 ? void 0 : _c["returned-count"]) !== "1") {
57984
+ throw new Error(`Bad response from ${OA_URL} - returned count is not 1`);
57985
+ }
57986
+ const url = (_h = (_g = (_f = (_e = (_d = oaMeta === null || oaMeta === void 0 ? void 0 : oaMeta.OA) === null || _d === void 0 ? void 0 : _d.records) === null || _e === void 0 ? void 0 : _e.record) === null || _f === void 0 ? void 0 : _f.link) === null || _g === void 0 ? void 0 : _g._attributes) === null || _h === void 0 ? void 0 : _h.href;
57987
+ if (!url) {
57988
+ throw new Error(`Bad response from ${OA_URL} - href is not available`);
57989
+ }
57990
+ const { citation, license } = (_m = (_l = (_k = (_j = oaMeta.OA) === null || _j === void 0 ? void 0 : _j.records) === null || _k === void 0 ? void 0 : _k.record) === null || _l === void 0 ? void 0 : _l._attributes) !== null && _m !== void 0 ? _m : {};
57991
+ return { url, citation, license };
57992
+ }
57993
+ async function downloadAndUnzipPMC(session, url, outputDir, fetcher) {
57994
+ url = url.replace(/^ftp:/, "https:");
57995
+ const urlParts = url.split("/");
57968
57996
  const filename = urlParts[urlParts.length - 1];
57969
57997
  const dest = import_node_path3.default.join(outputDir, filename);
57970
57998
  if (!import_node_fs4.default.existsSync(outputDir)) {
57971
57999
  import_node_fs4.default.mkdirSync(outputDir, { recursive: true });
57972
58000
  }
57973
58001
  if (!import_node_fs4.default.existsSync(dest)) {
57974
- session.log.info(`Downloading PMC data from ${entry.url}`);
57975
- await streamToFile(`${LISTING_BASE_URL}${entry.url}`, dest, fetcher);
58002
+ session.log.info(`Downloading PMC data from ${url}`);
58003
+ await streamToFile(url, dest, fetcher);
57976
58004
  }
57977
58005
  if (!import_node_fs4.default.existsSync(dest)) {
57978
- throw new Error(`Unable to download ${entry.url}`);
58006
+ throw new Error(`Unable to download ${url}`);
57979
58007
  }
57980
58008
  session.log.info(`Extracting PMC data from ${dest} to ${outputDir}`);
57981
58009
  const unzip = makeExecutable(`tar -xf ${dest} -C ${outputDir}`, session.log);
@@ -57995,9 +58023,28 @@ async function downloadAndUnzipPMC(session, entry, outputDir, fetcher) {
57995
58023
  import_node_fs4.default.rmdirSync(zipDir);
57996
58024
  }
57997
58025
  async function getDataFromPMC(session, pmcid, outputDir, listing, fetcher) {
57998
- const listingFile = await getListingsFile(session, listing, fetcher);
57999
- const entry = await searchListingForPMC(listingFile, pmcid);
58000
- await downloadAndUnzipPMC(session, entry, outputDir, fetcher);
58026
+ let url;
58027
+ if (pmcid.endsWith(".tar.gz")) {
58028
+ url = pmcid;
58029
+ } else {
58030
+ if (!pmcid.startsWith("PMC")) {
58031
+ throw new Error("Data may only be downloaded for PMC articles");
58032
+ }
58033
+ try {
58034
+ const metadata = await getDownloadMetadata(pmcid);
58035
+ url = metadata.url;
58036
+ } catch {
58037
+ if (listing) {
58038
+ const listingFile = await getListingsFile(session, listing, fetcher);
58039
+ const entry = await searchListingForPMC(listingFile, pmcid);
58040
+ url = `${LISTING_BASE_URL}${entry.url}`;
58041
+ }
58042
+ }
58043
+ }
58044
+ if (!url) {
58045
+ throw new Error(`Unable to find PMC data download url for: ${pmcid}`);
58046
+ }
58047
+ await downloadAndUnzipPMC(session, url, outputDir, fetcher);
58001
58048
  }
58002
58049
  async function getPubMedJatsFromData(session, pmcid, outputDir, listing, fetcher) {
58003
58050
  await getDataFromPMC(session, pmcid, outputDir, listing, fetcher);
@@ -58149,22 +58196,40 @@ async function downloadJatsFromUrl(session, urlOrDoi, opts = {}) {
58149
58196
  return { success: false, source: urlOrDoi };
58150
58197
  }
58151
58198
  async function jatsFetch(session, input, opts) {
58152
- var _a, _b;
58199
+ var _a;
58153
58200
  if (input === "listing" && !opts.data && !(opts.output && opts.listing)) {
58154
- const dest = await getListingsFile(session, (_a = opts.output) !== null && _a !== void 0 ? _a : opts.listing);
58201
+ const inputDest = (_a = opts.output) !== null && _a !== void 0 ? _a : opts.listing;
58202
+ if (!inputDest) {
58203
+ throw new Error("Destination for listing file must be specified");
58204
+ }
58205
+ const dest = await getListingsFile(session, inputDest);
58155
58206
  session.log.info(`PMC Open Access listing saved to ${dest}`);
58156
58207
  return;
58157
58208
  }
58158
- let output = (_b = opts.output) !== null && _b !== void 0 ? _b : opts.data ? `${input}` : ".";
58209
+ let output = opts.output;
58210
+ let filename;
58211
+ if (input.endsWith(".tar.gz")) {
58212
+ opts.data = true;
58213
+ const foldername = input.split("/").slice(-1)[0].slice(0, -".tar.gz".length);
58214
+ filename = `${foldername}.xml`;
58215
+ if (!output) {
58216
+ output = foldername;
58217
+ }
58218
+ }
58219
+ if (!output)
58220
+ output = opts.data ? `${input}` : ".";
58159
58221
  if (!import_node_path4.default.extname(output)) {
58160
- const filename = input.startsWith("PMC") ? `${input}.xml` : "jats.xml";
58222
+ filename = filename !== null && filename !== void 0 ? filename : input.startsWith("PMC") ? `${input}.xml` : "jats.xml";
58161
58223
  output = import_node_path4.default.join(output, filename);
58162
58224
  }
58163
58225
  if (import_node_path4.default.extname(output) && ![".xml", ".jats"].includes(import_node_path4.default.extname(output).toLowerCase())) {
58164
58226
  throw new Error(`Output must be an XML file or a directory`);
58165
58227
  }
58166
58228
  let result;
58167
- if (input.startsWith("PMC")) {
58229
+ if (opts.data) {
58230
+ result = await getPubMedJatsFromData(session, input, import_node_path4.default.dirname(output), opts.listing);
58231
+ }
58232
+ if (!(result === null || result === void 0 ? void 0 : result.data) && input.startsWith("PMC")) {
58168
58233
  result = await getPubMedJatsFromS3(session, input);
58169
58234
  }
58170
58235
  if (!(result === null || result === void 0 ? void 0 : result.data)) {
@@ -58183,17 +58248,10 @@ async function jatsFetch(session, input, opts) {
58183
58248
  }
58184
58249
  import_node_fs5.default.writeFileSync(output, result.data);
58185
58250
  session.log.info(`JATS written to ${output}`);
58186
- if (!opts.data)
58187
- return;
58188
- if (input.startsWith("PMC")) {
58189
- await getDataFromPMC(session, input, import_node_path4.default.dirname(output), opts.listing);
58190
- } else {
58191
- session.log.error("Data may only be downloaded for PMC articles");
58192
- }
58193
58251
  }
58194
58252
 
58195
58253
  // ../jats-xml/dist/jats.js
58196
- var import_xml_js2 = __toESM(require_lib2(), 1);
58254
+ var import_xml_js3 = __toESM(require_lib2(), 1);
58197
58255
 
58198
58256
  // ../jats-xml/node_modules/unist-util-is/lib/index.js
58199
58257
  var convert2 = (
@@ -58525,7 +58583,7 @@ var Jats = class {
58525
58583
  if (opts === null || opts === void 0 ? void 0 : opts.source)
58526
58584
  this.source = opts.source;
58527
58585
  try {
58528
- this.raw = (0, import_xml_js2.xml2js)(data, { compact: false });
58586
+ this.raw = (0, import_xml_js3.xml2js)(data, { compact: false });
58529
58587
  } catch (error) {
58530
58588
  throw new Error("Problem parsing the JATS document, please ensure it is XML");
58531
58589
  }
@@ -64768,7 +64826,7 @@ var u = (
64768
64826
 
64769
64827
  // ../jats-convert/dist/index.js
64770
64828
  var import_mathml_to_latex = __toESM(require_bundle_min(), 1);
64771
- var import_xml_js3 = __toESM(require_lib2(), 1);
64829
+ var import_xml_js4 = __toESM(require_lib2(), 1);
64772
64830
 
64773
64831
  // ../jats-convert/dist/transforms/admonitions.js
64774
64832
  function admonitionTransform(tree, file) {
@@ -65473,7 +65531,7 @@ function journalTransforms(tree) {
65473
65531
  }
65474
65532
 
65475
65533
  // ../jats-convert/dist/version.js
65476
- var version3 = "1.0.12";
65534
+ var version3 = "1.0.13";
65477
65535
  var version_default3 = version3;
65478
65536
 
65479
65537
  // ../jats-convert/dist/myst/inlineCitations.js
@@ -65694,7 +65752,7 @@ function texMathFromNode(node) {
65694
65752
  });
65695
65753
  }
65696
65754
  });
65697
- return import_mathml_to_latex.MathMLToLaTeX.convert((0, import_xml_js3.js2xml)({ type: "element", name: "root", elements: [math] }));
65755
+ return import_mathml_to_latex.MathMLToLaTeX.convert((0, import_xml_js4.js2xml)({ type: "element", name: "root", elements: [math] }));
65698
65756
  }
65699
65757
  var handlers = {
65700
65758
  body(node, state2) {
package/dist/version.d.ts CHANGED
@@ -1,3 +1,3 @@
1
- declare const version = "1.0.12";
1
+ declare const version = "1.0.13";
2
2
  export default version;
3
3
  //# sourceMappingURL=version.d.ts.map
package/dist/version.js CHANGED
@@ -1,2 +1,2 @@
1
- const version = '1.0.12';
1
+ const version = '1.0.13';
2
2
  export default version;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "jats-cli",
3
- "version": "1.0.12",
3
+ "version": "1.0.13",
4
4
  "description": "Typescript CLI for with JATS",
5
5
  "author": "Rowan Cockett <rowan@curvenote.com>",
6
6
  "homepage": "https://github.com/curvenote/jats",
@@ -45,11 +45,11 @@
45
45
  "dependencies": {
46
46
  "doi-utils": "^2.0.0",
47
47
  "fair-principles": "^2.0.0",
48
- "jats-convert": "^1.0.12",
49
- "jats-fetch": "^1.0.12",
50
- "jats-tags": "^1.0.12",
51
- "jats-utils": "^1.0.12",
52
- "jats-xml": "^1.0.12",
48
+ "jats-convert": "^1.0.13",
49
+ "jats-fetch": "^1.0.13",
50
+ "jats-tags": "^1.0.13",
51
+ "jats-utils": "^1.0.13",
52
+ "jats-xml": "^1.0.13",
53
53
  "js-yaml": "^4.1.0",
54
54
  "unist-util-is": "^5.2.1",
55
55
  "unist-util-select": "^4.0.0"