@yangfei_93sky/biocli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +197 -0
- package/dist/batch.d.ts +20 -0
- package/dist/batch.js +69 -0
- package/dist/build-manifest.d.ts +38 -0
- package/dist/build-manifest.js +186 -0
- package/dist/cache.d.ts +28 -0
- package/dist/cache.js +126 -0
- package/dist/cli-manifest.json +1500 -0
- package/dist/cli.d.ts +7 -0
- package/dist/cli.js +336 -0
- package/dist/clis/_shared/common.d.ts +8 -0
- package/dist/clis/_shared/common.js +13 -0
- package/dist/clis/_shared/eutils.d.ts +9 -0
- package/dist/clis/_shared/eutils.js +9 -0
- package/dist/clis/_shared/organism-db.d.ts +23 -0
- package/dist/clis/_shared/organism-db.js +58 -0
- package/dist/clis/_shared/xml-helpers.d.ts +58 -0
- package/dist/clis/_shared/xml-helpers.js +266 -0
- package/dist/clis/aggregate/enrichment.d.ts +7 -0
- package/dist/clis/aggregate/enrichment.js +105 -0
- package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
- package/dist/clis/aggregate/gene-dossier.js +248 -0
- package/dist/clis/aggregate/gene-profile.d.ts +16 -0
- package/dist/clis/aggregate/gene-profile.js +305 -0
- package/dist/clis/aggregate/literature-brief.d.ts +7 -0
- package/dist/clis/aggregate/literature-brief.js +79 -0
- package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
- package/dist/clis/aggregate/variant-dossier.js +161 -0
- package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
- package/dist/clis/aggregate/variant-interpret.js +210 -0
- package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
- package/dist/clis/aggregate/workflow-prepare.js +228 -0
- package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
- package/dist/clis/aggregate/workflow-scout.js +175 -0
- package/dist/clis/clinvar/search.d.ts +8 -0
- package/dist/clis/clinvar/search.js +61 -0
- package/dist/clis/clinvar/variant.d.ts +7 -0
- package/dist/clis/clinvar/variant.js +53 -0
- package/dist/clis/enrichr/analyze.d.ts +7 -0
- package/dist/clis/enrichr/analyze.js +48 -0
- package/dist/clis/ensembl/lookup.d.ts +6 -0
- package/dist/clis/ensembl/lookup.js +38 -0
- package/dist/clis/ensembl/vep.d.ts +7 -0
- package/dist/clis/ensembl/vep.js +86 -0
- package/dist/clis/ensembl/xrefs.d.ts +6 -0
- package/dist/clis/ensembl/xrefs.js +36 -0
- package/dist/clis/gene/fetch.d.ts +10 -0
- package/dist/clis/gene/fetch.js +96 -0
- package/dist/clis/gene/info.d.ts +7 -0
- package/dist/clis/gene/info.js +37 -0
- package/dist/clis/gene/search.d.ts +7 -0
- package/dist/clis/gene/search.js +71 -0
- package/dist/clis/geo/dataset.d.ts +7 -0
- package/dist/clis/geo/dataset.js +55 -0
- package/dist/clis/geo/download.d.ts +17 -0
- package/dist/clis/geo/download.js +115 -0
- package/dist/clis/geo/samples.d.ts +7 -0
- package/dist/clis/geo/samples.js +57 -0
- package/dist/clis/geo/search.d.ts +8 -0
- package/dist/clis/geo/search.js +66 -0
- package/dist/clis/kegg/convert.d.ts +7 -0
- package/dist/clis/kegg/convert.js +37 -0
- package/dist/clis/kegg/disease.d.ts +6 -0
- package/dist/clis/kegg/disease.js +57 -0
- package/dist/clis/kegg/link.d.ts +7 -0
- package/dist/clis/kegg/link.js +36 -0
- package/dist/clis/kegg/pathway.d.ts +6 -0
- package/dist/clis/kegg/pathway.js +37 -0
- package/dist/clis/pubmed/abstract.d.ts +7 -0
- package/dist/clis/pubmed/abstract.js +42 -0
- package/dist/clis/pubmed/cited-by.d.ts +7 -0
- package/dist/clis/pubmed/cited-by.js +77 -0
- package/dist/clis/pubmed/fetch.d.ts +6 -0
- package/dist/clis/pubmed/fetch.js +36 -0
- package/dist/clis/pubmed/info.yaml +22 -0
- package/dist/clis/pubmed/related.d.ts +7 -0
- package/dist/clis/pubmed/related.js +81 -0
- package/dist/clis/pubmed/search.d.ts +8 -0
- package/dist/clis/pubmed/search.js +63 -0
- package/dist/clis/snp/lookup.d.ts +7 -0
- package/dist/clis/snp/lookup.js +57 -0
- package/dist/clis/sra/download.d.ts +18 -0
- package/dist/clis/sra/download.js +217 -0
- package/dist/clis/sra/run.d.ts +8 -0
- package/dist/clis/sra/run.js +77 -0
- package/dist/clis/sra/search.d.ts +8 -0
- package/dist/clis/sra/search.js +83 -0
- package/dist/clis/string/enrichment.d.ts +7 -0
- package/dist/clis/string/enrichment.js +50 -0
- package/dist/clis/string/network.d.ts +7 -0
- package/dist/clis/string/network.js +47 -0
- package/dist/clis/string/partners.d.ts +4 -0
- package/dist/clis/string/partners.js +44 -0
- package/dist/clis/taxonomy/lookup.d.ts +8 -0
- package/dist/clis/taxonomy/lookup.js +54 -0
- package/dist/clis/uniprot/fetch.d.ts +7 -0
- package/dist/clis/uniprot/fetch.js +82 -0
- package/dist/clis/uniprot/search.d.ts +6 -0
- package/dist/clis/uniprot/search.js +65 -0
- package/dist/clis/uniprot/sequence.d.ts +7 -0
- package/dist/clis/uniprot/sequence.js +51 -0
- package/dist/commander-adapter.d.ts +27 -0
- package/dist/commander-adapter.js +286 -0
- package/dist/completion.d.ts +19 -0
- package/dist/completion.js +117 -0
- package/dist/config.d.ts +57 -0
- package/dist/config.js +94 -0
- package/dist/databases/enrichr.d.ts +28 -0
- package/dist/databases/enrichr.js +131 -0
- package/dist/databases/ensembl.d.ts +14 -0
- package/dist/databases/ensembl.js +106 -0
- package/dist/databases/index.d.ts +45 -0
- package/dist/databases/index.js +49 -0
- package/dist/databases/kegg.d.ts +26 -0
- package/dist/databases/kegg.js +136 -0
- package/dist/databases/ncbi.d.ts +28 -0
- package/dist/databases/ncbi.js +144 -0
- package/dist/databases/string-db.d.ts +19 -0
- package/dist/databases/string-db.js +105 -0
- package/dist/databases/uniprot.d.ts +13 -0
- package/dist/databases/uniprot.js +110 -0
- package/dist/discovery.d.ts +32 -0
- package/dist/discovery.js +235 -0
- package/dist/doctor.d.ts +19 -0
- package/dist/doctor.js +151 -0
- package/dist/errors.d.ts +68 -0
- package/dist/errors.js +105 -0
- package/dist/execution.d.ts +15 -0
- package/dist/execution.js +178 -0
- package/dist/hooks.d.ts +48 -0
- package/dist/hooks.js +58 -0
- package/dist/main.d.ts +13 -0
- package/dist/main.js +31 -0
- package/dist/ncbi-fetch.d.ts +10 -0
- package/dist/ncbi-fetch.js +10 -0
- package/dist/output.d.ts +18 -0
- package/dist/output.js +394 -0
- package/dist/pipeline/executor.d.ts +22 -0
- package/dist/pipeline/executor.js +40 -0
- package/dist/pipeline/index.d.ts +6 -0
- package/dist/pipeline/index.js +6 -0
- package/dist/pipeline/registry.d.ts +16 -0
- package/dist/pipeline/registry.js +31 -0
- package/dist/pipeline/steps/fetch.d.ts +21 -0
- package/dist/pipeline/steps/fetch.js +160 -0
- package/dist/pipeline/steps/transform.d.ts +26 -0
- package/dist/pipeline/steps/transform.js +92 -0
- package/dist/pipeline/steps/xml-parse.d.ts +12 -0
- package/dist/pipeline/steps/xml-parse.js +27 -0
- package/dist/pipeline/template.d.ts +35 -0
- package/dist/pipeline/template.js +312 -0
- package/dist/rate-limiter.d.ts +56 -0
- package/dist/rate-limiter.js +120 -0
- package/dist/registry-api.d.ts +15 -0
- package/dist/registry-api.js +13 -0
- package/dist/registry.d.ts +90 -0
- package/dist/registry.js +100 -0
- package/dist/schema.d.ts +80 -0
- package/dist/schema.js +72 -0
- package/dist/spinner.d.ts +19 -0
- package/dist/spinner.js +37 -0
- package/dist/types.d.ts +101 -0
- package/dist/types.js +27 -0
- package/dist/utils.d.ts +16 -0
- package/dist/utils.js +40 -0
- package/dist/validate.d.ts +29 -0
- package/dist/validate.js +136 -0
- package/dist/verify.d.ts +20 -0
- package/dist/verify.js +131 -0
- package/dist/version.d.ts +13 -0
- package/dist/version.js +36 -0
- package/dist/xml-parser.d.ts +19 -0
- package/dist/xml-parser.js +119 -0
- package/dist/yaml-schema.d.ts +40 -0
- package/dist/yaml-schema.js +62 -0
- package/package.json +68 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
 * pubmed/cited-by — Find articles that cite a given PubMed article.
 *
 * Uses elink with linkname 'pubmed_pubmed_citedin' to discover citing
 * PMIDs, then efetch to retrieve article metadata.
 */
import { cli, Strategy } from '../../registry.js';
import { CliError } from '../../errors.js';
import { buildEutilsUrl } from '../_shared/eutils.js';
import { parsePubmedArticles } from '../_shared/xml-helpers.js';
import { clamp } from '../_shared/common.js';
import { isRecord } from '../../utils.js';
/**
 * Pull linked PMIDs out of an elink JSON response:
 *   { linksets: [{ linksetdbs: [{ linkname, links: ["12345", ...] }] }] }
 *
 * elink can return several linksetdbs; prefer the one whose linkname
 * matches `wantedLinkname`, and fall back to the first entry with a
 * non-empty links array (preserves the old first-match behavior when the
 * server omits linkname). Returns [] when no usable links exist.
 */
function extractLinkedPmids(linkResult, wantedLinkname) {
    const linksets = linkResult?.linksets;
    if (!Array.isArray(linksets) || !linksets.length) {
        return [];
    }
    const linksetdbs = linksets[0]?.linksetdbs;
    if (!Array.isArray(linksetdbs) || !linksetdbs.length) {
        return [];
    }
    let fallback = [];
    for (const lsdb of linksetdbs) {
        if (!isRecord(lsdb)) {
            continue;
        }
        const links = lsdb.links;
        if (!Array.isArray(links) || links.length === 0) {
            continue;
        }
        if (lsdb.linkname === wantedLinkname) {
            return links.map(String);
        }
        if (!fallback.length) {
            fallback = links.map(String);
        }
    }
    return fallback;
}
cli({
    site: 'pubmed',
    name: 'cited-by',
    description: 'Articles that cite a PubMed article',
    database: 'pubmed',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'pmid', positional: true, required: true, help: 'PubMed ID' },
        { name: 'limit', type: 'int', default: 10, help: 'Max results (1-100)' },
    ],
    columns: ['pmid', 'title', 'authors', 'journal', 'year', 'doi'],
    func: async (ctx, args) => {
        const pmid = String(args.pmid).trim();
        if (!/^\d+$/.test(pmid)) {
            throw new CliError('ARGUMENT', `Invalid PMID: "${pmid}"`, 'PMID must be a numeric identifier');
        }
        const limit = clamp(Number(args.limit), 1, 100);
        // Step 1: elink to get citing PMIDs
        const linkResult = await ctx.fetchJson(buildEutilsUrl('elink.fcgi', {
            dbfrom: 'pubmed',
            db: 'pubmed',
            id: pmid,
            linkname: 'pubmed_pubmed_citedin',
            retmode: 'json',
        }));
        // Select the linksetdb that actually matches the requested linkname
        // (the old code took the first entry with links, which could pick the
        // wrong set if elink ever returned multiple linksetdbs).
        const citingIds = extractLinkedPmids(linkResult, 'pubmed_pubmed_citedin');
        if (!citingIds.length) {
            throw new CliError('NOT_FOUND', `No citing articles found for PMID ${pmid}`, 'This article may not have been cited yet');
        }
        // Trim to requested limit
        const trimmedIds = citingIds.slice(0, limit);
        // Step 2: efetch those PMIDs
        const xmlData = await ctx.fetchXml(buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: trimmedIds.join(','),
            rettype: 'xml',
        }));
        const articles = parsePubmedArticles(xmlData);
        if (!articles.length) {
            throw new CliError('PARSE_ERROR', 'Failed to parse citing articles', 'Try again later');
        }
        return articles;
    },
});
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
 * pubmed/fetch — Get PubMed article details by PMID.
 *
 * Fetches a single article and returns full metadata including abstract.
 */
import { cli, Strategy } from '../../registry.js';
import { CliError } from '../../errors.js';
import { buildEutilsUrl } from '../_shared/eutils.js';
import { parsePubmedArticles } from '../_shared/xml-helpers.js';
// A PMID is a bare decimal number, nothing else.
const PMID_RE = /^\d+$/;
cli({
    site: 'pubmed',
    name: 'fetch',
    description: 'Get PubMed article details by PMID',
    database: 'pubmed',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'pmid', positional: true, required: true, help: 'PubMed ID (e.g. 39088800)' },
    ],
    columns: ['pmid', 'title', 'authors', 'journal', 'year', 'doi', 'abstract'],
    func: async (ctx, args) => {
        const pmid = String(args.pmid).trim();
        if (!PMID_RE.test(pmid)) {
            throw new CliError('ARGUMENT', `Invalid PMID: "${pmid}"`, 'PMID must be a numeric identifier (e.g. 39088800)');
        }
        // efetch in XML mode — PubMed does not serve article metadata as JSON.
        const url = buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: pmid,
            rettype: 'xml',
        });
        const articles = parsePubmedArticles(await ctx.fetchXml(url));
        if (articles.length === 0) {
            throw new CliError('NOT_FOUND', `Article PMID ${pmid} not found`, 'Check that the PMID is correct');
        }
        return articles;
    },
});
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# pubmed/info — declarative pipeline CLI definition (no JS handler).
site: pubmed
name: info
description: PubMed database statistics
database: pubmed
strategy: public

pipeline:
  # Step 1: query NCBI einfo for database metadata (JSON mode).
  - fetch:
      url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
      params:
        db: pubmed
        retmode: json

  # Step 2: einfo wraps the stats under einforesult.dbinfo — drill into it.
  - select: einforesult.dbinfo

  # Step 3: project each record onto the output columns declared below.
  - map:
      database: ${{ item.dbname }}
      description: ${{ item.description }}
      count: ${{ item.count }}
      last_update: ${{ item.lastupdate }}

columns: [database, description, count, last_update]
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
 * pubmed/related — Find related articles for a given PubMed article.
 *
 * Uses elink with linkname 'pubmed_pubmed' to discover related PMIDs
 * (NCBI's pre-computed similarity), then efetch for article metadata.
 */
import { cli, Strategy } from '../../registry.js';
import { CliError } from '../../errors.js';
import { buildEutilsUrl } from '../_shared/eutils.js';
import { parsePubmedArticles } from '../_shared/xml-helpers.js';
import { clamp } from '../_shared/common.js';
import { isRecord } from '../../utils.js';
/**
 * Pull linked PMIDs out of an elink JSON response:
 *   { linksets: [{ linksetdbs: [{ linkname, links: ["12345", ...] }] }] }
 *
 * elink can return several linksetdbs; prefer the one whose linkname
 * matches `wantedLinkname`, and fall back to the first entry with a
 * non-empty links array (preserves the old first-match behavior when the
 * server omits linkname). Returns [] when no usable links exist.
 */
function extractLinkedPmids(linkResult, wantedLinkname) {
    const linksets = linkResult?.linksets;
    if (!Array.isArray(linksets) || !linksets.length) {
        return [];
    }
    const linksetdbs = linksets[0]?.linksetdbs;
    if (!Array.isArray(linksetdbs) || !linksetdbs.length) {
        return [];
    }
    let fallback = [];
    for (const lsdb of linksetdbs) {
        if (!isRecord(lsdb)) {
            continue;
        }
        const links = lsdb.links;
        if (!Array.isArray(links) || links.length === 0) {
            continue;
        }
        if (lsdb.linkname === wantedLinkname) {
            return links.map(String);
        }
        if (!fallback.length) {
            fallback = links.map(String);
        }
    }
    return fallback;
}
cli({
    site: 'pubmed',
    name: 'related',
    description: 'Find related PubMed articles',
    database: 'pubmed',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'pmid', positional: true, required: true, help: 'PubMed ID' },
        { name: 'limit', type: 'int', default: 10, help: 'Max results (1-100)' },
    ],
    columns: ['pmid', 'title', 'authors', 'journal', 'year', 'doi'],
    func: async (ctx, args) => {
        const pmid = String(args.pmid).trim();
        if (!/^\d+$/.test(pmid)) {
            throw new CliError('ARGUMENT', `Invalid PMID: "${pmid}"`, 'PMID must be a numeric identifier');
        }
        const limit = clamp(Number(args.limit), 1, 100);
        // Step 1: elink to get related PMIDs
        const linkResult = await ctx.fetchJson(buildEutilsUrl('elink.fcgi', {
            dbfrom: 'pubmed',
            db: 'pubmed',
            id: pmid,
            linkname: 'pubmed_pubmed',
            retmode: 'json',
        }));
        // Select the linksetdb that actually matches the requested linkname
        // (the old code took the first entry with links, which could pick the
        // wrong set if elink ever returned multiple linksetdbs).
        let relatedIds = extractLinkedPmids(linkResult, 'pubmed_pubmed');
        if (!relatedIds.length) {
            throw new CliError('NOT_FOUND', `No related articles found for PMID ${pmid}`, 'Try a different article');
        }
        // Exclude the queried PMID itself from results
        relatedIds = relatedIds.filter((id) => id !== pmid);
        // Trim to requested limit
        const trimmedIds = relatedIds.slice(0, limit);
        if (!trimmedIds.length) {
            throw new CliError('NOT_FOUND', `No related articles found for PMID ${pmid}`, 'Try a different article');
        }
        // Step 2: efetch those PMIDs
        const xmlData = await ctx.fetchXml(buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: trimmedIds.join(','),
            rettype: 'xml',
        }));
        const articles = parsePubmedArticles(xmlData);
        if (!articles.length) {
            throw new CliError('PARSE_ERROR', 'Failed to parse related articles', 'Try again later');
        }
        return articles;
    },
});
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
 * pubmed/search — Search PubMed articles.
 *
 * Two-step esearch + efetch pattern:
 *   1. esearch returns the PMIDs matching the query
 *   2. efetch (XML) returns full article metadata for those PMIDs
 */
import { cli, Strategy } from '../../registry.js';
import { CliError } from '../../errors.js';
import { buildEutilsUrl } from '../_shared/eutils.js';
import { parsePubmedArticles } from '../_shared/xml-helpers.js';
import { clamp } from '../_shared/common.js';
import { withMeta } from '../../types.js';
// User-facing sort choices mapped onto esearch `sort` parameter values.
const SORT_MAP = {
    relevance: 'relevance',
    date: 'pub_date',
    author: 'author',
    journal: 'journal',
};
cli({
    site: 'pubmed',
    name: 'search',
    description: 'Search PubMed articles',
    database: 'pubmed',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'query', positional: true, required: true, help: 'Search query (e.g. "CRISPR cancer therapy")' },
        { name: 'limit', type: 'int', default: 10, help: 'Max results (1-200)' },
        { name: 'sort', default: 'relevance', choices: ['relevance', 'date', 'author', 'journal'], help: 'Sort order' },
    ],
    columns: ['pmid', 'title', 'authors', 'journal', 'year', 'doi'],
    func: async (ctx, args) => {
        const query = String(args.query);
        const limit = clamp(Number(args.limit), 1, 200);
        const sort = SORT_MAP[String(args.sort)] ?? 'relevance';
        // Step 1: esearch to collect matching PMIDs.
        const searchUrl = buildEutilsUrl('esearch.fcgi', {
            db: 'pubmed',
            term: query,
            retmax: String(limit),
            sort,
            retmode: 'json',
        });
        const esearchResult = (await ctx.fetchJson(searchUrl))?.esearchresult;
        const pmids = esearchResult?.idlist ?? [];
        const totalCount = Number(esearchResult?.count ?? 0);
        if (!pmids.length) {
            throw new CliError('NOT_FOUND', 'No articles found', 'Try different search terms or check PubMed query syntax');
        }
        // Step 2: efetch for full article details (PubMed only serves XML here).
        const fetchUrl = buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: pmids.join(','),
            rettype: 'xml',
        });
        const articles = parsePubmedArticles(await ctx.fetchXml(fetchUrl));
        if (!articles.length) {
            throw new CliError('PARSE_ERROR', 'Failed to parse PubMed response', 'This may be a temporary issue; try again');
        }
        return withMeta(articles, { totalCount, query });
    },
});
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
 * snp/lookup — Look up SNP details by rsID.
 *
 * Uses esummary (JSON mode) directly with the numeric SNP ID
 * to retrieve variant metadata from dbSNP.
 */
import { cli, Strategy } from '../../registry.js';
import { CliError } from '../../errors.js';
import { buildEutilsUrl } from '../_shared/eutils.js';
cli({
    site: 'snp',
    name: 'lookup',
    description: 'Look up SNP details by rsID',
    database: 'snp',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'rsid', positional: true, required: true, help: 'dbSNP rsID (e.g. rs334, rs7412, rs429358)' },
    ],
    columns: ['rsid', 'gene', 'chromosome', 'position', 'alleles', 'maf', 'clinical', 'function'],
    func: async (ctx, args) => {
        const rsid = String(args.rsid).trim().toLowerCase();
        // Strip 'rs' prefix if present for the search, keep for display
        const numericId = rsid.replace(/^rs/, '');
        // Validate up front (consistent with the pubmed CLIs' PMID check)
        // rather than sending garbage to esummary and reporting a
        // misleading NOT_FOUND.
        if (!/^\d+$/.test(numericId)) {
            throw new CliError('ARGUMENT', `Invalid rsID: "${String(args.rsid)}"`, 'Use a dbSNP rsID like rs334, or a bare numeric ID');
        }
        // esummary with SNP ID
        const summary = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
            db: 'snp', id: numericId, retmode: 'json',
        }));
        const uids = summary?.result?.uids ?? [];
        if (!uids.length)
            throw new CliError('NOT_FOUND', `SNP rs${numericId} not found`);
        const item = summary.result[uids[0]] ?? {};
        // dbSNP esummary fields: snp_id, genes (array), chrpos, docsum, global_mafs, clinical_significance
        const genes = Array.isArray(item.genes) ? item.genes.map((g) => g.name).join(', ') : '';
        // Coerce defensively — NOTE(review): chrpos is presumably "chr:pos" text,
        // but nothing guarantees it arrives as a string; String() avoids a crash
        // on .includes if it does not.
        const chrpos = String(item.chrpos ?? '');
        const [chr, pos] = chrpos.includes(':') ? chrpos.split(':') : ['', ''];
        // Parse MAF from docsum or global_mafs
        const mafs = Array.isArray(item.global_mafs)
            ? item.global_mafs.map((m) => `${m.study}:${m.freq}`).join('; ')
            : '';
        const clinical = Array.isArray(item.clinical_significance)
            ? item.clinical_significance.join(', ')
            : String(item.clinical_significance ?? '');
        const funcAnnot = Array.isArray(item.fxn_class)
            ? item.fxn_class.join(', ')
            : String(item.fxn_class ?? '');
        return [{
                rsid: `rs${item.snp_id ?? numericId}`,
                gene: genes,
                chromosome: chr,
                position: pos,
                alleles: item.docsum ?? '',
                // Truncate long MAF lists so the column stays readable.
                maf: mafs.slice(0, 50) + (mafs.length > 50 ? '...' : ''),
                clinical,
                function: funcAnnot,
            }];
    },
});
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * sra/download — Download FASTQ files for an SRA run.
 *
 * Two download strategies:
 * 1. ENA HTTPS (default, no external tools needed):
 *    https://ftp.sra.ebi.ac.uk/vol1/fastq/SRR123/SRR1234567/SRR1234567_1.fastq.gz
 *
 * 2. sra-tools (fallback, requires prefetch + fasterq-dump):
 *    prefetch SRR1234567 && fasterq-dump SRR1234567
 *
 * ENA is preferred because it downloads compressed FASTQ directly
 * without needing sra-tools installed.
 */
/** Build ENA FASTQ download URLs for an SRR accession. */
export declare function buildEnaFastqUrls(accession: string): string[];
/** Parse a human-readable size string (e.g. "500M", "2G") to bytes. */
export declare function parseMaxSize(value: string): number;
/** Format a byte count as a human-readable string ("512 B", "2.0 KB", …); 0 yields "unknown size". */
export declare function formatSize(bytes: number): string;
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sra/download — Download FASTQ files for an SRA run.
|
|
3
|
+
*
|
|
4
|
+
* Two download strategies:
|
|
5
|
+
* 1. ENA HTTPS (default, no external tools needed):
|
|
6
|
+
* https://ftp.sra.ebi.ac.uk/vol1/fastq/SRR123/SRR1234567/SRR1234567_1.fastq.gz
|
|
7
|
+
*
|
|
8
|
+
* 2. sra-tools (fallback, requires prefetch + fasterq-dump):
|
|
9
|
+
* prefetch SRR1234567 && fasterq-dump SRR1234567
|
|
10
|
+
*
|
|
11
|
+
* ENA is preferred because it downloads compressed FASTQ directly
|
|
12
|
+
* without needing sra-tools installed.
|
|
13
|
+
*/
|
|
14
|
+
import { cli, Strategy } from '../../registry.js';
|
|
15
|
+
import { CliError } from '../../errors.js';
|
|
16
|
+
import { mkdirSync, existsSync, createWriteStream } from 'node:fs';
|
|
17
|
+
import { join } from 'node:path';
|
|
18
|
+
import { pipeline } from 'node:stream/promises';
|
|
19
|
+
import { Readable } from 'node:stream';
|
|
20
|
+
import { execSync } from 'node:child_process';
|
|
21
|
+
/**
 * Build ENA FASTQ download URLs for an SRR accession.
 *
 * ENA lays runs out as /vol1/fastq/<6-char prefix>[/<sub-dir>]/<accession>/.
 * The sub-directory encodes accessions longer than 9 characters:
 *   <= 9 chars (e.g. SRR039885): no sub-directory
 *   10 chars   (e.g. SRR1039508): /00N  (N  = last digit)
 *   11 chars   (e.g. SRR10395085): /0NN (NN = last 2 digits)
 *   >= 12 chars: /NNN (NNN = last 3 digits)
 *
 * Returns the single-end URL plus the paired-end _1/_2 URLs; callers are
 * expected to tolerate 404s for whichever layout does not apply.
 */
export function buildEnaFastqUrls(accession) {
    const prefix = accession.slice(0, 6); // e.g. SRR103
    // Digits beyond the 9-character base form decide the sub-directory,
    // zero-padded on the left to three characters (capped at 3 digits).
    const extraDigits = accession.length - 9;
    let subDir = '';
    if (extraDigits >= 1) {
        const take = Math.min(extraDigits, 3);
        subDir = `/${accession.slice(-take).padStart(3, '0')}`;
    }
    const base = `https://ftp.sra.ebi.ac.uk/vol1/fastq/${prefix}${subDir}/${accession}`;
    return ['', '_1', '_2'].map((suffix) => `${base}/${accession}${suffix}.fastq.gz`);
}
|
|
47
|
+
/**
 * Check if a command exists on PATH.
 *
 * Shells out to `which` and treats any non-zero exit (thrown by execSync)
 * as "not found". NOTE(review): `which` is Unix-only — confirm Windows is
 * out of scope for this package.
 */
function commandExists(cmd) {
    let found = true;
    try {
        execSync(`which ${cmd}`, { stdio: 'ignore' });
    }
    catch {
        found = false;
    }
    return found;
}
|
|
57
|
+
/**
 * Download `url` to `destPath`, streaming the body to disk.
 *
 * @returns {{ ok: boolean, size: number, notFound: boolean }}
 *   `notFound` distinguishes an expected 404 (ENA single/paired layout
 *   mismatch) from real errors; `size` is the number of bytes actually
 *   written. Stream failures propagate as rejections for the caller's
 *   try/catch.
 */
async function downloadFile(url, destPath) {
    const response = await fetch(url);
    if (response.status === 404) {
        return { ok: false, size: 0, notFound: true };
    }
    if (!response.ok || !response.body) {
        return { ok: false, size: 0, notFound: false };
    }
    // Count bytes as they stream through so the reported size is accurate
    // even when the server omits Content-Length (e.g. chunked transfer) —
    // previously a missing header was reported as size 0 ("unknown size").
    let size = 0;
    const countBytes = async function* (source) {
        for await (const chunk of source) {
            size += chunk.length;
            yield chunk;
        }
    };
    const writable = createWriteStream(destPath);
    await pipeline(Readable.fromWeb(response.body), countBytes, writable);
    return { ok: true, size, notFound: false };
}
|
|
71
|
+
/**
 * Parse a human-readable size string (e.g. "500M", "2G") to bytes.
 *
 * Accepts an optional K/M/G/T suffix (case-insensitive, optional trailing
 * "B") and decimal amounts. Returns NaN when the string does not parse.
 */
export function parseMaxSize(value) {
    const UNIT_BYTES = { '': 1, K: 1024, M: 1024 ** 2, G: 1024 ** 3, T: 1024 ** 4 };
    const parsed = /^(\d+(?:\.\d+)?)\s*([KMGT]?)B?$/i.exec(value.trim());
    if (parsed === null) {
        return NaN;
    }
    const [, amount, unit] = parsed;
    return parseFloat(amount) * (UNIT_BYTES[unit.toUpperCase()] ?? 1);
}
|
|
81
|
+
/**
 * Render a byte count as a human-readable string.
 *
 * 0 is rendered as "unknown size" because callers pass 0 when the size
 * could not be determined (e.g. no Content-Length header).
 */
export function formatSize(bytes) {
    const KB = 1024;
    const MB = KB * 1024;
    const GB = MB * 1024;
    if (bytes === 0) {
        return 'unknown size';
    }
    if (bytes >= GB) {
        return `${(bytes / GB).toFixed(2)} GB`;
    }
    if (bytes >= MB) {
        return `${(bytes / MB).toFixed(1)} MB`;
    }
    if (bytes >= KB) {
        return `${(bytes / KB).toFixed(1)} KB`;
    }
    return `${bytes} B`;
}
|
|
92
|
+
// CLI registration: `biocli sra download <accession>`.
// Tries ENA HTTPS first (no external tools), falling back to sra-tools
// (prefetch + fasterq-dump) when --method sra-tools is given.
cli({
    site: 'sra',
    name: 'download',
    description: 'Download FASTQ files for an SRA run (via ENA or sra-tools)',
    database: 'sra',
    strategy: Strategy.PUBLIC,
    timeoutSeconds: 600,
    args: [
        { name: 'accession', positional: true, required: true, help: 'SRA run accession (e.g. SRR1234567)' },
        { name: 'outdir', default: '.', help: 'Output directory (default: current directory)' },
        { name: 'method', default: 'ena', choices: ['ena', 'sra-tools'], help: 'Download method' },
        { name: 'dry-run', type: 'boolean', default: false, help: 'Show download URLs without downloading' },
        { name: 'max-size', help: 'Max file size to download (e.g. "500M", "2G"). Larger files are skipped.' },
    ],
    columns: ['file', 'size', 'status'],
    func: async (_ctx, args) => {
        const accession = String(args.accession).trim();
        const outdir = String(args.outdir);
        const method = String(args.method);
        const dryRun = Boolean(args['dry-run']);
        // No --max-size → no size cap (Infinity disables the HEAD pre-check below).
        const maxSizeStr = args['max-size'] ? String(args['max-size']) : undefined;
        const maxSizeBytes = maxSizeStr ? parseMaxSize(maxSizeStr) : Infinity;
        if (maxSizeStr && Number.isNaN(maxSizeBytes)) {
            throw new CliError('ARGUMENT', `Invalid --max-size value: "${maxSizeStr}"`, 'Use format like "500M", "2G", "1024K"');
        }
        // Strict accession format check — this also makes the later shell
        // interpolation into prefetch/fasterq-dump injection-safe.
        if (!/^[SDE]RR\d+$/i.test(accession)) {
            throw new CliError('ARGUMENT', `Invalid SRA run accession: "${accession}"`, 'Use a run accession starting with SRR, ERR, or DRR (e.g. SRR1234567)');
        }
        if (!existsSync(outdir)) {
            mkdirSync(outdir, { recursive: true });
        }
        // Method 1: ENA HTTPS download
        if (method === 'ena') {
            // Three candidate URLs: single-end plus paired-end _1/_2; at most
            // a subset exists for any run, so 404s are expected and silent.
            const urls = buildEnaFastqUrls(accession);
            const rows = [];
            const errors = [];
            for (const url of urls) {
                const fileName = url.split('/').pop();
                const destPath = join(outdir, fileName);
                // Dry-run: probe with HEAD, report URL and size without downloading
                if (dryRun) {
                    try {
                        const head = await fetch(url, { method: 'HEAD' });
                        if (head.ok) {
                            const size = Number(head.headers.get('content-length') ?? 0);
                            rows.push({ file: fileName, size: formatSize(size), status: `→ ${url}` });
                        }
                        // 404 → skip silently (expected for single/paired mismatch)
                    }
                    catch { /* skip */ }
                    continue;
                }
                try {
                    // Max-size check: HEAD request first to get size
                    if (maxSizeBytes < Infinity) {
                        const head = await fetch(url, { method: 'HEAD' });
                        if (head.status === 404)
                            continue; // expected
                        if (!head.ok) {
                            errors.push(`${fileName}: HTTP ${head.status}`);
                            rows.push({ file: fileName, size: '', status: 'failed' });
                            continue;
                        }
                        const size = Number(head.headers.get('content-length') ?? 0);
                        if (size > maxSizeBytes) {
                            rows.push({ file: fileName, size: formatSize(size), status: `skipped (exceeds --max-size ${maxSizeStr})` });
                            continue;
                        }
                    }
                    const result = await downloadFile(url, destPath);
                    if (result.ok) {
                        rows.push({ file: fileName, size: formatSize(result.size), status: `saved → ${destPath}` });
                    }
                    else if (!result.notFound) {
                        errors.push(`${fileName}: HTTP error`);
                        rows.push({ file: fileName, size: '', status: 'failed' });
                    }
                    // 404 is expected: single-end has no _1/_2, paired-end has no plain .fastq.gz
                }
                catch (err) {
                    const msg = err instanceof Error ? err.message : String(err);
                    errors.push(`${fileName}: ${msg}`);
                    rows.push({ file: fileName, size: '', status: `error: ${msg}` });
                }
            }
            if (dryRun) {
                if (!rows.length) {
                    throw new CliError('NOT_FOUND', `FASTQ files not available on ENA for ${accession}`);
                }
                return rows;
            }
            // Nothing saved → run is not mirrored on ENA; point at sra-tools.
            const successCount = rows.filter(r => r.status.startsWith('saved')).length;
            if (successCount === 0) {
                throw new CliError('NOT_FOUND', `FASTQ files not available on ENA for ${accession}`, 'The run may not be mirrored to ENA yet. Try: biocli sra download ' + accession + ' --method sra-tools');
            }
            // Some files saved, some failed → surface as a partial failure.
            if (errors.length > 0) {
                throw new CliError('API_ERROR', `Partial download failure for ${accession}: ${errors.join('; ')}`, 'Some files failed to download. Check network connectivity and retry.');
            }
            return rows;
        }
        // Method 2: sra-tools
        if (!commandExists('prefetch')) {
            throw new CliError('ARGUMENT', 'sra-tools not found on PATH', 'Install sra-tools: conda install -c bioconda sra-tools, or use --method ena');
        }
        const rows = [];
        try {
            // prefetch downloads the .sra file
            console.error(`Downloading ${accession} with prefetch...`);
            execSync(`prefetch ${accession} -O "${outdir}"`, { stdio: 'inherit' });
            rows.push({ file: `${accession}.sra`, size: '', status: 'prefetch done' });
            // fasterq-dump converts .sra to .fastq
            if (commandExists('fasterq-dump')) {
                console.error(`Converting to FASTQ with fasterq-dump...`);
                execSync(`fasterq-dump "${join(outdir, accession)}" -O "${outdir}" --split-files`, { stdio: 'inherit' });
                rows.push({ file: `${accession}*.fastq`, size: '', status: 'fasterq-dump done' });
            }
            else {
                // Not fatal: the .sra archive is still useful without conversion.
                rows.push({ file: '', size: '', status: 'fasterq-dump not found — .sra file downloaded only' });
            }
        }
        catch (err) {
            throw new CliError('API_ERROR', `sra-tools failed: ${err instanceof Error ? err.message : String(err)}`, 'Check that sra-tools is correctly configured');
        }
        return rows;
    },
});
|