@yangfei_93sky/biocli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. package/LICENSE +21 -0
  2. package/README.md +197 -0
  3. package/dist/batch.d.ts +20 -0
  4. package/dist/batch.js +69 -0
  5. package/dist/build-manifest.d.ts +38 -0
  6. package/dist/build-manifest.js +186 -0
  7. package/dist/cache.d.ts +28 -0
  8. package/dist/cache.js +126 -0
  9. package/dist/cli-manifest.json +1500 -0
  10. package/dist/cli.d.ts +7 -0
  11. package/dist/cli.js +336 -0
  12. package/dist/clis/_shared/common.d.ts +8 -0
  13. package/dist/clis/_shared/common.js +13 -0
  14. package/dist/clis/_shared/eutils.d.ts +9 -0
  15. package/dist/clis/_shared/eutils.js +9 -0
  16. package/dist/clis/_shared/organism-db.d.ts +23 -0
  17. package/dist/clis/_shared/organism-db.js +58 -0
  18. package/dist/clis/_shared/xml-helpers.d.ts +58 -0
  19. package/dist/clis/_shared/xml-helpers.js +266 -0
  20. package/dist/clis/aggregate/enrichment.d.ts +7 -0
  21. package/dist/clis/aggregate/enrichment.js +105 -0
  22. package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
  23. package/dist/clis/aggregate/gene-dossier.js +248 -0
  24. package/dist/clis/aggregate/gene-profile.d.ts +16 -0
  25. package/dist/clis/aggregate/gene-profile.js +305 -0
  26. package/dist/clis/aggregate/literature-brief.d.ts +7 -0
  27. package/dist/clis/aggregate/literature-brief.js +79 -0
  28. package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
  29. package/dist/clis/aggregate/variant-dossier.js +161 -0
  30. package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
  31. package/dist/clis/aggregate/variant-interpret.js +210 -0
  32. package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
  33. package/dist/clis/aggregate/workflow-prepare.js +228 -0
  34. package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
  35. package/dist/clis/aggregate/workflow-scout.js +175 -0
  36. package/dist/clis/clinvar/search.d.ts +8 -0
  37. package/dist/clis/clinvar/search.js +61 -0
  38. package/dist/clis/clinvar/variant.d.ts +7 -0
  39. package/dist/clis/clinvar/variant.js +53 -0
  40. package/dist/clis/enrichr/analyze.d.ts +7 -0
  41. package/dist/clis/enrichr/analyze.js +48 -0
  42. package/dist/clis/ensembl/lookup.d.ts +6 -0
  43. package/dist/clis/ensembl/lookup.js +38 -0
  44. package/dist/clis/ensembl/vep.d.ts +7 -0
  45. package/dist/clis/ensembl/vep.js +86 -0
  46. package/dist/clis/ensembl/xrefs.d.ts +6 -0
  47. package/dist/clis/ensembl/xrefs.js +36 -0
  48. package/dist/clis/gene/fetch.d.ts +10 -0
  49. package/dist/clis/gene/fetch.js +96 -0
  50. package/dist/clis/gene/info.d.ts +7 -0
  51. package/dist/clis/gene/info.js +37 -0
  52. package/dist/clis/gene/search.d.ts +7 -0
  53. package/dist/clis/gene/search.js +71 -0
  54. package/dist/clis/geo/dataset.d.ts +7 -0
  55. package/dist/clis/geo/dataset.js +55 -0
  56. package/dist/clis/geo/download.d.ts +17 -0
  57. package/dist/clis/geo/download.js +115 -0
  58. package/dist/clis/geo/samples.d.ts +7 -0
  59. package/dist/clis/geo/samples.js +57 -0
  60. package/dist/clis/geo/search.d.ts +8 -0
  61. package/dist/clis/geo/search.js +66 -0
  62. package/dist/clis/kegg/convert.d.ts +7 -0
  63. package/dist/clis/kegg/convert.js +37 -0
  64. package/dist/clis/kegg/disease.d.ts +6 -0
  65. package/dist/clis/kegg/disease.js +57 -0
  66. package/dist/clis/kegg/link.d.ts +7 -0
  67. package/dist/clis/kegg/link.js +36 -0
  68. package/dist/clis/kegg/pathway.d.ts +6 -0
  69. package/dist/clis/kegg/pathway.js +37 -0
  70. package/dist/clis/pubmed/abstract.d.ts +7 -0
  71. package/dist/clis/pubmed/abstract.js +42 -0
  72. package/dist/clis/pubmed/cited-by.d.ts +7 -0
  73. package/dist/clis/pubmed/cited-by.js +77 -0
  74. package/dist/clis/pubmed/fetch.d.ts +6 -0
  75. package/dist/clis/pubmed/fetch.js +36 -0
  76. package/dist/clis/pubmed/info.yaml +22 -0
  77. package/dist/clis/pubmed/related.d.ts +7 -0
  78. package/dist/clis/pubmed/related.js +81 -0
  79. package/dist/clis/pubmed/search.d.ts +8 -0
  80. package/dist/clis/pubmed/search.js +63 -0
  81. package/dist/clis/snp/lookup.d.ts +7 -0
  82. package/dist/clis/snp/lookup.js +57 -0
  83. package/dist/clis/sra/download.d.ts +18 -0
  84. package/dist/clis/sra/download.js +217 -0
  85. package/dist/clis/sra/run.d.ts +8 -0
  86. package/dist/clis/sra/run.js +77 -0
  87. package/dist/clis/sra/search.d.ts +8 -0
  88. package/dist/clis/sra/search.js +83 -0
  89. package/dist/clis/string/enrichment.d.ts +7 -0
  90. package/dist/clis/string/enrichment.js +50 -0
  91. package/dist/clis/string/network.d.ts +7 -0
  92. package/dist/clis/string/network.js +47 -0
  93. package/dist/clis/string/partners.d.ts +4 -0
  94. package/dist/clis/string/partners.js +44 -0
  95. package/dist/clis/taxonomy/lookup.d.ts +8 -0
  96. package/dist/clis/taxonomy/lookup.js +54 -0
  97. package/dist/clis/uniprot/fetch.d.ts +7 -0
  98. package/dist/clis/uniprot/fetch.js +82 -0
  99. package/dist/clis/uniprot/search.d.ts +6 -0
  100. package/dist/clis/uniprot/search.js +65 -0
  101. package/dist/clis/uniprot/sequence.d.ts +7 -0
  102. package/dist/clis/uniprot/sequence.js +51 -0
  103. package/dist/commander-adapter.d.ts +27 -0
  104. package/dist/commander-adapter.js +286 -0
  105. package/dist/completion.d.ts +19 -0
  106. package/dist/completion.js +117 -0
  107. package/dist/config.d.ts +57 -0
  108. package/dist/config.js +94 -0
  109. package/dist/databases/enrichr.d.ts +28 -0
  110. package/dist/databases/enrichr.js +131 -0
  111. package/dist/databases/ensembl.d.ts +14 -0
  112. package/dist/databases/ensembl.js +106 -0
  113. package/dist/databases/index.d.ts +45 -0
  114. package/dist/databases/index.js +49 -0
  115. package/dist/databases/kegg.d.ts +26 -0
  116. package/dist/databases/kegg.js +136 -0
  117. package/dist/databases/ncbi.d.ts +28 -0
  118. package/dist/databases/ncbi.js +144 -0
  119. package/dist/databases/string-db.d.ts +19 -0
  120. package/dist/databases/string-db.js +105 -0
  121. package/dist/databases/uniprot.d.ts +13 -0
  122. package/dist/databases/uniprot.js +110 -0
  123. package/dist/discovery.d.ts +32 -0
  124. package/dist/discovery.js +235 -0
  125. package/dist/doctor.d.ts +19 -0
  126. package/dist/doctor.js +151 -0
  127. package/dist/errors.d.ts +68 -0
  128. package/dist/errors.js +105 -0
  129. package/dist/execution.d.ts +15 -0
  130. package/dist/execution.js +178 -0
  131. package/dist/hooks.d.ts +48 -0
  132. package/dist/hooks.js +58 -0
  133. package/dist/main.d.ts +13 -0
  134. package/dist/main.js +31 -0
  135. package/dist/ncbi-fetch.d.ts +10 -0
  136. package/dist/ncbi-fetch.js +10 -0
  137. package/dist/output.d.ts +18 -0
  138. package/dist/output.js +394 -0
  139. package/dist/pipeline/executor.d.ts +22 -0
  140. package/dist/pipeline/executor.js +40 -0
  141. package/dist/pipeline/index.d.ts +6 -0
  142. package/dist/pipeline/index.js +6 -0
  143. package/dist/pipeline/registry.d.ts +16 -0
  144. package/dist/pipeline/registry.js +31 -0
  145. package/dist/pipeline/steps/fetch.d.ts +21 -0
  146. package/dist/pipeline/steps/fetch.js +160 -0
  147. package/dist/pipeline/steps/transform.d.ts +26 -0
  148. package/dist/pipeline/steps/transform.js +92 -0
  149. package/dist/pipeline/steps/xml-parse.d.ts +12 -0
  150. package/dist/pipeline/steps/xml-parse.js +27 -0
  151. package/dist/pipeline/template.d.ts +35 -0
  152. package/dist/pipeline/template.js +312 -0
  153. package/dist/rate-limiter.d.ts +56 -0
  154. package/dist/rate-limiter.js +120 -0
  155. package/dist/registry-api.d.ts +15 -0
  156. package/dist/registry-api.js +13 -0
  157. package/dist/registry.d.ts +90 -0
  158. package/dist/registry.js +100 -0
  159. package/dist/schema.d.ts +80 -0
  160. package/dist/schema.js +72 -0
  161. package/dist/spinner.d.ts +19 -0
  162. package/dist/spinner.js +37 -0
  163. package/dist/types.d.ts +101 -0
  164. package/dist/types.js +27 -0
  165. package/dist/utils.d.ts +16 -0
  166. package/dist/utils.js +40 -0
  167. package/dist/validate.d.ts +29 -0
  168. package/dist/validate.js +136 -0
  169. package/dist/verify.d.ts +20 -0
  170. package/dist/verify.js +131 -0
  171. package/dist/version.d.ts +13 -0
  172. package/dist/version.js +36 -0
  173. package/dist/xml-parser.d.ts +19 -0
  174. package/dist/xml-parser.js +119 -0
  175. package/dist/yaml-schema.d.ts +40 -0
  176. package/dist/yaml-schema.js +62 -0
  177. package/package.json +68 -0
@@ -0,0 +1,77 @@
1
+ /**
2
+ * pubmed/cited-by — Find articles that cite a given PubMed article.
3
+ *
4
+ * Uses elink with linkname 'pubmed_pubmed_citedin' to discover citing
5
+ * PMIDs, then efetch to retrieve article metadata.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { buildEutilsUrl } from '../_shared/eutils.js';
10
+ import { parsePubmedArticles } from '../_shared/xml-helpers.js';
11
+ import { clamp } from '../_shared/common.js';
12
+ import { isRecord } from '../../utils.js';
13
+ cli({
14
+ site: 'pubmed',
15
+ name: 'cited-by',
16
+ description: 'Articles that cite a PubMed article',
17
+ database: 'pubmed',
18
+ strategy: Strategy.PUBLIC,
19
+ args: [
20
+ { name: 'pmid', positional: true, required: true, help: 'PubMed ID' },
21
+ { name: 'limit', type: 'int', default: 10, help: 'Max results (1-100)' },
22
+ ],
23
+ columns: ['pmid', 'title', 'authors', 'journal', 'year', 'doi'],
24
+ func: async (ctx, args) => {
25
+ const pmid = String(args.pmid).trim();
26
+ if (!/^\d+$/.test(pmid)) {
27
+ throw new CliError('ARGUMENT', `Invalid PMID: "${pmid}"`, 'PMID must be a numeric identifier');
28
+ }
29
+ const limit = clamp(Number(args.limit), 1, 100);
30
+ // Step 1: elink to get citing PMIDs
31
+ const linkResult = await ctx.fetchJson(buildEutilsUrl('elink.fcgi', {
32
+ dbfrom: 'pubmed',
33
+ db: 'pubmed',
34
+ id: pmid,
35
+ linkname: 'pubmed_pubmed_citedin',
36
+ retmode: 'json',
37
+ }));
38
+ // Navigate elink JSON response:
39
+ // { linksets: [{ linksetdbs: [{ links: ["12345", ...] }] }] }
40
+ const linksets = linkResult?.linksets;
41
+ if (!Array.isArray(linksets) || !linksets.length) {
42
+ throw new CliError('NOT_FOUND', `No citing articles found for PMID ${pmid}`, 'This article may not have been cited yet');
43
+ }
44
+ const firstLinkset = linksets[0];
45
+ const linksetdbs = firstLinkset?.linksetdbs;
46
+ if (!Array.isArray(linksetdbs) || !linksetdbs.length) {
47
+ throw new CliError('NOT_FOUND', `No citing articles found for PMID ${pmid}`, 'This article may not have been cited yet');
48
+ }
49
+ // Find the correct linksetdb entry
50
+ let citingIds = [];
51
+ for (const lsdb of linksetdbs) {
52
+ if (!isRecord(lsdb))
53
+ continue;
54
+ const links = lsdb.links;
55
+ if (Array.isArray(links) && links.length > 0) {
56
+ citingIds = links.map(String);
57
+ break;
58
+ }
59
+ }
60
+ if (!citingIds.length) {
61
+ throw new CliError('NOT_FOUND', `No citing articles found for PMID ${pmid}`, 'This article may not have been cited yet');
62
+ }
63
+ // Trim to requested limit
64
+ const trimmedIds = citingIds.slice(0, limit);
65
+ // Step 2: efetch those PMIDs
66
+ const xmlData = await ctx.fetchXml(buildEutilsUrl('efetch.fcgi', {
67
+ db: 'pubmed',
68
+ id: trimmedIds.join(','),
69
+ rettype: 'xml',
70
+ }));
71
+ const articles = parsePubmedArticles(xmlData);
72
+ if (!articles.length) {
73
+ throw new CliError('PARSE_ERROR', 'Failed to parse citing articles', 'Try again later');
74
+ }
75
+ return articles;
76
+ },
77
+ });
@@ -0,0 +1,6 @@
1
+ /**
2
+ * pubmed/fetch — Get PubMed article details by PMID.
3
+ *
4
+ * Fetches a single article and returns full metadata including abstract.
5
+ */
6
+ export {};
@@ -0,0 +1,36 @@
1
+ /**
2
+ * pubmed/fetch — Get PubMed article details by PMID.
3
+ *
4
+ * Fetches a single article and returns full metadata including abstract.
5
+ */
6
+ import { cli, Strategy } from '../../registry.js';
7
+ import { CliError } from '../../errors.js';
8
+ import { buildEutilsUrl } from '../_shared/eutils.js';
9
+ import { parsePubmedArticles } from '../_shared/xml-helpers.js';
10
cli({
    site: 'pubmed',
    name: 'fetch',
    description: 'Get PubMed article details by PMID',
    database: 'pubmed',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'pmid', positional: true, required: true, help: 'PubMed ID (e.g. 39088800)' },
    ],
    columns: ['pmid', 'title', 'authors', 'journal', 'year', 'doi', 'abstract'],
    func: async (ctx, args) => {
        // PMIDs are purely numeric; fail fast on anything else.
        const pmid = String(args.pmid).trim();
        const isNumeric = /^\d+$/.test(pmid);
        if (!isNumeric) {
            throw new CliError('ARGUMENT', `Invalid PMID: "${pmid}"`, 'PMID must be a numeric identifier (e.g. 39088800)');
        }
        // Single efetch call; PubMed article metadata is XML-only.
        const url = buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: pmid,
            rettype: 'xml',
        });
        const articles = parsePubmedArticles(await ctx.fetchXml(url));
        if (articles.length === 0) {
            throw new CliError('NOT_FOUND', `Article PMID ${pmid} not found`, 'Check that the PMID is correct');
        }
        return articles;
    },
});
@@ -0,0 +1,22 @@
1
+ site: pubmed
2
+ name: info
3
+ description: PubMed database statistics
4
+ database: pubmed
5
+ strategy: public
6
+
7
+ pipeline:
8
+ - fetch:
9
+ url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
10
+ params:
11
+ db: pubmed
12
+ retmode: json
13
+
14
+ - select: einforesult.dbinfo
15
+
16
+ - map:
17
+ database: ${{ item.dbname }}
18
+ description: ${{ item.description }}
19
+ count: ${{ item.count }}
20
+ last_update: ${{ item.lastupdate }}
21
+
22
+ columns: [database, description, count, last_update]
@@ -0,0 +1,7 @@
1
+ /**
2
+ * pubmed/related — Find related articles for a given PubMed article.
3
+ *
4
+ * Uses elink with linkname 'pubmed_pubmed' to discover related PMIDs
5
+ * (NCBI's pre-computed similarity), then efetch for article metadata.
6
+ */
7
+ export {};
@@ -0,0 +1,81 @@
1
+ /**
2
+ * pubmed/related — Find related articles for a given PubMed article.
3
+ *
4
+ * Uses elink with linkname 'pubmed_pubmed' to discover related PMIDs
5
+ * (NCBI's pre-computed similarity), then efetch for article metadata.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { buildEutilsUrl } from '../_shared/eutils.js';
10
+ import { parsePubmedArticles } from '../_shared/xml-helpers.js';
11
+ import { clamp } from '../_shared/common.js';
12
+ import { isRecord } from '../../utils.js';
13
cli({
    site: 'pubmed',
    name: 'related',
    description: 'Find related PubMed articles',
    database: 'pubmed',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'pmid', positional: true, required: true, help: 'PubMed ID' },
        { name: 'limit', type: 'int', default: 10, help: 'Max results (1-100)' },
    ],
    columns: ['pmid', 'title', 'authors', 'journal', 'year', 'doi'],
    func: async (ctx, args) => {
        // PMIDs are purely numeric; reject anything else before calling NCBI.
        const pmid = String(args.pmid).trim();
        if (!/^\d+$/.test(pmid)) {
            throw new CliError('ARGUMENT', `Invalid PMID: "${pmid}"`, 'PMID must be a numeric identifier');
        }
        const limit = clamp(Number(args.limit), 1, 100);
        // Step 1: elink with linkname pubmed_pubmed returns NCBI's
        // pre-computed similar-article PMIDs.
        const elinkJson = await ctx.fetchJson(buildEutilsUrl('elink.fcgi', {
            dbfrom: 'pubmed',
            db: 'pubmed',
            id: pmid,
            linkname: 'pubmed_pubmed',
            retmode: 'json',
        }));
        // Response shape: { linksets: [{ linksetdbs: [{ links: [...] }] }] }.
        // Use the first linksetdb entry that carries a non-empty links array.
        const linksets = elinkJson?.linksets;
        const linksetdbs = Array.isArray(linksets) ? linksets[0]?.linksetdbs : undefined;
        const entryWithLinks = Array.isArray(linksetdbs)
            ? linksetdbs.find((db) => isRecord(db) && Array.isArray(db.links) && db.links.length > 0)
            : undefined;
        // elink echoes the queried article back in its own result; drop it.
        const relatedIds = (entryWithLinks ? entryWithLinks.links.map(String) : [])
            .filter((id) => id !== pmid);
        const trimmedIds = relatedIds.slice(0, limit);
        if (!trimmedIds.length) {
            throw new CliError('NOT_FOUND', `No related articles found for PMID ${pmid}`, 'Try a different article');
        }
        // Step 2: efetch article metadata for the related PMIDs.
        const xmlData = await ctx.fetchXml(buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: trimmedIds.join(','),
            rettype: 'xml',
        }));
        const articles = parsePubmedArticles(xmlData);
        if (!articles.length) {
            throw new CliError('PARSE_ERROR', 'Failed to parse related articles', 'Try again later');
        }
        return articles;
    },
});
@@ -0,0 +1,8 @@
1
+ /**
2
+ * pubmed/search — Search PubMed articles.
3
+ *
4
+ * Uses the two-step esearch + efetch pattern:
5
+ * 1. esearch to retrieve matching PMIDs
6
+ * 2. efetch (XML) to get full article metadata
7
+ */
8
+ export {};
@@ -0,0 +1,63 @@
1
+ /**
2
+ * pubmed/search — Search PubMed articles.
3
+ *
4
+ * Uses the two-step esearch + efetch pattern:
5
+ * 1. esearch to retrieve matching PMIDs
6
+ * 2. efetch (XML) to get full article metadata
7
+ */
8
+ import { cli, Strategy } from '../../registry.js';
9
+ import { CliError } from '../../errors.js';
10
+ import { buildEutilsUrl } from '../_shared/eutils.js';
11
+ import { parsePubmedArticles } from '../_shared/xml-helpers.js';
12
+ import { clamp } from '../_shared/common.js';
13
+ import { withMeta } from '../../types.js';
14
// Maps user-facing sort aliases onto esearch's `sort` parameter values.
const SORT_FIELD_BY_ALIAS = {
    relevance: 'relevance',
    date: 'pub_date',
    author: 'author',
    journal: 'journal',
};
cli({
    site: 'pubmed',
    name: 'search',
    description: 'Search PubMed articles',
    database: 'pubmed',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'query', positional: true, required: true, help: 'Search query (e.g. "CRISPR cancer therapy")' },
        { name: 'limit', type: 'int', default: 10, help: 'Max results (1-200)' },
        { name: 'sort', default: 'relevance', choices: ['relevance', 'date', 'author', 'journal'], help: 'Sort order' },
    ],
    columns: ['pmid', 'title', 'authors', 'journal', 'year', 'doi'],
    func: async (ctx, args) => {
        const query = String(args.query);
        const limit = clamp(Number(args.limit), 1, 200);
        const sort = SORT_FIELD_BY_ALIAS[String(args.sort)] ?? 'relevance';
        // Step 1: esearch returns matching PMIDs plus the total hit count.
        const esearchJson = await ctx.fetchJson(buildEutilsUrl('esearch.fcgi', {
            db: 'pubmed',
            term: query,
            retmax: String(limit),
            sort,
            retmode: 'json',
        }));
        const esearchResult = esearchJson?.esearchresult;
        const pmids = esearchResult?.idlist ?? [];
        const totalCount = Number(esearchResult?.count ?? 0);
        if (!pmids.length) {
            throw new CliError('NOT_FOUND', 'No articles found', 'Try different search terms or check PubMed query syntax');
        }
        // Step 2: efetch full metadata (PubMed only serves article details as XML).
        const xmlData = await ctx.fetchXml(buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: pmids.join(','),
            rettype: 'xml',
        }));
        const articles = parsePubmedArticles(xmlData);
        if (!articles.length) {
            throw new CliError('PARSE_ERROR', 'Failed to parse PubMed response', 'This may be a temporary issue; try again');
        }
        // Attach the total hit count and the original query as result metadata.
        return withMeta(articles, { totalCount, query });
    },
});
@@ -0,0 +1,7 @@
1
+ /**
2
+ * snp/lookup — Look up SNP details by rsID.
3
+ *
4
+ * Uses esummary (JSON mode) directly with the numeric SNP ID
5
+ * to retrieve variant metadata from dbSNP.
6
+ */
7
+ export {};
@@ -0,0 +1,57 @@
1
+ /**
2
+ * snp/lookup — Look up SNP details by rsID.
3
+ *
4
+ * Uses esummary (JSON mode) directly with the numeric SNP ID
5
+ * to retrieve variant metadata from dbSNP.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { buildEutilsUrl } from '../_shared/eutils.js';
10
cli({
    site: 'snp',
    name: 'lookup',
    description: 'Look up SNP details by rsID',
    database: 'snp',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'rsid', positional: true, required: true, help: 'dbSNP rsID (e.g. rs334, rs7412, rs429358)' },
    ],
    columns: ['rsid', 'gene', 'chromosome', 'position', 'alleles', 'maf', 'clinical', 'function'],
    func: async (ctx, args) => {
        const rsid = String(args.rsid).trim().toLowerCase();
        // Validate up front so malformed input fails with a clear ARGUMENT
        // error instead of an opaque esummary API error (consistent with the
        // PMID validation in the pubmed commands).
        if (!/^(rs)?\d+$/.test(rsid)) {
            throw new CliError('ARGUMENT', `Invalid rsID: "${args.rsid}"`, 'Use a dbSNP rsID such as rs334 (the "rs" prefix is optional)');
        }
        // Strip 'rs' prefix if present for the search, keep for display.
        const numericId = rsid.replace(/^rs/, '');
        // esummary with SNP ID
        const summary = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
            db: 'snp', id: numericId, retmode: 'json',
        }));
        const uids = summary?.result?.uids ?? [];
        if (!uids.length)
            throw new CliError('NOT_FOUND', `SNP rs${numericId} not found`);
        const item = summary.result[uids[0]] ?? {};
        // dbSNP esummary fields: snp_id, genes (array), chrpos, docsum, global_mafs, clinical_significance
        const genes = Array.isArray(item.genes) ? item.genes.map((g) => g?.name ?? '').join(', ') : '';
        // chrpos is expected to be a "chr:pos" string, but coerce defensively —
        // a non-string value would otherwise crash on .includes()/.split().
        const chrpos = String(item.chrpos ?? '');
        const [chr, pos] = chrpos.includes(':') ? chrpos.split(':') : ['', ''];
        // Parse MAF from docsum or global_mafs
        const mafs = Array.isArray(item.global_mafs)
            ? item.global_mafs.map((m) => `${m.study}:${m.freq}`).join('; ')
            : '';
        const clinical = Array.isArray(item.clinical_significance)
            ? item.clinical_significance.join(', ')
            : String(item.clinical_significance ?? '');
        const funcAnnot = Array.isArray(item.fxn_class)
            ? item.fxn_class.join(', ')
            : String(item.fxn_class ?? '');
        return [{
            rsid: `rs${item.snp_id ?? numericId}`,
            gene: genes,
            chromosome: chr,
            position: pos,
            alleles: item.docsum ?? '',
            // Truncate the MAF list so the table column stays readable.
            maf: mafs.slice(0, 50) + (mafs.length > 50 ? '...' : ''),
            clinical,
            function: funcAnnot,
        }];
    },
});
@@ -0,0 +1,18 @@
1
+ /**
2
+ * sra/download — Download FASTQ files for an SRA run.
3
+ *
4
+ * Two download strategies:
5
+ * 1. ENA HTTPS (default, no external tools needed):
6
+ * https://ftp.sra.ebi.ac.uk/vol1/fastq/SRR123/SRR1234567/SRR1234567_1.fastq.gz
7
+ *
8
+ * 2. sra-tools (fallback, requires prefetch + fasterq-dump):
9
+ * prefetch SRR1234567 && fasterq-dump SRR1234567
10
+ *
11
+ * ENA is preferred because it downloads compressed FASTQ directly
12
+ * without needing sra-tools installed.
13
+ */
14
+ /** Build ENA FASTQ download URLs for an SRR accession. */
15
+ export declare function buildEnaFastqUrls(accession: string): string[];
16
+ /** Parse a human-readable size string (e.g. "500M", "2G") to bytes. */
17
+ export declare function parseMaxSize(value: string): number;
18
+ export declare function formatSize(bytes: number): string;
@@ -0,0 +1,217 @@
1
+ /**
2
+ * sra/download — Download FASTQ files for an SRA run.
3
+ *
4
+ * Two download strategies:
5
+ * 1. ENA HTTPS (default, no external tools needed):
6
+ * https://ftp.sra.ebi.ac.uk/vol1/fastq/SRR123/SRR1234567/SRR1234567_1.fastq.gz
7
+ *
8
+ * 2. sra-tools (fallback, requires prefetch + fasterq-dump):
9
+ * prefetch SRR1234567 && fasterq-dump SRR1234567
10
+ *
11
+ * ENA is preferred because it downloads compressed FASTQ directly
12
+ * without needing sra-tools installed.
13
+ */
14
+ import { cli, Strategy } from '../../registry.js';
15
+ import { CliError } from '../../errors.js';
16
+ import { mkdirSync, existsSync, createWriteStream } from 'node:fs';
17
+ import { join } from 'node:path';
18
+ import { pipeline } from 'node:stream/promises';
19
+ import { Readable } from 'node:stream';
20
+ import { execSync } from 'node:child_process';
21
/** Build ENA FASTQ download URLs for an SRR accession. */
export function buildEnaFastqUrls(accession) {
    // ENA layout: /vol1/fastq/<first 6 chars>[/<bucket>]/<accession>/
    // The bucket sub-directory is derived from the accession length:
    //   <= 9 chars (e.g. SRR039885):  none
    //   10 chars   (e.g. SRR1039508): /00N  (N  = last digit)
    //   11 chars   (e.g. SRR10395085): /0NN (NN = last 2 digits)
    //   >= 12 chars:                   /NNN (NNN = last 3 digits)
    const sixCharPrefix = accession.slice(0, 6); // e.g. SRR103
    const len = accession.length;
    let bucket = '';
    if (len >= 12) {
        bucket = `/${accession.slice(-3)}`;
    }
    else if (len === 11) {
        bucket = `/0${accession.slice(-2)}`;
    }
    else if (len === 10) {
        bucket = `/00${accession.slice(-1)}`;
    }
    const runDir = `https://ftp.sra.ebi.ac.uk/vol1/fastq/${sixCharPrefix}${bucket}/${accession}`;
    // Single-end runs expose <acc>.fastq.gz; paired-end expose _1/_2 files.
    return [
        `${runDir}/${accession}.fastq.gz`,
        `${runDir}/${accession}_1.fastq.gz`,
        `${runDir}/${accession}_2.fastq.gz`,
    ];
}
47
/** Check if a command exists on PATH. */
function commandExists(cmd) {
    // `which` exits non-zero (making execSync throw) when the command is absent.
    try {
        execSync(`which ${cmd}`, { stdio: 'ignore' });
    }
    catch {
        return false;
    }
    return true;
}
57
/** Download a file. Returns { ok, size, notFound } to distinguish 404 from real errors. */
async function downloadFile(url, destPath) {
    const res = await fetch(url);
    // 404 is reported separately: for ENA it is an expected layout mismatch,
    // not a failure.
    if (res.status === 404) {
        return { ok: false, size: 0, notFound: true };
    }
    if (!res.ok || !res.body) {
        return { ok: false, size: 0, notFound: false };
    }
    // Stream straight to disk; pipeline() propagates errors from either side.
    await pipeline(Readable.fromWeb(res.body), createWriteStream(destPath));
    const lengthHeader = res.headers.get('content-length');
    return { ok: true, size: lengthHeader ? Number(lengthHeader) : 0, notFound: false };
}
71
/** Parse a human-readable size string (e.g. "500M", "2G") to bytes. */
export function parseMaxSize(value) {
    // Accepts an optional decimal fraction, an optional K/M/G/T unit
    // (case-insensitive), and an optional trailing "B" (so "500MB" works).
    const parsed = /^(\d+(?:\.\d+)?)\s*([KMGT]?)B?$/i.exec(value.trim());
    if (!parsed) {
        return NaN;
    }
    const BYTES_PER_UNIT = { '': 1, K: 1024, M: 1024 ** 2, G: 1024 ** 3, T: 1024 ** 4 };
    const magnitude = parseFloat(parsed[1]);
    const unit = (parsed[2] || '').toUpperCase();
    return magnitude * (BYTES_PER_UNIT[unit] ?? 1);
}
81
/** Format a byte count for display (0 means the size was unknown). */
export function formatSize(bytes) {
    const KB = 1024;
    const MB = 1024 * 1024;
    const GB = 1024 * 1024 * 1024;
    // A content-length of 0 means the server did not report a size.
    if (bytes === 0) {
        return 'unknown size';
    }
    if (bytes < KB) {
        return `${bytes} B`;
    }
    if (bytes < MB) {
        return `${(bytes / 1024).toFixed(1)} KB`;
    }
    if (bytes < GB) {
        return `${(bytes / 1024 / 1024).toFixed(1)} MB`;
    }
    return `${(bytes / 1024 / 1024 / 1024).toFixed(2)} GB`;
}
92
cli({
    site: 'sra',
    name: 'download',
    description: 'Download FASTQ files for an SRA run (via ENA or sra-tools)',
    database: 'sra',
    strategy: Strategy.PUBLIC,
    timeoutSeconds: 600,
    args: [
        { name: 'accession', positional: true, required: true, help: 'SRA run accession (e.g. SRR1234567)' },
        { name: 'outdir', default: '.', help: 'Output directory (default: current directory)' },
        { name: 'method', default: 'ena', choices: ['ena', 'sra-tools'], help: 'Download method' },
        { name: 'dry-run', type: 'boolean', default: false, help: 'Show download URLs without downloading' },
        { name: 'max-size', help: 'Max file size to download (e.g. "500M", "2G"). Larger files are skipped.' },
    ],
    columns: ['file', 'size', 'status'],
    func: async (_ctx, args) => {
        const accession = String(args.accession).trim();
        const outdir = String(args.outdir);
        const method = String(args.method);
        const dryRun = Boolean(args['dry-run']);
        const maxSizeStr = args['max-size'] ? String(args['max-size']) : undefined;
        const maxSizeBytes = maxSizeStr ? parseMaxSize(maxSizeStr) : Infinity;
        if (maxSizeStr && Number.isNaN(maxSizeBytes)) {
            throw new CliError('ARGUMENT', `Invalid --max-size value: "${maxSizeStr}"`, 'Use format like "500M", "2G", "1024K"');
        }
        if (!/^[SDE]RR\d+$/i.test(accession)) {
            throw new CliError('ARGUMENT', `Invalid SRA run accession: "${accession}"`, 'Use a run accession starting with SRR, ERR, or DRR (e.g. SRR1234567)');
        }
        if (!existsSync(outdir)) {
            mkdirSync(outdir, { recursive: true });
        }
        // --- Strategy 1: direct HTTPS download from the ENA mirror ---
        if (method === 'ena') {
            const report = [];
            const failures = [];
            for (const url of buildEnaFastqUrls(accession)) {
                const baseName = url.split('/').pop();
                const target = join(outdir, baseName);
                if (dryRun) {
                    // Probe with HEAD and report URL + size; never touch disk.
                    try {
                        const probe = await fetch(url, { method: 'HEAD' });
                        if (probe.ok) {
                            const size = Number(probe.headers.get('content-length') ?? 0);
                            report.push({ file: baseName, size: formatSize(size), status: `→ ${url}` });
                        }
                        // A 404 is normal (single-end vs paired-end layout) — skip quietly.
                    }
                    catch { /* skip */ }
                    continue;
                }
                try {
                    if (maxSizeBytes < Infinity) {
                        // Size gate: HEAD first so oversized files are never fetched.
                        const probe = await fetch(url, { method: 'HEAD' });
                        if (probe.status === 404)
                            continue; // expected
                        if (!probe.ok) {
                            failures.push(`${baseName}: HTTP ${probe.status}`);
                            report.push({ file: baseName, size: '', status: 'failed' });
                            continue;
                        }
                        const size = Number(probe.headers.get('content-length') ?? 0);
                        if (size > maxSizeBytes) {
                            report.push({ file: baseName, size: formatSize(size), status: `skipped (exceeds --max-size ${maxSizeStr})` });
                            continue;
                        }
                    }
                    const outcome = await downloadFile(url, target);
                    if (outcome.ok) {
                        report.push({ file: baseName, size: formatSize(outcome.size), status: `saved → ${target}` });
                    }
                    else if (!outcome.notFound) {
                        failures.push(`${baseName}: HTTP error`);
                        report.push({ file: baseName, size: '', status: 'failed' });
                    }
                    // A 404 here is also expected: single-end runs have no _1/_2,
                    // paired-end runs have no plain .fastq.gz.
                }
                catch (err) {
                    const msg = err instanceof Error ? err.message : String(err);
                    failures.push(`${baseName}: ${msg}`);
                    report.push({ file: baseName, size: '', status: `error: ${msg}` });
                }
            }
            if (dryRun) {
                if (!report.length) {
                    throw new CliError('NOT_FOUND', `FASTQ files not available on ENA for ${accession}`);
                }
                return report;
            }
            const savedCount = report.filter((r) => r.status.startsWith('saved')).length;
            if (savedCount === 0) {
                throw new CliError('NOT_FOUND', `FASTQ files not available on ENA for ${accession}`, 'The run may not be mirrored to ENA yet. Try: biocli sra download ' + accession + ' --method sra-tools');
            }
            if (failures.length > 0) {
                throw new CliError('API_ERROR', `Partial download failure for ${accession}: ${failures.join('; ')}`, 'Some files failed to download. Check network connectivity and retry.');
            }
            return report;
        }
        // --- Strategy 2: sra-tools (prefetch + fasterq-dump) ---
        if (!commandExists('prefetch')) {
            throw new CliError('ARGUMENT', 'sra-tools not found on PATH', 'Install sra-tools: conda install -c bioconda sra-tools, or use --method ena');
        }
        const report = [];
        try {
            // prefetch downloads the .sra container into outdir.
            console.error(`Downloading ${accession} with prefetch...`);
            execSync(`prefetch ${accession} -O "${outdir}"`, { stdio: 'inherit' });
            report.push({ file: `${accession}.sra`, size: '', status: 'prefetch done' });
            // fasterq-dump converts the .sra container to FASTQ, splitting mates.
            if (commandExists('fasterq-dump')) {
                console.error(`Converting to FASTQ with fasterq-dump...`);
                execSync(`fasterq-dump "${join(outdir, accession)}" -O "${outdir}" --split-files`, { stdio: 'inherit' });
                report.push({ file: `${accession}*.fastq`, size: '', status: 'fasterq-dump done' });
            }
            else {
                report.push({ file: '', size: '', status: 'fasterq-dump not found — .sra file downloaded only' });
            }
        }
        catch (err) {
            throw new CliError('API_ERROR', `sra-tools failed: ${err instanceof Error ? err.message : String(err)}`, 'Check that sra-tools is correctly configured');
        }
        return report;
    },
});
@@ -0,0 +1,8 @@
1
+ /**
2
+ * sra/run — Get SRA run details by accession.
3
+ *
4
+ * Searches for a single SRA accession (SRR, SRX, SRP, etc.) and
5
+ * retrieves detailed run metadata via esummary (JSON). Parses the
6
+ * embedded XML strings in expxml/runs fields.
7
+ */
8
+ export {};