@yangfei_93sky/biocli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +197 -0
  3. package/dist/batch.d.ts +20 -0
  4. package/dist/batch.js +69 -0
  5. package/dist/build-manifest.d.ts +38 -0
  6. package/dist/build-manifest.js +186 -0
  7. package/dist/cache.d.ts +28 -0
  8. package/dist/cache.js +126 -0
  9. package/dist/cli-manifest.json +1500 -0
  10. package/dist/cli.d.ts +7 -0
  11. package/dist/cli.js +336 -0
  12. package/dist/clis/_shared/common.d.ts +8 -0
  13. package/dist/clis/_shared/common.js +13 -0
  14. package/dist/clis/_shared/eutils.d.ts +9 -0
  15. package/dist/clis/_shared/eutils.js +9 -0
  16. package/dist/clis/_shared/organism-db.d.ts +23 -0
  17. package/dist/clis/_shared/organism-db.js +58 -0
  18. package/dist/clis/_shared/xml-helpers.d.ts +58 -0
  19. package/dist/clis/_shared/xml-helpers.js +266 -0
  20. package/dist/clis/aggregate/enrichment.d.ts +7 -0
  21. package/dist/clis/aggregate/enrichment.js +105 -0
  22. package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
  23. package/dist/clis/aggregate/gene-dossier.js +248 -0
  24. package/dist/clis/aggregate/gene-profile.d.ts +16 -0
  25. package/dist/clis/aggregate/gene-profile.js +305 -0
  26. package/dist/clis/aggregate/literature-brief.d.ts +7 -0
  27. package/dist/clis/aggregate/literature-brief.js +79 -0
  28. package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
  29. package/dist/clis/aggregate/variant-dossier.js +161 -0
  30. package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
  31. package/dist/clis/aggregate/variant-interpret.js +210 -0
  32. package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
  33. package/dist/clis/aggregate/workflow-prepare.js +228 -0
  34. package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
  35. package/dist/clis/aggregate/workflow-scout.js +175 -0
  36. package/dist/clis/clinvar/search.d.ts +8 -0
  37. package/dist/clis/clinvar/search.js +61 -0
  38. package/dist/clis/clinvar/variant.d.ts +7 -0
  39. package/dist/clis/clinvar/variant.js +53 -0
  40. package/dist/clis/enrichr/analyze.d.ts +7 -0
  41. package/dist/clis/enrichr/analyze.js +48 -0
  42. package/dist/clis/ensembl/lookup.d.ts +6 -0
  43. package/dist/clis/ensembl/lookup.js +38 -0
  44. package/dist/clis/ensembl/vep.d.ts +7 -0
  45. package/dist/clis/ensembl/vep.js +86 -0
  46. package/dist/clis/ensembl/xrefs.d.ts +6 -0
  47. package/dist/clis/ensembl/xrefs.js +36 -0
  48. package/dist/clis/gene/fetch.d.ts +10 -0
  49. package/dist/clis/gene/fetch.js +96 -0
  50. package/dist/clis/gene/info.d.ts +7 -0
  51. package/dist/clis/gene/info.js +37 -0
  52. package/dist/clis/gene/search.d.ts +7 -0
  53. package/dist/clis/gene/search.js +71 -0
  54. package/dist/clis/geo/dataset.d.ts +7 -0
  55. package/dist/clis/geo/dataset.js +55 -0
  56. package/dist/clis/geo/download.d.ts +17 -0
  57. package/dist/clis/geo/download.js +115 -0
  58. package/dist/clis/geo/samples.d.ts +7 -0
  59. package/dist/clis/geo/samples.js +57 -0
  60. package/dist/clis/geo/search.d.ts +8 -0
  61. package/dist/clis/geo/search.js +66 -0
  62. package/dist/clis/kegg/convert.d.ts +7 -0
  63. package/dist/clis/kegg/convert.js +37 -0
  64. package/dist/clis/kegg/disease.d.ts +6 -0
  65. package/dist/clis/kegg/disease.js +57 -0
  66. package/dist/clis/kegg/link.d.ts +7 -0
  67. package/dist/clis/kegg/link.js +36 -0
  68. package/dist/clis/kegg/pathway.d.ts +6 -0
  69. package/dist/clis/kegg/pathway.js +37 -0
  70. package/dist/clis/pubmed/abstract.d.ts +7 -0
  71. package/dist/clis/pubmed/abstract.js +42 -0
  72. package/dist/clis/pubmed/cited-by.d.ts +7 -0
  73. package/dist/clis/pubmed/cited-by.js +77 -0
  74. package/dist/clis/pubmed/fetch.d.ts +6 -0
  75. package/dist/clis/pubmed/fetch.js +36 -0
  76. package/dist/clis/pubmed/info.yaml +22 -0
  77. package/dist/clis/pubmed/related.d.ts +7 -0
  78. package/dist/clis/pubmed/related.js +81 -0
  79. package/dist/clis/pubmed/search.d.ts +8 -0
  80. package/dist/clis/pubmed/search.js +63 -0
  81. package/dist/clis/snp/lookup.d.ts +7 -0
  82. package/dist/clis/snp/lookup.js +57 -0
  83. package/dist/clis/sra/download.d.ts +18 -0
  84. package/dist/clis/sra/download.js +217 -0
  85. package/dist/clis/sra/run.d.ts +8 -0
  86. package/dist/clis/sra/run.js +77 -0
  87. package/dist/clis/sra/search.d.ts +8 -0
  88. package/dist/clis/sra/search.js +83 -0
  89. package/dist/clis/string/enrichment.d.ts +7 -0
  90. package/dist/clis/string/enrichment.js +50 -0
  91. package/dist/clis/string/network.d.ts +7 -0
  92. package/dist/clis/string/network.js +47 -0
  93. package/dist/clis/string/partners.d.ts +4 -0
  94. package/dist/clis/string/partners.js +44 -0
  95. package/dist/clis/taxonomy/lookup.d.ts +8 -0
  96. package/dist/clis/taxonomy/lookup.js +54 -0
  97. package/dist/clis/uniprot/fetch.d.ts +7 -0
  98. package/dist/clis/uniprot/fetch.js +82 -0
  99. package/dist/clis/uniprot/search.d.ts +6 -0
  100. package/dist/clis/uniprot/search.js +65 -0
  101. package/dist/clis/uniprot/sequence.d.ts +7 -0
  102. package/dist/clis/uniprot/sequence.js +51 -0
  103. package/dist/commander-adapter.d.ts +27 -0
  104. package/dist/commander-adapter.js +286 -0
  105. package/dist/completion.d.ts +19 -0
  106. package/dist/completion.js +117 -0
  107. package/dist/config.d.ts +57 -0
  108. package/dist/config.js +94 -0
  109. package/dist/databases/enrichr.d.ts +28 -0
  110. package/dist/databases/enrichr.js +131 -0
  111. package/dist/databases/ensembl.d.ts +14 -0
  112. package/dist/databases/ensembl.js +106 -0
  113. package/dist/databases/index.d.ts +45 -0
  114. package/dist/databases/index.js +49 -0
  115. package/dist/databases/kegg.d.ts +26 -0
  116. package/dist/databases/kegg.js +136 -0
  117. package/dist/databases/ncbi.d.ts +28 -0
  118. package/dist/databases/ncbi.js +144 -0
  119. package/dist/databases/string-db.d.ts +19 -0
  120. package/dist/databases/string-db.js +105 -0
  121. package/dist/databases/uniprot.d.ts +13 -0
  122. package/dist/databases/uniprot.js +110 -0
  123. package/dist/discovery.d.ts +32 -0
  124. package/dist/discovery.js +235 -0
  125. package/dist/doctor.d.ts +19 -0
  126. package/dist/doctor.js +151 -0
  127. package/dist/errors.d.ts +68 -0
  128. package/dist/errors.js +105 -0
  129. package/dist/execution.d.ts +15 -0
  130. package/dist/execution.js +178 -0
  131. package/dist/hooks.d.ts +48 -0
  132. package/dist/hooks.js +58 -0
  133. package/dist/main.d.ts +13 -0
  134. package/dist/main.js +31 -0
  135. package/dist/ncbi-fetch.d.ts +10 -0
  136. package/dist/ncbi-fetch.js +10 -0
  137. package/dist/output.d.ts +18 -0
  138. package/dist/output.js +394 -0
  139. package/dist/pipeline/executor.d.ts +22 -0
  140. package/dist/pipeline/executor.js +40 -0
  141. package/dist/pipeline/index.d.ts +6 -0
  142. package/dist/pipeline/index.js +6 -0
  143. package/dist/pipeline/registry.d.ts +16 -0
  144. package/dist/pipeline/registry.js +31 -0
  145. package/dist/pipeline/steps/fetch.d.ts +21 -0
  146. package/dist/pipeline/steps/fetch.js +160 -0
  147. package/dist/pipeline/steps/transform.d.ts +26 -0
  148. package/dist/pipeline/steps/transform.js +92 -0
  149. package/dist/pipeline/steps/xml-parse.d.ts +12 -0
  150. package/dist/pipeline/steps/xml-parse.js +27 -0
  151. package/dist/pipeline/template.d.ts +35 -0
  152. package/dist/pipeline/template.js +312 -0
  153. package/dist/rate-limiter.d.ts +56 -0
  154. package/dist/rate-limiter.js +120 -0
  155. package/dist/registry-api.d.ts +15 -0
  156. package/dist/registry-api.js +13 -0
  157. package/dist/registry.d.ts +90 -0
  158. package/dist/registry.js +100 -0
  159. package/dist/schema.d.ts +80 -0
  160. package/dist/schema.js +72 -0
  161. package/dist/spinner.d.ts +19 -0
  162. package/dist/spinner.js +37 -0
  163. package/dist/types.d.ts +101 -0
  164. package/dist/types.js +27 -0
  165. package/dist/utils.d.ts +16 -0
  166. package/dist/utils.js +40 -0
  167. package/dist/validate.d.ts +29 -0
  168. package/dist/validate.js +136 -0
  169. package/dist/verify.d.ts +20 -0
  170. package/dist/verify.js +131 -0
  171. package/dist/version.d.ts +13 -0
  172. package/dist/version.js +36 -0
  173. package/dist/xml-parser.d.ts +19 -0
  174. package/dist/xml-parser.js +119 -0
  175. package/dist/yaml-schema.d.ts +40 -0
  176. package/dist/yaml-schema.js +62 -0
  177. package/package.json +68 -0
@@ -0,0 +1,61 @@
1
+ /**
2
+ * clinvar/search — Search ClinVar clinical variants.
3
+ *
4
+ * Uses the two-step esearch + esummary pattern:
5
+ * 1. esearch to retrieve matching ClinVar IDs
6
+ * 2. esummary (JSON) to get variant metadata
7
+ */
8
+ import { cli, Strategy } from '../../registry.js';
9
+ import { CliError } from '../../errors.js';
10
+ import { buildEutilsUrl } from '../_shared/eutils.js';
11
+ import { withMeta } from '../../types.js';
/**
 * Registers the `clinvar search` command.
 *
 * The handler runs esearch for the query, then esummary for the returned IDs,
 * and maps every summary record to one output row.
 */
cli({
    site: 'clinvar',
    name: 'search',
    description: 'Search ClinVar clinical variants',
    database: 'clinvar',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'query', positional: true, required: true, help: 'Search query (e.g. "BRCA1", "rs80357906", "breast cancer")' },
        { name: 'limit', type: 'int', default: 10, help: 'Max results (1-200)' },
    ],
    columns: ['uid', 'title', 'gene', 'significance', 'condition', 'accession'],
    /**
     * @param ctx  Execution context; only `ctx.fetchJson(url)` is used here.
     * @param args Parsed CLI arguments (`query`, `limit`).
     * @returns Rows wrapped by `withMeta` together with `totalCount` and `query`.
     * @throws {CliError} `NOT_FOUND` when esearch returns no IDs.
     */
    func: async (ctx, args) => {
        // A non-numeric limit would otherwise turn into NaN and be sent as
        // `retmax=NaN`; fall back to the declared default of 10 instead.
        const requestedLimit = Number(args.limit);
        const limit = Number.isFinite(requestedLimit)
            ? Math.max(1, Math.min(Math.trunc(requestedLimit), 200))
            : 10;
        const query = String(args.query);
        // Step 1: esearch to get ClinVar IDs
        const searchResult = await ctx.fetchJson(buildEutilsUrl('esearch.fcgi', {
            db: 'clinvar', term: query, retmax: String(limit), retmode: 'json',
        }));
        const ids = searchResult?.esearchresult?.idlist ?? [];
        const totalCount = Number(searchResult?.esearchresult?.count ?? 0);
        if (!ids.length)
            throw new CliError('NOT_FOUND', 'No ClinVar entries found');
        // Step 2: esummary to get variant details
        const summary = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
            db: 'clinvar', id: ids.join(','), retmode: 'json',
        }));
        const uids = summary?.result?.uids ?? [];
        const rows = uids.map((uid) => {
            const item = summary?.result?.[uid] ?? {};
            // Fields read from each record: title, clinical_significance,
            // genes (array of {symbol}), trait_set (array of {trait_name}), accession.
            const genes = Array.isArray(item.genes)
                ? item.genes.map((g) => g?.symbol).join(', ')
                : '';
            // clinical_significance may be an object with `description` or a plain value.
            const significance = typeof item.clinical_significance === 'object'
                ? item.clinical_significance?.description ?? ''
                : String(item.clinical_significance ?? '');
            const conditions = Array.isArray(item.trait_set)
                ? item.trait_set.map((t) => t?.trait_name).join('; ')
                : '';
            return {
                uid,
                title: item.title ?? '',
                gene: genes,
                significance,
                // Keep the table readable: cap the condition text at 60 characters.
                condition: conditions.slice(0, 60) + (conditions.length > 60 ? '...' : ''),
                accession: item.accession ?? '',
            };
        });
        return withMeta(rows, { totalCount, query });
    },
});
@@ -0,0 +1,7 @@
1
+ /**
2
+ * clinvar/variant — Get ClinVar variant details by ID.
3
+ *
4
+ * Accepts a ClinVar variation ID (numeric) or accession (VCV*),
5
+ * uses esearch + esummary (JSON) to retrieve detailed variant metadata.
6
+ */
7
+ export {};
@@ -0,0 +1,53 @@
1
+ /**
2
+ * clinvar/variant — Get ClinVar variant details by ID.
3
+ *
4
+ * Accepts a ClinVar variation ID (numeric) or accession (VCV*),
5
+ * uses esearch + esummary (JSON) to retrieve detailed variant metadata.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { buildEutilsUrl } from '../_shared/eutils.js';
/**
 * Registers the `clinvar variant` command.
 *
 * The handler resolves a variation ID or accession through esearch, then
 * reads the first matching record from esummary and returns it as one row.
 */
cli({
    site: 'clinvar',
    name: 'variant',
    description: 'Get ClinVar variant details by ID',
    database: 'clinvar',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'id', positional: true, required: true, help: 'ClinVar variation ID or accession (e.g. 37722, VCV000037722)' },
    ],
    columns: ['uid', 'title', 'gene', 'significance', 'condition', 'accession', 'type', 'assembly'],
    /**
     * @param ctx  Execution context; only `ctx.fetchJson(url)` is used here.
     * @param args Parsed CLI arguments (`id`).
     * @returns A single-element array describing the variant.
     * @throws {CliError} `NOT_FOUND` when esearch yields no ID or esummary
     *   carries no record for the resolved ID.
     */
    func: async (ctx, args) => {
        // Trim first: surrounding whitespace would make a numeric ID fail the
        // digit test and be searched as an accession.
        const query = String(args.id).trim();
        // All-digit input is a variation ID; anything else is treated as an accession.
        const searchTerm = /^\d+$/.test(query) ? `${query}[VariationID]` : `${query}[Accession]`;
        const searchResult = await ctx.fetchJson(buildEutilsUrl('esearch.fcgi', {
            db: 'clinvar', term: searchTerm, retmode: 'json',
        }));
        const ids = searchResult?.esearchresult?.idlist ?? [];
        if (!ids.length)
            throw new CliError('NOT_FOUND', `ClinVar entry ${query} not found`);
        const uid = ids[0];
        const summary = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
            db: 'clinvar', id: uid, retmode: 'json',
        }));
        const item = summary?.result?.[uid];
        // Without a record the command used to emit a row of empty strings;
        // report the failure instead.
        if (!item)
            throw new CliError('NOT_FOUND', `ClinVar entry ${query} returned no summary record`);
        const genes = Array.isArray(item.genes)
            ? item.genes.map((g) => g?.symbol).join(', ')
            : '';
        // clinical_significance may be an object with `description` or a plain value.
        const significance = typeof item.clinical_significance === 'object'
            ? item.clinical_significance?.description ?? ''
            : String(item.clinical_significance ?? '');
        const conditions = Array.isArray(item.trait_set)
            ? item.trait_set.map((t) => t?.trait_name).join('; ')
            : '';
        const varType = item.obj_type ?? item.variation_type ?? '';
        return [{
                uid,
                title: item.title ?? '',
                gene: genes,
                significance,
                condition: conditions,
                accession: item.accession ?? '',
                type: varType,
                assembly: item.assembly ?? '',
            }];
    },
});
@@ -0,0 +1,7 @@
1
+ /**
2
+ * enrichr/analyze — Run gene set enrichment analysis via Enrichr.
3
+ *
4
+ * 2-step workflow: submits a gene list, then retrieves enrichment results
5
+ * for the specified gene set library.
6
+ */
7
+ export {};
@@ -0,0 +1,48 @@
1
+ /**
2
+ * enrichr/analyze — Run gene set enrichment analysis via Enrichr.
3
+ *
4
+ * 2-step workflow: submits a gene list, then retrieves enrichment results
5
+ * for the specified gene set library.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { submitGeneList, getEnrichment } from '../../databases/enrichr.js';
10
+ import { withMeta } from '../../types.js';
/** Gene set library used when `--library` is not supplied. */
const DEFAULT_LIBRARY = 'KEGG_2021_Human';
/**
 * Registers the `enrichr analyze` command.
 *
 * The handler submits the gene list, fetches enrichment results for the
 * chosen library and returns the first `limit` of them as formatted rows.
 */
cli({
    site: 'enrichr',
    name: 'analyze',
    description: 'Run gene set enrichment analysis',
    database: 'enrichr',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'genes', positional: true, required: true, help: 'Comma-separated gene symbols (e.g. TP53,BRCA1,EGFR,MYC,CDK2)' },
        { name: 'library', default: DEFAULT_LIBRARY, help: 'Gene set library (e.g. KEGG_2021_Human, GO_Biological_Process_2023, Reactome_2022)' },
        { name: 'limit', type: 'int', default: 20, help: 'Max results to show (1-100)' },
    ],
    columns: ['rank', 'term', 'adjustedPValue', 'combinedScore', 'genes'],
    /**
     * @param _ctx Execution context (unused; the Enrichr helpers do their own requests).
     * @param args Parsed CLI arguments (`genes`, `library`, `limit`).
     * @returns Rows wrapped by `withMeta` with `totalCount` and the normalized gene list.
     * @throws {CliError} `ARGUMENT` for fewer than two genes, `NOT_FOUND` when
     *   the library returns no results.
     */
    func: async (_ctx, args) => {
        const geneList = String(args.genes).split(',').map((s) => s.trim()).filter(Boolean);
        if (geneList.length < 2) {
            throw new CliError('ARGUMENT', 'At least 2 genes required for enrichment analysis', 'Example: biocli enrichr analyze TP53,BRCA1,EGFR,MYC,CDK2');
        }
        const library = String(args.library);
        // A NaN limit would make `slice(0, NaN)` return nothing without any
        // error; fall back to the declared default of 20 instead.
        const requestedLimit = Number(args.limit);
        const limit = Number.isFinite(requestedLimit)
            ? Math.max(1, Math.min(Math.trunc(requestedLimit), 100))
            : 20;
        // Step 1: Submit gene list
        const userListId = await submitGeneList(geneList);
        // Step 2: Get enrichment results
        const results = await getEnrichment(userListId, library);
        if (!results.length) {
            throw new CliError('NOT_FOUND', `No enrichment results from ${library}`, 'Try a different library or add more genes');
        }
        // Rows are taken in the order getEnrichment returns them; nothing is
        // re-sorted here, so `rank` is whatever the helper reported.
        const rows = results.slice(0, limit).map((r) => ({
            rank: Number(r.rank),
            term: String(r.term),
            adjustedPValue: Number(r.adjustedPValue).toExponential(2),
            combinedScore: Number(r.combinedScore).toFixed(1),
            genes: String(r.genes),
        }));
        return withMeta(rows, { totalCount: results.length, query: geneList.join(',') });
    },
});
@@ -0,0 +1,6 @@
1
+ /**
2
+ * ensembl/lookup — Look up a gene by symbol in Ensembl.
3
+ *
4
+ * Returns Ensembl gene ID, coordinates, biotype, and description.
5
+ */
6
+ export {};
@@ -0,0 +1,38 @@
1
+ /**
2
+ * ensembl/lookup — Look up a gene by symbol in Ensembl.
3
+ *
4
+ * Returns Ensembl gene ID, coordinates, biotype, and description.
5
+ */
6
+ import { cli, Strategy } from '../../registry.js';
7
+ import { CliError } from '../../errors.js';
8
+ import { buildEnsemblUrl } from '../../databases/ensembl.js';
/**
 * Registers the `ensembl lookup` command.
 *
 * The handler requests `/lookup/symbol/{species}/{symbol}` and returns the
 * gene's ID, coordinates, biotype and description as a single row.
 */
cli({
    site: 'ensembl',
    name: 'lookup',
    description: 'Look up a gene by symbol in Ensembl',
    database: 'ensembl',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'symbol', positional: true, required: true, help: 'Gene symbol (e.g. BRCA2, TP53)' },
        { name: 'species', default: 'homo_sapiens', help: 'Species name (e.g. homo_sapiens, mus_musculus)' },
    ],
    columns: ['ensemblId', 'symbol', 'biotype', 'chromosome', 'start', 'end', 'strand', 'description'],
    /**
     * @param ctx  Execution context; only `ctx.fetchJson(url)` is used here.
     * @param args Parsed CLI arguments (`symbol`, `species`).
     * @returns A single-element array describing the gene.
     * @throws {CliError} `ARGUMENT` for an empty symbol, `NOT_FOUND` when the
     *   response has no `id`.
     */
    func: async (ctx, args) => {
        const symbol = String(args.symbol).trim();
        if (!symbol) {
            throw new CliError('ARGUMENT', 'Gene symbol must not be empty', 'Check the gene symbol and species');
        }
        // Spaces become underscores so "Homo sapiens" matches "homo_sapiens".
        const species = String(args.species).toLowerCase().replace(/\s+/g, '_');
        // Both values end up in the URL path, so escape them; characters such
        // as "/", "?" or "#" would otherwise change the request target.
        const path = `/lookup/symbol/${encodeURIComponent(species)}/${encodeURIComponent(symbol)}`;
        const data = await ctx.fetchJson(buildEnsemblUrl(path, { expand: '1' }));
        if (!data || !data.id) {
            throw new CliError('NOT_FOUND', `Gene "${symbol}" not found in Ensembl for ${species}`, 'Check the gene symbol and species');
        }
        // Only 1 and -1 map to a sign; a missing or unexpected strand stays
        // empty rather than being shown as the reverse strand.
        const strandValue = Number(data.strand);
        let strand = '';
        if (strandValue === 1) {
            strand = '+';
        }
        else if (strandValue === -1) {
            strand = '-';
        }
        return [{
                ensemblId: String(data.id ?? ''),
                symbol: String(data.display_name ?? ''),
                biotype: String(data.biotype ?? ''),
                chromosome: String(data.seq_region_name ?? ''),
                start: Number(data.start ?? 0),
                end: Number(data.end ?? 0),
                strand,
                // Drop the trailing "[Source:...]" annotation from the description.
                description: String(data.description ?? '').replace(/\s*\[Source:.*\]/, ''),
            }];
    },
});
@@ -0,0 +1,7 @@
1
+ /**
2
+ * ensembl/vep — Variant Effect Predictor via Ensembl REST API.
3
+ *
4
+ * Predicts the functional consequences of variants using HGVS notation,
5
+ * rsID, or genomic coordinates.
6
+ */
7
+ export {};
@@ -0,0 +1,86 @@
1
+ /**
2
+ * ensembl/vep — Variant Effect Predictor via Ensembl REST API.
3
+ *
4
+ * Predicts the functional consequences of variants using HGVS notation,
5
+ * rsID, or genomic coordinates.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { buildEnsemblUrl } from '../../databases/ensembl.js';
/**
 * Registers the `ensembl vep` command.
 *
 * The handler calls the VEP `id` endpoint for input starting with "rs" and
 * the `hgvs` endpoint otherwise, then emits up to five transcript
 * consequences per returned entry, canonical transcripts first.
 */
cli({
    site: 'ensembl',
    name: 'vep',
    description: 'Predict variant effects (VEP)',
    database: 'ensembl',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'variant', positional: true, required: true, help: 'Variant in HGVS (e.g. "NM_000518.5:c.20A>T") or rsID (e.g. rs334)' },
        { name: 'species', default: 'human', help: 'Species (default: human)' },
    ],
    columns: ['input', 'gene', 'consequence', 'impact', 'biotype', 'aminoAcid', 'codons'],
    /**
     * @param ctx  Execution context; only `ctx.fetchJson(url)` is used here.
     * @param args Parsed CLI arguments (`variant`, `species`).
     * @returns One row per reported transcript consequence, or one summary row
     *   for an entry without transcript consequences.
     * @throws {CliError} `ARGUMENT` for an empty variant, `NOT_FOUND` when the
     *   response is not a non-empty array.
     */
    func: async (ctx, args) => {
        const variant = String(args.variant).trim();
        if (!variant) {
            throw new CliError('ARGUMENT', 'Variant must not be empty', 'Check the variant notation');
        }
        const species = String(args.species).toLowerCase();
        // Input starting with "rs" goes to the id endpoint, everything else to hgvs.
        const endpoint = variant.startsWith('rs') ? 'id' : 'hgvs';
        // Escape every user-supplied path segment, not only the HGVS string.
        const path = `/vep/${encodeURIComponent(species)}/${endpoint}/${encodeURIComponent(variant)}`;
        const url = buildEnsemblUrl(path, {
            canonical: '1',
            hgvs: '1',
            protein: '1',
        });
        const data = await ctx.fetchJson(url);
        if (!Array.isArray(data) || !data.length) {
            throw new CliError('NOT_FOUND', `No VEP results for "${variant}"`, 'Check the variant notation');
        }
        const rows = [];
        for (const entry of data) {
            const transcriptConsequences = entry.transcript_consequences ?? [];
            const input = String(entry.input ?? entry.id ?? variant);
            if (!transcriptConsequences.length) {
                // No per-transcript data: report only the most severe consequence.
                rows.push({
                    input,
                    gene: '',
                    consequence: String(entry.most_severe_consequence ?? ''),
                    impact: '',
                    biotype: '',
                    aminoAcid: '',
                    codons: '',
                });
                continue;
            }
            // Show canonical transcripts first; the sort runs on a copy so the
            // response object is left untouched.
            const sorted = [...transcriptConsequences].sort((a, b) => {
                if (a.canonical && !b.canonical)
                    return -1;
                if (!a.canonical && b.canonical)
                    return 1;
                return 0;
            });
            // Keep at most the first five consequences per entry.
            for (const tc of sorted.slice(0, 5)) {
                const consequences = tc.consequence_terms ?? [];
                rows.push({
                    input,
                    gene: String(tc.gene_symbol ?? ''),
                    consequence: consequences.join(', '),
                    impact: String(tc.impact ?? ''),
                    biotype: String(tc.biotype ?? ''),
                    aminoAcid: String(tc.amino_acids ?? ''),
                    codons: String(tc.codons ?? ''),
                });
            }
        }
        return rows;
    },
});
@@ -0,0 +1,6 @@
1
+ /**
2
+ * ensembl/xrefs — Cross-references for a gene symbol in Ensembl.
3
+ *
4
+ * Returns linked IDs in HGNC, UniProt, RefSeq, OMIM, etc.
5
+ */
6
+ export {};
@@ -0,0 +1,36 @@
1
+ /**
2
+ * ensembl/xrefs — Cross-references for a gene symbol in Ensembl.
3
+ *
4
+ * Returns linked IDs in HGNC, UniProt, RefSeq, OMIM, etc.
5
+ */
6
+ import { cli, Strategy } from '../../registry.js';
7
+ import { CliError } from '../../errors.js';
8
+ import { buildEnsemblUrl } from '../../databases/ensembl.js';
9
+ import { withMeta } from '../../types.js';
/**
 * Registers the `ensembl xrefs` command.
 *
 * The handler requests `/xrefs/symbol/{species}/{symbol}` and maps every
 * returned cross-reference to one output row.
 */
cli({
    site: 'ensembl',
    name: 'xrefs',
    description: 'Get cross-references for a gene',
    database: 'ensembl',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'symbol', positional: true, required: true, help: 'Gene symbol (e.g. BRCA2)' },
        { name: 'species', default: 'homo_sapiens', help: 'Species name' },
    ],
    columns: ['database', 'primaryId', 'displayId', 'description'],
    /**
     * @param ctx  Execution context; only `ctx.fetchJson(url)` is used here.
     * @param args Parsed CLI arguments (`symbol`, `species`).
     * @returns Rows wrapped by `withMeta` with `totalCount` and the symbol as `query`.
     * @throws {CliError} `ARGUMENT` for an empty symbol, `NOT_FOUND` when the
     *   response is not a non-empty array.
     */
    func: async (ctx, args) => {
        const symbol = String(args.symbol).trim();
        if (!symbol) {
            throw new CliError('ARGUMENT', 'Gene symbol must not be empty', 'Check the gene symbol');
        }
        // Spaces become underscores so "Homo sapiens" matches "homo_sapiens".
        const species = String(args.species).toLowerCase().replace(/\s+/g, '_');
        // Both values end up in the URL path, so escape them before interpolation.
        const path = `/xrefs/symbol/${encodeURIComponent(species)}/${encodeURIComponent(symbol)}`;
        const data = await ctx.fetchJson(buildEnsemblUrl(path));
        if (!Array.isArray(data) || !data.length) {
            throw new CliError('NOT_FOUND', `No cross-references found for "${symbol}"`, 'Check the gene symbol');
        }
        const rows = data.map((item) => ({
            database: String(item.dbname ?? ''),
            primaryId: String(item.primary_id ?? ''),
            displayId: String(item.display_id ?? ''),
            // Drop the trailing "[Source:...]" annotation from the description.
            description: String(item.description ?? '').replace(/\s*\[Source:.*\]/, ''),
        }));
        return withMeta(rows, { totalCount: rows.length, query: symbol });
    },
});
@@ -0,0 +1,10 @@
1
+ /**
2
+ * gene/fetch — Download gene sequence by NCBI Gene ID.
3
+ *
4
+ * Uses efetch to retrieve nucleotide or protein sequences in FASTA format.
5
+ * Workflow:
6
+ * 1. validate that the argument is a numeric NCBI Gene ID
7
+ * 2. elink to find linked nucleotide/protein records
8
+ * 3. efetch to download the sequence in requested format
9
+ */
10
+ export {};
@@ -0,0 +1,96 @@
1
+ /**
2
+ * gene/fetch — Download gene sequence by NCBI Gene ID.
3
+ *
4
+ * Uses efetch to retrieve nucleotide or protein sequences in FASTA format.
5
+ * Workflow:
6
+ * 1. validate that the argument is a numeric NCBI Gene ID
7
+ * 2. elink to find linked nucleotide/protein records
8
+ * 3. efetch to download the sequence in requested format
9
+ */
10
+ import { cli, Strategy } from '../../registry.js';
11
+ import { CliError } from '../../errors.js';
12
+ import { buildEutilsUrl } from '../_shared/eutils.js';
13
+ import { writeFileSync } from 'node:fs';
/**
 * Pull the link list out of an elink JSON response.
 *
 * Only the first linkset and its first linksetdb are inspected.
 *
 * @param elinkResponse Parsed elink JSON (may be null or undefined).
 * @returns The `links` array, or an empty array when any level is missing.
 */
function extractLinkedIds(elinkResponse) {
    return elinkResponse?.linksets?.[0]?.linksetdbs?.[0]?.links ?? [];
}
/**
 * Registers the `gene fetch` command.
 *
 * The handler validates the Gene ID, uses elink to find linked RefSeq
 * records, downloads the first one as FASTA via efetch, and either writes it
 * to `--output` or returns it for printing.
 */
cli({
    site: 'gene',
    name: 'fetch',
    description: 'Download gene sequence (nucleotide or protein) in FASTA format',
    database: 'gene',
    strategy: Strategy.PUBLIC,
    args: [
        // The handler rejects non-numeric input, so the help text no longer
        // mentions a "--search" flag that this command does not define.
        { name: 'id', positional: true, required: true, help: 'NCBI Gene ID (e.g. 7157 for TP53)' },
        { name: 'type', default: 'nucleotide', choices: ['nucleotide', 'protein'], help: 'Sequence type to download' },
        { name: 'output', help: 'Output file path (default: stdout)' },
    ],
    columns: ['content'],
    defaultFormat: 'plain',
    /**
     * @param ctx  Execution context; `ctx.fetchJson(url)` and `ctx.fetchText(url)` are used.
     * @param args Parsed CLI arguments (`id`, `type`, `output`).
     * @returns A single row whose `content` is the FASTA text, or a summary
     *   line when the sequence was written to a file.
     * @throws {CliError} `ARGUMENT` for a non-numeric ID, `NOT_FOUND` when no
     *   linked record exists, `PARSE_ERROR` when the download is not FASTA.
     */
    func: async (ctx, args) => {
        const geneId = String(args.id).trim();
        const seqType = String(args.type);
        const outputFile = args.output ? String(args.output) : undefined;
        if (!/^\d+$/.test(geneId)) {
            throw new CliError('ARGUMENT', `Invalid Gene ID: "${geneId}"`, 'Use a numeric NCBI Gene ID (e.g. 7157 for TP53). Use "biocli gene search" to find IDs.');
        }
        // Step 1: elink to find linked nucleotide or protein records
        const linkDb = seqType === 'protein' ? 'protein' : 'nuccore';
        const linkName = seqType === 'protein' ? 'gene_protein_refseq' : 'gene_nuccore_refseqrna';
        let linkedIds = extractLinkedIds(await ctx.fetchJson(buildEutilsUrl('elink.fcgi', {
            dbfrom: 'gene',
            db: linkDb,
            id: geneId,
            linkname: linkName,
            retmode: 'json',
        })));
        // Fallback (nucleotide only): retry with the refseqgene link name when
        // the refseqrna link produced nothing.
        if (!linkedIds.length && seqType === 'nucleotide') {
            linkedIds = extractLinkedIds(await ctx.fetchJson(buildEutilsUrl('elink.fcgi', {
                dbfrom: 'gene',
                db: 'nuccore',
                id: geneId,
                linkname: 'gene_nuccore_refseqgene',
                retmode: 'json',
            })));
        }
        if (!linkedIds.length) {
            throw new CliError('NOT_FOUND', `No ${seqType} sequences found for Gene ID ${geneId}`, `Try the other type: biocli gene fetch ${geneId} --type ${seqType === 'protein' ? 'nucleotide' : 'protein'}`);
        }
        // Step 2: efetch to download FASTA (use first linked ID)
        const targetId = linkedIds[0];
        const fastaUrl = buildEutilsUrl('efetch.fcgi', {
            db: linkDb,
            id: targetId,
            rettype: 'fasta',
            retmode: 'text',
        });
        // Strip leading whitespace before the ">" check so a response that
        // begins with a blank line is not rejected as unparseable.
        const fasta = String((await ctx.fetchText(fastaUrl)) ?? '').trimStart();
        if (!fasta.startsWith('>')) {
            throw new CliError('PARSE_ERROR', 'Failed to retrieve FASTA sequence', 'The linked record may not have a sequence available');
        }
        // Write to file or return for stdout
        if (outputFile) {
            writeFileSync(outputFile, fasta, 'utf-8');
            const lines = fasta.split('\n');
            const header = lines[0];
            // Sequence length counts every non-whitespace character after the header line.
            const seqLength = lines.slice(1).join('').replace(/\s/g, '').length;
            return [{ content: `Saved ${seqType} sequence to ${outputFile} (${seqLength} bp/aa, ${header})` }];
        }
        return [{ content: fasta.trim() }];
    },
});
@@ -0,0 +1,7 @@
1
+ /**
2
+ * gene/info — Get gene details by NCBI Gene ID.
3
+ *
4
+ * Uses esummary (JSON mode) to retrieve comprehensive gene metadata
5
+ * for a single Gene ID.
6
+ */
7
+ export {};
@@ -0,0 +1,37 @@
1
+ /**
2
+ * gene/info — Get gene details by NCBI Gene ID.
3
+ *
4
+ * Uses esummary (JSON mode) to retrieve comprehensive gene metadata
5
+ * for a single Gene ID.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { buildEutilsUrl } from '../_shared/eutils.js';
10
+ import { parseGeneSummaries } from '../_shared/xml-helpers.js';
/**
 * Registers the `gene info` command.
 *
 * The handler checks that the ID is numeric, requests its esummary record in
 * JSON mode and returns whatever `parseGeneSummaries` extracts from it.
 */
cli({
    site: 'gene',
    name: 'info',
    description: 'Get gene details by NCBI Gene ID',
    database: 'gene',
    strategy: Strategy.PUBLIC,
    args: [
        {
            name: 'id',
            positional: true,
            required: true,
            help: 'NCBI Gene ID (e.g. 7157 for TP53)',
        },
    ],
    columns: ['geneId', 'symbol', 'name', 'organism', 'summary', 'chromosome', 'location'],
    func: async (ctx, args) => {
        const id = String(args.id).trim();
        const isNumericId = /^\d+$/.test(id);
        if (!isNumericId) {
            throw new CliError('ARGUMENT', `Invalid Gene ID: "${id}"`, 'Gene ID must be a numeric identifier (e.g. 7157 for TP53)');
        }
        // Single esummary call; the parsed records are returned unchanged.
        const summaryUrl = buildEutilsUrl('esummary.fcgi', {
            db: 'gene',
            id,
            retmode: 'json',
        });
        const response = await ctx.fetchJson(summaryUrl);
        const records = parseGeneSummaries(response);
        if (!records.length) {
            throw new CliError('NOT_FOUND', `Gene ID ${id} not found`, 'Check that the Gene ID is correct (e.g. 7157 for TP53)');
        }
        return records;
    },
});
@@ -0,0 +1,7 @@
1
+ /**
2
+ * gene/search — Search NCBI Gene database.
3
+ *
4
+ * Uses esearch to find Gene IDs matching the query, then esummary
5
+ * (JSON mode) to retrieve gene metadata.
6
+ */
7
+ export {};
@@ -0,0 +1,71 @@
1
+ /**
2
+ * gene/search — Search NCBI Gene database.
3
+ *
4
+ * Uses esearch to find Gene IDs matching the query, then esummary
5
+ * (JSON mode) to retrieve gene metadata.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { buildEutilsUrl } from '../_shared/eutils.js';
10
+ import { parseGeneSummaries } from '../_shared/xml-helpers.js';
11
+ import { clamp } from '../_shared/common.js';
12
+ import { withMeta } from '../../types.js';
/**
 * Map common organism names to NCBI search terms.
 *
 * The object has no prototype, so an input such as "constructor" or
 * "toString" cannot resolve to an inherited Object member.
 */
const ORGANISM_MAP = Object.assign(Object.create(null), {
    human: 'Homo sapiens',
    mouse: 'Mus musculus',
    rat: 'Rattus norvegicus',
    zebrafish: 'Danio rerio',
    fly: 'Drosophila melanogaster',
    worm: 'Caenorhabditis elegans',
    yeast: 'Saccharomyces cerevisiae',
    chicken: 'Gallus gallus',
    dog: 'Canis lupus familiaris',
    pig: 'Sus scrofa',
});
/**
 * Registers the `gene search` command.
 *
 * The handler builds a "<query>[Gene Name] AND <organism>[Organism]" term,
 * runs esearch for matching Gene IDs, then esummary for their metadata.
 */
cli({
    site: 'gene',
    name: 'search',
    description: 'Search NCBI Gene database',
    database: 'gene',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'query', positional: true, required: true, help: 'Gene symbol or keyword (e.g. TP53, BRCA1)' },
        { name: 'limit', type: 'int', default: 10, help: 'Max results (1-200)' },
        { name: 'organism', default: 'human', help: 'Organism name (e.g. human, mouse, rat, zebrafish)' },
    ],
    columns: ['geneId', 'symbol', 'name', 'organism'],
    /**
     * @param ctx  Execution context; only `ctx.fetchJson(url)` is used here.
     * @param args Parsed CLI arguments (`query`, `limit`, `organism`).
     * @returns Gene rows wrapped by `withMeta` with `totalCount` and `query`.
     * @throws {CliError} `NOT_FOUND` when esearch returns no IDs,
     *   `PARSE_ERROR` when no summaries could be parsed.
     */
    func: async (ctx, args) => {
        const limit = clamp(Number(args.limit), 1, 200);
        const rawOrganism = String(args.organism).trim();
        // Known aliases are matched case-insensitively; anything else is
        // passed through (trimmed) as the organism name.
        const organism = ORGANISM_MAP[rawOrganism.toLowerCase()] ?? rawOrganism;
        // Build search term: "query[Gene Name] AND organism[Organism]"
        const query = String(args.query).trim();
        const term = `${query}[Gene Name] AND ${organism}[Organism]`;
        // Step 1: esearch to get Gene IDs
        const searchResult = await ctx.fetchJson(buildEutilsUrl('esearch.fcgi', {
            db: 'gene',
            term,
            retmax: String(limit),
            retmode: 'json',
        }));
        const esearchResult = searchResult?.esearchresult;
        const geneIds = esearchResult?.idlist ?? [];
        const totalCount = Number(esearchResult?.count ?? 0);
        if (!geneIds.length) {
            throw new CliError('NOT_FOUND', `No genes found for "${query}" in ${organism}`, 'Try a different gene name/symbol or organism');
        }
        // Step 2: esummary to get gene details (JSON mode works for gene db)
        const summaryResult = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
            db: 'gene',
            id: geneIds.join(','),
            retmode: 'json',
        }));
        const genes = parseGeneSummaries(summaryResult);
        if (!genes.length) {
            throw new CliError('PARSE_ERROR', 'Failed to parse gene summary data', 'Try again later');
        }
        return withMeta(genes, { totalCount, query });
    },
});
@@ -0,0 +1,7 @@
1
+ /**
2
+ * geo/dataset — Get GEO dataset details by accession.
3
+ *
4
+ * Searches by accession (GSE, GDS, GPL, GSM) in the gds database,
5
+ * then retrieves the full summary via esummary (JSON).
6
+ */
7
+ export {};