@yangfei_93sky/biocli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +197 -0
- package/dist/batch.d.ts +20 -0
- package/dist/batch.js +69 -0
- package/dist/build-manifest.d.ts +38 -0
- package/dist/build-manifest.js +186 -0
- package/dist/cache.d.ts +28 -0
- package/dist/cache.js +126 -0
- package/dist/cli-manifest.json +1500 -0
- package/dist/cli.d.ts +7 -0
- package/dist/cli.js +336 -0
- package/dist/clis/_shared/common.d.ts +8 -0
- package/dist/clis/_shared/common.js +13 -0
- package/dist/clis/_shared/eutils.d.ts +9 -0
- package/dist/clis/_shared/eutils.js +9 -0
- package/dist/clis/_shared/organism-db.d.ts +23 -0
- package/dist/clis/_shared/organism-db.js +58 -0
- package/dist/clis/_shared/xml-helpers.d.ts +58 -0
- package/dist/clis/_shared/xml-helpers.js +266 -0
- package/dist/clis/aggregate/enrichment.d.ts +7 -0
- package/dist/clis/aggregate/enrichment.js +105 -0
- package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
- package/dist/clis/aggregate/gene-dossier.js +248 -0
- package/dist/clis/aggregate/gene-profile.d.ts +16 -0
- package/dist/clis/aggregate/gene-profile.js +305 -0
- package/dist/clis/aggregate/literature-brief.d.ts +7 -0
- package/dist/clis/aggregate/literature-brief.js +79 -0
- package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
- package/dist/clis/aggregate/variant-dossier.js +161 -0
- package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
- package/dist/clis/aggregate/variant-interpret.js +210 -0
- package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
- package/dist/clis/aggregate/workflow-prepare.js +228 -0
- package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
- package/dist/clis/aggregate/workflow-scout.js +175 -0
- package/dist/clis/clinvar/search.d.ts +8 -0
- package/dist/clis/clinvar/search.js +61 -0
- package/dist/clis/clinvar/variant.d.ts +7 -0
- package/dist/clis/clinvar/variant.js +53 -0
- package/dist/clis/enrichr/analyze.d.ts +7 -0
- package/dist/clis/enrichr/analyze.js +48 -0
- package/dist/clis/ensembl/lookup.d.ts +6 -0
- package/dist/clis/ensembl/lookup.js +38 -0
- package/dist/clis/ensembl/vep.d.ts +7 -0
- package/dist/clis/ensembl/vep.js +86 -0
- package/dist/clis/ensembl/xrefs.d.ts +6 -0
- package/dist/clis/ensembl/xrefs.js +36 -0
- package/dist/clis/gene/fetch.d.ts +10 -0
- package/dist/clis/gene/fetch.js +96 -0
- package/dist/clis/gene/info.d.ts +7 -0
- package/dist/clis/gene/info.js +37 -0
- package/dist/clis/gene/search.d.ts +7 -0
- package/dist/clis/gene/search.js +71 -0
- package/dist/clis/geo/dataset.d.ts +7 -0
- package/dist/clis/geo/dataset.js +55 -0
- package/dist/clis/geo/download.d.ts +17 -0
- package/dist/clis/geo/download.js +115 -0
- package/dist/clis/geo/samples.d.ts +7 -0
- package/dist/clis/geo/samples.js +57 -0
- package/dist/clis/geo/search.d.ts +8 -0
- package/dist/clis/geo/search.js +66 -0
- package/dist/clis/kegg/convert.d.ts +7 -0
- package/dist/clis/kegg/convert.js +37 -0
- package/dist/clis/kegg/disease.d.ts +6 -0
- package/dist/clis/kegg/disease.js +57 -0
- package/dist/clis/kegg/link.d.ts +7 -0
- package/dist/clis/kegg/link.js +36 -0
- package/dist/clis/kegg/pathway.d.ts +6 -0
- package/dist/clis/kegg/pathway.js +37 -0
- package/dist/clis/pubmed/abstract.d.ts +7 -0
- package/dist/clis/pubmed/abstract.js +42 -0
- package/dist/clis/pubmed/cited-by.d.ts +7 -0
- package/dist/clis/pubmed/cited-by.js +77 -0
- package/dist/clis/pubmed/fetch.d.ts +6 -0
- package/dist/clis/pubmed/fetch.js +36 -0
- package/dist/clis/pubmed/info.yaml +22 -0
- package/dist/clis/pubmed/related.d.ts +7 -0
- package/dist/clis/pubmed/related.js +81 -0
- package/dist/clis/pubmed/search.d.ts +8 -0
- package/dist/clis/pubmed/search.js +63 -0
- package/dist/clis/snp/lookup.d.ts +7 -0
- package/dist/clis/snp/lookup.js +57 -0
- package/dist/clis/sra/download.d.ts +18 -0
- package/dist/clis/sra/download.js +217 -0
- package/dist/clis/sra/run.d.ts +8 -0
- package/dist/clis/sra/run.js +77 -0
- package/dist/clis/sra/search.d.ts +8 -0
- package/dist/clis/sra/search.js +83 -0
- package/dist/clis/string/enrichment.d.ts +7 -0
- package/dist/clis/string/enrichment.js +50 -0
- package/dist/clis/string/network.d.ts +7 -0
- package/dist/clis/string/network.js +47 -0
- package/dist/clis/string/partners.d.ts +4 -0
- package/dist/clis/string/partners.js +44 -0
- package/dist/clis/taxonomy/lookup.d.ts +8 -0
- package/dist/clis/taxonomy/lookup.js +54 -0
- package/dist/clis/uniprot/fetch.d.ts +7 -0
- package/dist/clis/uniprot/fetch.js +82 -0
- package/dist/clis/uniprot/search.d.ts +6 -0
- package/dist/clis/uniprot/search.js +65 -0
- package/dist/clis/uniprot/sequence.d.ts +7 -0
- package/dist/clis/uniprot/sequence.js +51 -0
- package/dist/commander-adapter.d.ts +27 -0
- package/dist/commander-adapter.js +286 -0
- package/dist/completion.d.ts +19 -0
- package/dist/completion.js +117 -0
- package/dist/config.d.ts +57 -0
- package/dist/config.js +94 -0
- package/dist/databases/enrichr.d.ts +28 -0
- package/dist/databases/enrichr.js +131 -0
- package/dist/databases/ensembl.d.ts +14 -0
- package/dist/databases/ensembl.js +106 -0
- package/dist/databases/index.d.ts +45 -0
- package/dist/databases/index.js +49 -0
- package/dist/databases/kegg.d.ts +26 -0
- package/dist/databases/kegg.js +136 -0
- package/dist/databases/ncbi.d.ts +28 -0
- package/dist/databases/ncbi.js +144 -0
- package/dist/databases/string-db.d.ts +19 -0
- package/dist/databases/string-db.js +105 -0
- package/dist/databases/uniprot.d.ts +13 -0
- package/dist/databases/uniprot.js +110 -0
- package/dist/discovery.d.ts +32 -0
- package/dist/discovery.js +235 -0
- package/dist/doctor.d.ts +19 -0
- package/dist/doctor.js +151 -0
- package/dist/errors.d.ts +68 -0
- package/dist/errors.js +105 -0
- package/dist/execution.d.ts +15 -0
- package/dist/execution.js +178 -0
- package/dist/hooks.d.ts +48 -0
- package/dist/hooks.js +58 -0
- package/dist/main.d.ts +13 -0
- package/dist/main.js +31 -0
- package/dist/ncbi-fetch.d.ts +10 -0
- package/dist/ncbi-fetch.js +10 -0
- package/dist/output.d.ts +18 -0
- package/dist/output.js +394 -0
- package/dist/pipeline/executor.d.ts +22 -0
- package/dist/pipeline/executor.js +40 -0
- package/dist/pipeline/index.d.ts +6 -0
- package/dist/pipeline/index.js +6 -0
- package/dist/pipeline/registry.d.ts +16 -0
- package/dist/pipeline/registry.js +31 -0
- package/dist/pipeline/steps/fetch.d.ts +21 -0
- package/dist/pipeline/steps/fetch.js +160 -0
- package/dist/pipeline/steps/transform.d.ts +26 -0
- package/dist/pipeline/steps/transform.js +92 -0
- package/dist/pipeline/steps/xml-parse.d.ts +12 -0
- package/dist/pipeline/steps/xml-parse.js +27 -0
- package/dist/pipeline/template.d.ts +35 -0
- package/dist/pipeline/template.js +312 -0
- package/dist/rate-limiter.d.ts +56 -0
- package/dist/rate-limiter.js +120 -0
- package/dist/registry-api.d.ts +15 -0
- package/dist/registry-api.js +13 -0
- package/dist/registry.d.ts +90 -0
- package/dist/registry.js +100 -0
- package/dist/schema.d.ts +80 -0
- package/dist/schema.js +72 -0
- package/dist/spinner.d.ts +19 -0
- package/dist/spinner.js +37 -0
- package/dist/types.d.ts +101 -0
- package/dist/types.js +27 -0
- package/dist/utils.d.ts +16 -0
- package/dist/utils.js +40 -0
- package/dist/validate.d.ts +29 -0
- package/dist/validate.js +136 -0
- package/dist/verify.d.ts +20 -0
- package/dist/verify.js +131 -0
- package/dist/version.d.ts +13 -0
- package/dist/version.js +36 -0
- package/dist/xml-parser.d.ts +19 -0
- package/dist/xml-parser.js +119 -0
- package/dist/yaml-schema.d.ts +40 -0
- package/dist/yaml-schema.js +62 -0
- package/package.json +68 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* geo/dataset — Get GEO dataset details by accession.
|
|
3
|
+
*
|
|
4
|
+
* Searches by accession (GSE, GDS, GPL, GSM) in the gds database,
|
|
5
|
+
* then retrieves the full summary via esummary (JSON).
|
|
6
|
+
*/
|
|
7
|
+
import { cli, Strategy } from '../../registry.js';
|
|
8
|
+
import { CliError } from '../../errors.js';
|
|
9
|
+
import { buildEutilsUrl } from '../_shared/eutils.js';
|
|
10
|
+
import { truncate } from '../_shared/common.js';
|
|
11
|
+
cli({
    site: 'geo',
    name: 'dataset',
    description: 'Get GEO dataset details by accession',
    database: 'gds',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'accession', positional: true, required: true, help: 'GEO accession (e.g. GSE12345, GDS1234)' },
    ],
    columns: ['accession', 'title', 'organism', 'type', 'platform', 'samples', 'summary', 'date'],
    // Two-step E-utilities lookup: resolve the accession to a gds UID,
    // then fetch that UID's summary record and flatten it to one row.
    func: async (ctx, args) => {
        const acc = String(args.accession).toUpperCase();
        // Step 1: esearch — map the accession string to an internal UID.
        const search = await ctx.fetchJson(buildEutilsUrl('esearch.fcgi', {
            db: 'gds',
            term: `${acc}[Accession]`,
            retmode: 'json',
        }));
        const uids = search?.esearchresult?.idlist ?? [];
        if (uids.length === 0) {
            throw new CliError('NOT_FOUND', `GEO entry ${acc} not found`, 'Check that the accession is correct (e.g. GSE12345, GDS1234)');
        }
        const [uid] = uids;
        // Step 2: esummary — retrieve the full record for the first match.
        const summary = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
            db: 'gds',
            id: uid,
            retmode: 'json',
        }));
        const record = summary?.result?.[uid] ?? {};
        return [{
            accession: String(record.accession ?? acc),
            title: String(record.title ?? ''),
            organism: String(record.taxon ?? ''),
            type: String(record.entrytype ?? ''),
            platform: String(record.gpl ?? ''),
            samples: Number(record.n_samples ?? 0),
            summary: truncate(String(record.summary ?? ''), 300),
            date: String(record.pdat ?? ''),
        }];
    },
});
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * geo/download — Download GEO supplementary files.
 *
 * GEO stores supplementary files at a predictable HTTPS URL:
 * https://ftp.ncbi.nlm.nih.gov/geo/series/GSEnnn/GSExxxxx/suppl/
 *
 * This command:
 * 1. Lists available supplementary files for a GSE accession
 * 2. Downloads them to a specified directory (or current dir)
 */
/**
 * Build the GEO FTP-over-HTTPS URL for supplementary files.
 *
 * @param accession GEO series accession (e.g. "GSE12345"); used verbatim
 *   as the final path segment of the returned URL.
 * @returns URL of the series' `suppl/` directory listing.
 */
export declare function buildGeoSupplUrl(accession: string): string;
/**
 * Parse file list from NCBI FTP directory listing (HTML).
 *
 * @param html Raw HTML of a `suppl/` directory index page.
 * @returns One entry per downloadable file: `name` is the link href and
 *   `size` is the human-readable size column as listed (e.g. "1.2M").
 */
export declare function parseFileList(html: string): {
    name: string;
    size: string;
}[];
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* geo/download — Download GEO supplementary files.
|
|
3
|
+
*
|
|
4
|
+
* GEO stores supplementary files at a predictable HTTPS URL:
|
|
5
|
+
* https://ftp.ncbi.nlm.nih.gov/geo/series/GSEnnn/GSExxxxx/suppl/
|
|
6
|
+
*
|
|
7
|
+
* This command:
|
|
8
|
+
* 1. Lists available supplementary files for a GSE accession
|
|
9
|
+
* 2. Downloads them to a specified directory (or current dir)
|
|
10
|
+
*/
|
|
11
|
+
import { cli, Strategy } from '../../registry.js';
|
|
12
|
+
import { CliError } from '../../errors.js';
|
|
13
|
+
import { mkdirSync, existsSync, createWriteStream } from 'node:fs';
|
|
14
|
+
import { join } from 'node:path';
|
|
15
|
+
import { pipeline } from 'node:stream/promises';
|
|
16
|
+
import { Readable } from 'node:stream';
|
|
17
|
+
import { withMeta } from '../../types.js';
|
|
18
|
+
/**
 * Build the GEO FTP-over-HTTPS URL for supplementary files.
 *
 * GEO shards series directories by replacing the last three digits of the
 * numeric part of the accession with "nnn":
 *   GSE12345 → series/GSE12nnn/GSE12345/suppl/
 *   GSE12    → series/GSEnnn/GSE12/suppl/
 *
 * The previous `accession.slice(0, -3)` implementation chopped three
 * characters off the whole string, so accessions with fewer than three
 * digits lost part of their letter prefix (GSE12 → "GSnnn"). This version
 * only masks digits.
 *
 * @param {string} accession - GEO series accession (e.g. "GSE12345").
 * @returns {string} HTTPS URL of the series' suppl/ directory listing.
 */
export function buildGeoSupplUrl(accession) {
    const parts = /^([A-Za-z]+)(\d+)$/.exec(accession);
    if (parts) {
        const [, letters, digits] = parts;
        // Keep all but the last three digits; short numbers shard to plain "nnn".
        const stem = digits.length > 3 ? digits.slice(0, -3) : '';
        return `https://ftp.ncbi.nlm.nih.gov/geo/series/${letters}${stem}nnn/${accession}/suppl/`;
    }
    // Unexpected accession shape: fall back to the original best-effort behavior.
    const prefix = accession.slice(0, -3) + 'nnn';
    return `https://ftp.ncbi.nlm.nih.gov/geo/series/${prefix}/${accession}/suppl/`;
}
|
|
24
|
+
/**
 * Parse the file entries out of an NCBI FTP directory listing page.
 *
 * NCBI's HTML index pages render each file as an anchor tag followed by a
 * date, a time, and a human-readable size column; this extracts the href
 * and the size for every row that represents an actual file.
 *
 * @param {string} html - Raw HTML of the directory index.
 * @returns {{name: string, size: string}[]} Downloadable file entries.
 */
export function parseFileList(html) {
    const entryPattern = /<a\s+href="([^"]+)">[^<]+<\/a>\s+[\d-]+\s+[\d:]+\s+([\d.]+[KMG]?)/gi;
    const entries = [];
    for (const [, href, size] of html.matchAll(entryPattern)) {
        // The parent link, subdirectories, and sort-order query links are not files.
        const isFile = href !== '../' && !href.endsWith('/') && !href.startsWith('?');
        if (isFile) {
            entries.push({ name: href, size });
        }
    }
    return entries;
}
|
|
39
|
+
cli({
    site: 'geo',
    name: 'download',
    description: 'Download GEO supplementary files (expression matrices, etc.)',
    database: 'gds',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'accession', positional: true, required: true, help: 'GEO Series accession (e.g. GSE12345)' },
        { name: 'outdir', default: '.', help: 'Output directory (default: current directory)' },
        { name: 'list-only', type: 'boolean', default: false, help: 'Only list available files, do not download' },
        { name: 'dry-run', type: 'boolean', default: false, help: 'Same as --list-only: show files without downloading' },
        { name: 'pattern', help: 'Filter files by pattern (e.g. "counts", "matrix", "tar.gz")' },
    ],
    columns: ['file', 'size', 'status'],
    // Scrapes the series' suppl/ directory index, optionally filters by
    // substring, then either lists the files or streams each one to disk.
    // Per-file failures become status rows instead of aborting the run.
    func: async (ctx, args) => {
        const accession = String(args.accession).toUpperCase().trim();
        // Only GSE (series) accessions map to the suppl/ URL scheme used here.
        if (!/^GSE\d+$/.test(accession)) {
            throw new CliError('ARGUMENT', `Invalid GEO accession: "${accession}"`, 'Use a GSE accession (e.g. GSE12345)');
        }
        // --dry-run is an alias for --list-only; either flag suppresses downloads.
        const listOnly = Boolean(args['list-only']) || Boolean(args['dry-run']);
        const outdir = String(args.outdir);
        // Pattern filtering is case-insensitive substring containment.
        const pattern = args.pattern ? String(args.pattern).toLowerCase() : undefined;
        // Step 1: Get directory listing
        const supplUrl = buildGeoSupplUrl(accession);
        let html;
        try {
            html = await ctx.fetchText(supplUrl);
        }
        catch {
            // Any fetch failure (e.g. 404) is treated as "no suppl/ directory".
            throw new CliError('NOT_FOUND', `No supplementary files found for ${accession}`, 'The dataset may not have supplementary files, or the accession may be incorrect');
        }
        let files = parseFileList(html);
        if (!files.length) {
            throw new CliError('NOT_FOUND', `No downloadable files found at ${supplUrl}`, 'The directory listing may be empty or in an unexpected format');
        }
        // Filter by pattern if specified
        if (pattern) {
            files = files.filter(f => f.name.toLowerCase().includes(pattern));
            if (!files.length) {
                throw new CliError('NOT_FOUND', `No files matching "${pattern}" in ${accession}`, 'Try without --pattern to see all available files');
            }
        }
        // List-only mode: report what is available (with direct URLs) without touching disk.
        if (listOnly) {
            const rows = files.map(f => ({
                file: f.name,
                size: f.size,
                status: 'available',
                url: `${supplUrl}${f.name}`,
            }));
            return withMeta(rows, { totalCount: rows.length, query: accession });
        }
        // Step 2: Download files
        if (!existsSync(outdir)) {
            mkdirSync(outdir, { recursive: true });
        }
        const rows = [];
        // Sequential downloads; each outcome (saved/failed/error) is one row.
        for (const file of files) {
            const fileUrl = `${supplUrl}${file.name}`;
            const destPath = join(outdir, file.name);
            try {
                const response = await fetch(fileUrl);
                if (!response.ok || !response.body) {
                    rows.push({ file: file.name, size: file.size, status: `failed (HTTP ${response.status})` });
                    continue;
                }
                // Stream the response body straight to disk (no in-memory buffering).
                const writable = createWriteStream(destPath);
                await pipeline(Readable.fromWeb(response.body), writable);
                rows.push({ file: file.name, size: file.size, status: `saved → ${destPath}` });
            }
            catch (err) {
                // Network/stream errors are captured per file so later downloads proceed.
                rows.push({ file: file.name, size: file.size, status: `error: ${err instanceof Error ? err.message : String(err)}` });
            }
        }
        return withMeta(rows, { totalCount: rows.length, query: accession });
    },
});
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* geo/samples — List samples in a GEO dataset.
|
|
3
|
+
*
|
|
4
|
+
* Searches for GSM (sample) entries associated with a given GEO series
|
|
5
|
+
* accession, then retrieves sample metadata via esummary (JSON).
|
|
6
|
+
*/
|
|
7
|
+
import { cli, Strategy } from '../../registry.js';
|
|
8
|
+
import { CliError } from '../../errors.js';
|
|
9
|
+
import { buildEutilsUrl } from '../_shared/eutils.js';
|
|
10
|
+
import { clamp } from '../_shared/common.js';
|
|
11
|
+
cli({
    site: 'geo',
    name: 'samples',
    description: 'List samples in a GEO dataset',
    database: 'gds',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'accession', positional: true, required: true, help: 'GEO series accession (e.g. GSE12345)' },
        { name: 'limit', type: 'int', default: 20, help: 'Max results (1-200)' },
    ],
    columns: ['accession', 'title', 'organism', 'type'],
    // Finds GSM (sample) records belonging to a series via esearch, then
    // resolves their metadata in one esummary call.
    func: async (ctx, args) => {
        const series = String(args.accession).toUpperCase();
        const retmax = clamp(Number(args.limit), 1, 200);
        // Restrict the gds search to sample-type entries of this series.
        const search = await ctx.fetchJson(buildEutilsUrl('esearch.fcgi', {
            db: 'gds',
            term: `${series}[Accession] AND gsm[Entry Type]`,
            retmax: String(retmax),
            retmode: 'json',
        }));
        const sampleIds = search?.esearchresult?.idlist ?? [];
        if (sampleIds.length === 0) {
            throw new CliError('NOT_FOUND', `No samples found for ${series}`, 'Check that the accession is a valid GEO series (GSE)');
        }
        // One batched esummary request for all sample UIDs.
        const summary = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
            db: 'gds',
            id: sampleIds.join(','),
            retmode: 'json',
        }));
        const byUid = summary?.result;
        return (byUid?.uids ?? []).map(uid => {
            const sample = byUid?.[uid] ?? {};
            return {
                accession: String(sample.accession ?? `GSM${uid}`),
                title: String(sample.title ?? ''),
                organism: String(sample.taxon ?? ''),
                type: String(sample.entrytype ?? ''),
            };
        });
    },
});
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* geo/search — Search GEO datasets.
|
|
3
|
+
*
|
|
4
|
+
* Uses the two-step esearch + esummary pattern against db=gds:
|
|
5
|
+
* 1. esearch to retrieve matching GEO DataSet IDs
|
|
6
|
+
* 2. esummary (JSON) to get dataset metadata
|
|
7
|
+
*/
|
|
8
|
+
import { cli, Strategy } from '../../registry.js';
|
|
9
|
+
import { CliError } from '../../errors.js';
|
|
10
|
+
import { buildEutilsUrl } from '../_shared/eutils.js';
|
|
11
|
+
import { clamp } from '../_shared/common.js';
|
|
12
|
+
import { withMeta } from '../../types.js';
|
|
13
|
+
cli({
    site: 'geo',
    name: 'search',
    description: 'Search GEO datasets',
    database: 'gds',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'query', positional: true, required: true, help: 'Search query (e.g. "breast cancer RNA-seq")' },
        { name: 'limit', type: 'int', default: 10, help: 'Max results (1-200)' },
        { name: 'type', default: 'gse', choices: ['gse', 'gds', 'gpl', 'gsm'], help: 'Entry type filter' },
    ],
    columns: ['accession', 'title', 'organism', 'type', 'samples', 'date'],
    // Classic two-step E-utilities search: esearch for UIDs + total count,
    // esummary to turn those UIDs into dataset rows.
    func: async (ctx, args) => {
        const retmax = clamp(Number(args.limit), 1, 200);
        const entryType = String(args.type).toUpperCase();
        const query = String(args.query);
        // Step 1: esearch — matching UIDs plus the overall hit count.
        const search = await ctx.fetchJson(buildEutilsUrl('esearch.fcgi', {
            db: 'gds',
            term: `${args.query} AND ${entryType}[Entry Type]`,
            retmax: String(retmax),
            retmode: 'json',
        }));
        const hits = search?.esearchresult;
        const uidList = hits?.idlist ?? [];
        const totalCount = Number(hits?.count ?? 0);
        if (uidList.length === 0) {
            throw new CliError('NOT_FOUND', 'No GEO entries found', 'Try different search terms or a different entry type');
        }
        // Step 2: esummary — dataset metadata for every UID in one request.
        const summary = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
            db: 'gds',
            id: uidList.join(','),
            retmode: 'json',
        }));
        const byUid = summary?.result;
        const rows = (byUid?.uids ?? []).map(uid => {
            const entry = byUid?.[uid] ?? {};
            return {
                accession: String(entry.accession ?? `GDS${uid}`),
                title: String(entry.title ?? ''),
                organism: String(entry.taxon ?? ''),
                type: String(entry.entrytype ?? ''),
                samples: Number(entry.n_samples ?? 0),
                date: String(entry.pdat ?? ''),
            };
        });
        return withMeta(rows, { totalCount, query });
    },
});
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* kegg/convert — Convert IDs between KEGG and other databases.
|
|
3
|
+
*
|
|
4
|
+
* Uses KEGG REST /conv endpoint for ID mapping between
|
|
5
|
+
* KEGG gene IDs and NCBI Gene IDs, UniProt accessions, etc.
|
|
6
|
+
*/
|
|
7
|
+
import { cli, Strategy } from '../../registry.js';
|
|
8
|
+
import { CliError } from '../../errors.js';
|
|
9
|
+
import { buildKeggUrl, parseKeggTsv } from '../../databases/kegg.js';
|
|
10
|
+
import { withMeta } from '../../types.js';
|
|
11
|
+
cli({
    site: 'kegg',
    name: 'convert',
    description: 'Convert IDs between KEGG and external databases',
    database: 'kegg',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'id', positional: true, required: true, help: 'ID to convert (e.g. hsa:7157, ncbi-geneid:7157)' },
        { name: 'to', default: 'ncbi-geneid', choices: ['ncbi-geneid', 'ncbi-proteinid', 'uniprot'], help: 'Target database' },
    ],
    columns: ['source', 'target'],
    // Single call to the KEGG REST /conv endpoint; the TSV response maps
    // each input ID to its equivalent in the target namespace.
    func: async (ctx, args) => {
        const sourceId = String(args.id).trim();
        const targetDb = String(args.to);
        const body = await ctx.fetchText(buildKeggUrl(`/conv/${targetDb}/${sourceId}`));
        // KEGG returns an empty body when the ID has no mapping.
        if (body.trim() === '') {
            throw new CliError('NOT_FOUND', `No conversion found for ${sourceId} → ${targetDb}`, 'Check the ID format');
        }
        const rows = parseKeggTsv(body).map(({ key, value }) => ({
            source: key,
            target: value,
        }));
        return withMeta(rows, { totalCount: rows.length, query: sourceId });
    },
});
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* kegg/disease — Find diseases linked to a KEGG gene.
|
|
3
|
+
*
|
|
4
|
+
* Shorthand for `kegg link --target disease`. Adds disease name resolution.
|
|
5
|
+
*/
|
|
6
|
+
import { cli, Strategy } from '../../registry.js';
|
|
7
|
+
import { CliError } from '../../errors.js';
|
|
8
|
+
import { buildKeggUrl, parseKeggTsv, parseKeggEntry } from '../../databases/kegg.js';
|
|
9
|
+
import { withMeta } from '../../types.js';
|
|
10
|
+
cli({
    site: 'kegg',
    name: 'disease',
    description: 'Find diseases linked to a KEGG gene',
    database: 'kegg',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'gene', positional: true, required: true, help: 'KEGG gene ID (e.g. hsa:7157)' },
    ],
    columns: ['geneId', 'diseaseId', 'diseaseName'],
    // Two-phase lookup: /link/disease/<gene> yields gene→disease ID pairs,
    // then /get/<ids> resolves human-readable names on a best-effort basis.
    func: async (ctx, args) => {
        const gene = String(args.gene).trim();
        // Step 1: Get disease links (TSV of "geneId<TAB>diseaseId" pairs)
        const linkText = await ctx.fetchText(buildKeggUrl(`/link/disease/${gene}`));
        if (!linkText.trim()) {
            throw new CliError('NOT_FOUND', `No disease links found for ${gene}`, 'Check the gene ID (e.g. hsa:7157)');
        }
        const links = parseKeggTsv(linkText);
        // Step 2: Get disease names (batch, max 10 per request)
        const diseaseIds = links.map(l => l.value).filter(Boolean);
        const names = {};
        // Batch in groups of 10
        for (let i = 0; i < diseaseIds.length; i += 10) {
            const batch = diseaseIds.slice(i, i + 10);
            try {
                const text = await ctx.fetchText(buildKeggUrl(`/get/${batch.join('+')}`));
                // Parse multiple entries separated by ///
                const entries = text.split('///').filter(e => e.trim());
                for (const entryText of entries) {
                    const entry = parseKeggEntry(entryText);
                    if (entry.ENTRY && entry.NAME) {
                        // Re-key as "ds:<id>" so lookups line up with the /link
                        // values (assumes /link emits 'ds:'-prefixed disease IDs
                        // while /get's ENTRY field omits the prefix — TODO
                        // confirm against KEGG REST docs).
                        const id = 'ds:' + entry.ENTRY.split(/\s+/)[0];
                        names[id] = entry.NAME;
                    }
                }
            }
            catch {
                // Non-fatal — display without names
            }
        }
        // Join: one output row per link, with the resolved name when known.
        const rows = links.map(l => ({
            geneId: l.key,
            diseaseId: l.value,
            diseaseName: names[l.value] ?? '',
        }));
        return withMeta(rows, { totalCount: rows.length, query: gene });
    },
});
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* kegg/link — Find cross-references for a gene in KEGG.
|
|
3
|
+
*
|
|
4
|
+
* Uses KEGG REST /link endpoint to find pathways, diseases, or
|
|
5
|
+
* other database cross-references for a given gene.
|
|
6
|
+
*/
|
|
7
|
+
import { cli, Strategy } from '../../registry.js';
|
|
8
|
+
import { CliError } from '../../errors.js';
|
|
9
|
+
import { buildKeggUrl, parseKeggTsv } from '../../databases/kegg.js';
|
|
10
|
+
import { withMeta } from '../../types.js';
|
|
11
|
+
cli({
    site: 'kegg',
    name: 'link',
    description: 'Find KEGG cross-references for a gene',
    database: 'kegg',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'gene', positional: true, required: true, help: 'KEGG gene ID (e.g. hsa:7157) or comma-separated list' },
        { name: 'target', default: 'pathway', choices: ['pathway', 'disease', 'drug', 'compound'], help: 'Target database to link to' },
    ],
    columns: ['source', 'target'],
    // One call to the KEGG REST /link endpoint; the TSV response lists one
    // cross-reference pair per line.
    func: async (ctx, args) => {
        const geneId = String(args.gene).trim();
        const targetDb = String(args.target);
        const body = await ctx.fetchText(buildKeggUrl(`/link/${targetDb}/${geneId}`));
        // An empty body means KEGG has no links of this type for the gene.
        if (body.trim() === '') {
            throw new CliError('NOT_FOUND', `No ${targetDb} links found for ${geneId}`, 'Check the gene ID format (e.g. hsa:7157)');
        }
        const rows = parseKeggTsv(body).map(({ key, value }) => ({
            source: key,
            target: value,
        }));
        return withMeta(rows, { totalCount: rows.length, query: geneId });
    },
});
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* kegg/pathway — Get KEGG pathway details.
|
|
3
|
+
*
|
|
4
|
+
* Uses KEGG REST /get endpoint to retrieve pathway information.
|
|
5
|
+
*/
|
|
6
|
+
import { cli, Strategy } from '../../registry.js';
|
|
7
|
+
import { CliError } from '../../errors.js';
|
|
8
|
+
import { buildKeggUrl, parseKeggEntry } from '../../databases/kegg.js';
|
|
9
|
+
cli({
    site: 'kegg',
    name: 'pathway',
    description: 'Get KEGG pathway details',
    database: 'kegg',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'id', positional: true, required: true, help: 'KEGG pathway ID (e.g. hsa05200, hsa00600)' },
    ],
    columns: ['id', 'name', 'description', 'class', 'genes', 'diseases'],
    // Fetches one pathway record via the KEGG REST /get endpoint and
    // flattens the flat-file fields into a single output row.
    func: async (ctx, args) => {
        const pathwayId = String(args.id).trim();
        const body = await ctx.fetchText(buildKeggUrl(`/get/${pathwayId}`));
        // KEGG answers unknown IDs with an empty body or a "No such ..." message.
        const missing = !body || body.includes('No such');
        if (missing) {
            throw new CliError('NOT_FOUND', `KEGG pathway ${pathwayId} not found`, 'Check the pathway ID (e.g. hsa05200)');
        }
        const record = parseKeggEntry(body);
        // Estimate the gene count by splitting the GENE field on "<digits> " runs.
        const geneChunks = (record.GENE ?? '').split(/\d+\s+/).filter(Boolean);
        return [{
            id: pathwayId,
            name: record.NAME ?? '',
            description: record.DESCRIPTION ?? '',
            class: record.CLASS ?? '',
            genes: `${geneChunks.length} genes`,
            diseases: record.DISEASE ?? '',
        }];
    },
});
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pubmed/abstract — Get abstract text for a PubMed article.
|
|
3
|
+
*
|
|
4
|
+
* Returns only the PMID and abstract text, defaulting to plain output
|
|
5
|
+
* format for easy piping and reading.
|
|
6
|
+
*/
|
|
7
|
+
import { cli, Strategy } from '../../registry.js';
|
|
8
|
+
import { CliError } from '../../errors.js';
|
|
9
|
+
import { buildEutilsUrl } from '../_shared/eutils.js';
|
|
10
|
+
import { parsePubmedArticles } from '../_shared/xml-helpers.js';
|
|
11
|
+
cli({
    site: 'pubmed',
    name: 'abstract',
    description: 'Get abstract text for a PubMed article',
    database: 'pubmed',
    strategy: Strategy.PUBLIC,
    args: [
        { name: 'pmid', positional: true, required: true, help: 'PubMed ID' },
    ],
    columns: ['pmid', 'abstract'],
    defaultFormat: 'plain',
    // Fetches one article via efetch (XML) and emits just its abstract,
    // defaulting to plain output for easy piping.
    func: async (ctx, args) => {
        const pmid = String(args.pmid).trim();
        const isNumeric = /^\d+$/.test(pmid);
        if (!isNumeric) {
            throw new CliError('ARGUMENT', `Invalid PMID: "${pmid}"`, 'PMID must be a numeric identifier');
        }
        const xml = await ctx.fetchXml(buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: pmid,
            rettype: 'xml',
        }));
        const [article] = parsePubmedArticles(xml);
        if (article === undefined) {
            throw new CliError('NOT_FOUND', `Article PMID ${pmid} not found`, 'Check that the PMID is correct');
        }
        if (!article.abstract) {
            throw new CliError('EMPTY_RESULT', `No abstract available for PMID ${pmid}`, 'This article may not have an abstract');
        }
        return [{ pmid: article.pmid, abstract: article.abstract }];
    },
});
|