@yangfei_93sky/biocli 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,13 +5,13 @@ Query biological databases from the terminal. Agent-first design.
5
5
  ```
6
6
  biocli v0.2.0
7
7
  NCBI · UniProt · KEGG · STRING · Ensembl · Enrichr
8
- 42 commands · 6 database backends · 8 workflow commands · 4 download commands
8
+ 44 commands · 6 database backends · 10 workflow commands · 4 download commands
9
9
  ```
10
10
 
11
11
  ## Install
12
12
 
13
13
  ```bash
14
- npm install -g @biocli/cli
14
+ npm install -g @yangfei_93sky/biocli
15
15
  ```
16
16
 
17
17
  Requires Node.js >= 20. No API keys needed (optional NCBI key increases rate limit).
@@ -75,7 +75,7 @@ Designed for **AI agents** (Claude Code, Codex CLI, etc.) — structured JSON ou
75
75
 
76
76
  </details>
77
77
 
78
- > All three tools were installed (`npm install -g @biocli/cli`, `pip install gget==0.30.3`, `uv tool install biomcp-cli==0.8.19`) and executed on the same machine with the same inputs. Raw stdout/stderr, scoring scripts, and runner scripts are in [`benchmarks/`](benchmarks/). BioMCP excels at biomedical entity breadth (drugs, trials, diseases) not covered by this task set; gget excels at sequence analysis (BLAST, AlphaFold) not covered here.
78
+ > All three tools were installed (`npm install -g @yangfei_93sky/biocli`, `pip install gget==0.30.3`, `uv tool install biomcp-cli==0.8.19`) and executed on the same machine with the same inputs. Raw stdout/stderr, scoring scripts, and runner scripts are in [`benchmarks/`](benchmarks/). BioMCP excels at biomedical entity breadth (drugs, trials, diseases) not covered by this task set; gget excels at sequence analysis (BLAST, AlphaFold) not covered here.
79
79
 
80
80
  ## Quick start
81
81
 
@@ -118,6 +118,8 @@ biocli aggregate gene-profile TP53
118
118
  | `aggregate gene-profile <gene>` | NCBI+UniProt+KEGG+STRING | Gene profile (no literature) |
119
119
  | `aggregate workflow-scout <query>` | GEO+SRA | Scout datasets for a research question |
120
120
  | `aggregate workflow-prepare <dataset>` | GEO+NCBI+UniProt+KEGG | Prepare research-ready directory with data + annotations |
121
+ | `aggregate workflow-annotate <genes>` | NCBI+UniProt+KEGG+Enrichr | Annotate gene list → genes.csv + pathways.csv + enrichment.csv + report.md |
122
+ | `aggregate workflow-profile <genes>` | NCBI+UniProt+KEGG+STRING+Enrichr | Gene set functional profile → shared pathways, interactions, GO terms |
121
123
 
122
124
  ### Database commands (atomic)
123
125
 
@@ -224,6 +224,57 @@
224
224
  "type": "ts",
225
225
  "modulePath": "aggregate/variant-interpret.js"
226
226
  },
227
+ {
228
+ "site": "aggregate",
229
+ "name": "workflow-annotate",
230
+ "description": "Annotate a gene list into a research-ready directory",
231
+ "database": "aggregate",
232
+ "strategy": "public",
233
+ "args": [
234
+ {
235
+ "name": "genes",
236
+ "type": "str",
237
+ "required": true,
238
+ "positional": true,
239
+ "help": "Gene symbols: comma-separated (TP53,BRCA1) or use --input file"
240
+ },
241
+ {
242
+ "name": "outdir",
243
+ "type": "str",
244
+ "required": true,
245
+ "help": "Output directory for annotation results"
246
+ },
247
+ {
248
+ "name": "organism",
249
+ "type": "str",
250
+ "default": "human",
251
+ "required": false,
252
+ "help": "Organism (human, mouse, rat, etc.)"
253
+ },
254
+ {
255
+ "name": "library",
256
+ "type": "str",
257
+ "default": "KEGG_2021_Human",
258
+ "required": false,
259
+ "help": "Enrichr library for enrichment analysis"
260
+ },
261
+ {
262
+ "name": "plan",
263
+ "type": "boolean",
264
+ "default": false,
265
+ "required": false,
266
+ "help": "Preview steps without executing"
267
+ }
268
+ ],
269
+ "columns": [
270
+ "step",
271
+ "status",
272
+ "detail"
273
+ ],
274
+ "timeout": 120,
275
+ "type": "ts",
276
+ "modulePath": "aggregate/workflow-annotate.js"
277
+ },
227
278
  {
228
279
  "site": "aggregate",
229
280
  "name": "workflow-prepare",
@@ -267,6 +318,57 @@
267
318
  "type": "ts",
268
319
  "modulePath": "aggregate/workflow-prepare.js"
269
320
  },
321
+ {
322
+ "site": "aggregate",
323
+ "name": "workflow-profile",
324
+ "description": "Functional profile for a gene set (interactions, GO terms, shared pathways)",
325
+ "database": "aggregate",
326
+ "strategy": "public",
327
+ "args": [
328
+ {
329
+ "name": "genes",
330
+ "type": "str",
331
+ "required": true,
332
+ "positional": true,
333
+ "help": "Gene symbols: comma-separated (TP53,BRCA1,EGFR,MYC,CDK2)"
334
+ },
335
+ {
336
+ "name": "outdir",
337
+ "type": "str",
338
+ "required": true,
339
+ "help": "Output directory"
340
+ },
341
+ {
342
+ "name": "organism",
343
+ "type": "str",
344
+ "default": "human",
345
+ "required": false,
346
+ "help": "Organism (human, mouse, rat, etc.)"
347
+ },
348
+ {
349
+ "name": "library",
350
+ "type": "str",
351
+ "default": "KEGG_2021_Human",
352
+ "required": false,
353
+ "help": "Enrichr library"
354
+ },
355
+ {
356
+ "name": "plan",
357
+ "type": "boolean",
358
+ "default": false,
359
+ "required": false,
360
+ "help": "Preview steps without executing"
361
+ }
362
+ ],
363
+ "columns": [
364
+ "step",
365
+ "status",
366
+ "detail"
367
+ ],
368
+ "timeout": 180,
369
+ "type": "ts",
370
+ "modulePath": "aggregate/workflow-profile.js"
371
+ },
270
372
  {
271
373
  "site": "aggregate",
272
374
  "name": "workflow-scout",
@@ -0,0 +1,15 @@
1
+ /**
2
+ * aggregate/workflow-annotate — Annotate a gene list into a research-ready directory.
3
+ *
4
+ * Input: gene list (comma-separated, --input file, or stdin)
5
+ * Output directory:
6
+ * summary.json — high-level overview (gene count, sources, warnings)
7
+ * genes.csv — per-gene annotations (symbol, name, function, chromosome, etc.)
8
+ * pathways.csv — all KEGG pathways linked to any input gene
9
+ * enrichment.csv — Enrichr pathway enrichment results
10
+ * report.md — human-readable Markdown report
11
+ * manifest.json — full provenance (biocli version, run timestamp, sources, inputs)
12
+ *
13
+ * Cross-queries: NCBI Gene + UniProt + KEGG + Enrichr
14
+ */
15
+ export {};
@@ -0,0 +1,323 @@
1
+ /**
2
+ * aggregate/workflow-annotate — Annotate a gene list into a research-ready directory.
3
+ *
4
+ * Input: gene list (comma-separated, --input file, or stdin)
5
+ * Output directory:
6
+ * summary.json — high-level overview (gene count, sources, warnings)
7
+ * genes.csv — per-gene annotations (symbol, name, function, chromosome, etc.)
8
+ * pathways.csv — all KEGG pathways linked to any input gene
9
+ * enrichment.csv — Enrichr pathway enrichment results
10
+ * report.md — human-readable Markdown report
11
+ * manifest.json — full provenance (biocli version, run timestamp, sources, inputs)
12
+ *
13
+ * Cross-queries: NCBI Gene + UniProt + KEGG + Enrichr
14
+ */
15
+ import { cli, Strategy } from '../../registry.js';
16
+ import { CliError } from '../../errors.js';
17
+ import { wrapResult } from '../../types.js';
18
+ import { createHttpContextForDatabase } from '../../databases/index.js';
19
+ import { buildEutilsUrl } from '../../databases/ncbi.js';
20
+ import { buildUniprotUrl } from '../../databases/uniprot.js';
21
+ import { buildKeggUrl, parseKeggTsv } from '../../databases/kegg.js';
22
+ import { submitGeneList, getEnrichment } from '../../databases/enrichr.js';
23
+ import { parseGeneSummaries } from '../_shared/xml-helpers.js';
24
+ import { resolveOrganism } from '../_shared/organism-db.js';
25
+ import { mkdirSync, existsSync, writeFileSync } from 'node:fs';
26
+ import { join } from 'node:path';
27
+ import { getVersion } from '../../version.js';
28
+ // ── CSV helper ───────────────────────────────────────────────────────────────
29
/**
 * Serialize an array of row objects to CSV text.
 *
 * @param {string[]} headers - Column names; each is also the key read from every row.
 * @param {Array<Record<string, unknown>>} rows - One object per output record.
 * @returns {string} Header line followed by one line per row, each terminated by "\n".
 */
function toCsv(headers, rows) {
    // A field is quoted when it contains a comma, a double quote, or any line
    // break — including a bare carriage return, which many CSV readers treat
    // as a record separator. Embedded quotes are doubled.
    const escape = (v) => {
        const s = String(v ?? '');
        return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
    };
    // Header cells go through the same escaping as data cells.
    const lines = [headers.map(escape).join(',')];
    for (const row of rows) {
        lines.push(headers.map((h) => escape(row[h])).join(','));
    }
    return lines.join('\n') + '\n';
}
41
+ // ── Main ─────────────────────────────────────────────────────────────────────
42
/*
 * Registers the `aggregate workflow-annotate` command.
 *
 * The handler splits the comma-separated `genes` argument, then:
 *   1. queries NCBI Gene (esearch + esummary) and UniProt for each symbol → genes.csv
 *   2. queries KEGG for pathway links of each gene that got an NCBI id → pathways.csv
 *   3. submits the gene set to Enrichr when at least 2 genes were given → enrichment.csv
 *   4. writes report.md, summary.json and manifest.json into `outdir`
 *
 * Per-database failures are collected in `warnings` and do not abort the run.
 * With `--plan` the handler returns the step list without creating any files.
 * Throws CliError('ARGUMENT') when no gene symbol is left after trimming.
 */
cli({
    site: 'aggregate',
    name: 'workflow-annotate',
    description: 'Annotate a gene list into a research-ready directory',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 120,
    args: [
        { name: 'genes', positional: true, required: true, help: 'Gene symbols: comma-separated (TP53,BRCA1) or use --input file' },
        { name: 'outdir', required: true, help: 'Output directory for annotation results' },
        { name: 'organism', default: 'human', help: 'Organism (human, mouse, rat, etc.)' },
        { name: 'library', default: 'KEGG_2021_Human', help: 'Enrichr library for enrichment analysis' },
        { name: 'plan', type: 'boolean', default: false, help: 'Preview steps without executing' },
    ],
    columns: ['step', 'status', 'detail'],
    func: async (_ctx, args) => {
        const geneInput = String(args.genes);
        const genes = geneInput.split(',').map(s => s.trim()).filter(Boolean);
        const outdir = String(args.outdir);
        const orgInput = String(args.organism);
        const library = String(args.library);
        const planOnly = Boolean(args.plan);
        if (genes.length === 0) {
            throw new CliError('ARGUMENT', 'At least one gene symbol is required', 'Example: biocli aggregate workflow-annotate TP53,BRCA1,EGFR --outdir ./results');
        }
        const org = resolveOrganism(orgInput);
        const sources = [];
        const warnings = [];
        const steps = [];
        // Uniform text for anything caught in a try/catch below.
        const errText = (err) => (err instanceof Error ? err.message : String(err));
        // Keep a value on one Markdown table row: escape pipes, flatten line breaks.
        const mdCell = (v) => String(v ?? '').replace(/\|/g, '\\|').replace(/\r?\n|\r/g, ' ');
        // ── Plan mode ───────────────────────────────────────────────────────
        if (planOnly) {
            return wrapResult({
                plan: [
                    { step: 'gene-annotations', detail: `Query NCBI Gene + UniProt for ${genes.length} gene(s)` },
                    { step: 'pathways', detail: `Query KEGG pathways for each gene` },
                    { step: 'enrichment', detail: `Run Enrichr enrichment (${library}) for gene set` },
                    { step: 'output', detail: `Write genes.csv, pathways.csv, enrichment.csv, report.md, summary.json, manifest.json → ${outdir}` },
                ],
                genes,
                organism: org.name,
                outdir,
            }, { ids: {}, sources: [], warnings: [], query: genes.join(','), organism: org.name });
        }
        // Create output directory
        if (!existsSync(outdir))
            mkdirSync(outdir, { recursive: true });
        const ncbiCtx = createHttpContextForDatabase('ncbi');
        const uniprotCtx = createHttpContextForDatabase('uniprot');
        const keggCtx = createHttpContextForDatabase('kegg');
        // ── Step 1: Gene annotations (NCBI + UniProt) ───────────────────────
        const geneAnnotations = [];
        for (const gene of genes) {
            const annot = {
                symbol: gene, ncbiGeneId: '', name: '', summary: '',
                chromosome: '', location: '', uniprotAccession: '',
                proteinFunction: '', subcellularLocation: '', goTerms: '',
            };
            // NCBI Gene
            try {
                const searchResult = await ncbiCtx.fetchJson(buildEutilsUrl('esearch.fcgi', {
                    db: 'gene', term: `${gene}[Gene Name] AND ${org.name}[Organism]`,
                    retmax: '5', retmode: 'json',
                }));
                const ids = searchResult?.esearchresult?.idlist ?? [];
                if (ids.length > 0) {
                    const summaryResult = await ncbiCtx.fetchJson(buildEutilsUrl('esummary.fcgi', {
                        db: 'gene', id: ids.join(','), retmode: 'json',
                    }));
                    const parsed = parseGeneSummaries(summaryResult);
                    // Prefer the record whose symbol matches the input exactly (case-insensitive).
                    const best = parsed.find(g => g.symbol.toUpperCase() === gene.toUpperCase()) ?? parsed[0];
                    if (best) {
                        annot.ncbiGeneId = best.geneId;
                        annot.name = best.name;
                        annot.summary = best.summary;
                        annot.chromosome = best.chromosome;
                        annot.location = best.location;
                    }
                }
            }
            catch (err) {
                warnings.push(`NCBI Gene ${gene}: ${errText(err)}`);
            }
            // UniProt
            try {
                const upResult = await uniprotCtx.fetchJson(buildUniprotUrl('/uniprotkb/search', {
                    query: `gene:${gene} AND organism_id:${org.taxId} AND reviewed:true`,
                    format: 'json', size: '5',
                }));
                const results = (upResult?.results ?? []);
                if (results.length > 0) {
                    const getGeneName = (e) => {
                        const gs = e.genes;
                        const gn = gs?.[0];
                        return String(gn?.geneName?.value ?? '');
                    };
                    const exact = results.find(e => getGeneName(e).toUpperCase() === gene.toUpperCase());
                    const entry = exact ?? results[0];
                    annot.uniprotAccession = String(entry.primaryAccession ?? '');
                    const comments = (entry.comments ?? []);
                    const funcComment = comments.find(c => c.commentType === 'FUNCTION');
                    const funcTexts = (funcComment?.texts ?? []);
                    annot.proteinFunction = funcTexts.map(t => String(t.value ?? '')).join(' ');
                    const locComment = comments.find(c => c.commentType === 'SUBCELLULAR LOCATION');
                    const locEntries = (locComment?.subcellularLocations ?? []);
                    annot.subcellularLocation = locEntries.map(l => String(l.location?.value ?? '')).filter(Boolean).join(', ');
                    const xrefs = (entry.uniProtKBCrossReferences ?? []);
                    const goTerms = xrefs.filter(x => x.database === 'GO').map(x => {
                        const props = (x.properties ?? []);
                        const termProp = props.find(p => p.key === 'GoTerm');
                        return String(termProp?.value ?? '');
                    });
                    // Only the first 10 GO terms are kept in the CSV cell.
                    annot.goTerms = goTerms.slice(0, 10).join('; ');
                }
            }
            catch (err) {
                warnings.push(`UniProt ${gene}: ${errText(err)}`);
            }
            geneAnnotations.push(annot);
        }
        if (geneAnnotations.some(a => a.ncbiGeneId))
            sources.push('NCBI Gene');
        if (geneAnnotations.some(a => a.uniprotAccession))
            sources.push('UniProt');
        writeFileSync(join(outdir, 'genes.csv'), toCsv(['symbol', 'ncbiGeneId', 'name', 'chromosome', 'location', 'uniprotAccession', 'proteinFunction', 'subcellularLocation', 'goTerms', 'summary'], geneAnnotations));
        steps.push({ step: 'gene-annotations', status: 'done', detail: `${geneAnnotations.length} gene(s) → genes.csv` });
        // ── Step 2: KEGG pathways ───────────────────────────────────────────
        const pathwayLinks = [];
        const pathIdSet = new Set();
        for (const annot of geneAnnotations) {
            // KEGG is queried by NCBI gene id, so genes without one are skipped.
            if (!annot.ncbiGeneId)
                continue;
            try {
                const linkText = await keggCtx.fetchText(buildKeggUrl(`/link/pathway/${org.keggOrg}:${annot.ncbiGeneId}`));
                if (linkText && linkText.trim()) {
                    const links = parseKeggTsv(linkText);
                    for (const l of links) {
                        const pid = l.value.replace(/^path:/, '');
                        pathIdSet.add(pid);
                        // Name starts as the id so the CSV and report stay usable
                        // even if the name lookup below fails.
                        pathwayLinks.push({ gene: annot.symbol, pathwayId: pid, pathwayName: pid });
                    }
                }
            }
            catch (err) {
                // Still non-fatal, but surfaced instead of silently dropped.
                warnings.push(`KEGG pathways ${annot.symbol}: ${errText(err)}`);
            }
        }
        // Resolve pathway names
        if (pathIdSet.size > 0) {
            // Link data already came from KEGG, whether or not names resolve.
            sources.push('KEGG');
            try {
                const listText = await keggCtx.fetchText(buildKeggUrl(`/list/pathway/${org.keggOrg}`));
                const allPaths = parseKeggTsv(listText);
                const nameMap = new Map(allPaths.map(p => [p.key, p.value.replace(/ - .*$/, '')]));
                for (const link of pathwayLinks) {
                    link.pathwayName = nameMap.get(link.pathwayId) ?? link.pathwayId;
                }
            }
            catch (err) {
                warnings.push(`KEGG pathway names: ${errText(err)}`);
            }
        }
        writeFileSync(join(outdir, 'pathways.csv'), toCsv(['gene', 'pathwayId', 'pathwayName'], pathwayLinks));
        steps.push({ step: 'pathways', status: 'done', detail: `${pathwayLinks.length} pathway links (${pathIdSet.size} unique) → pathways.csv` });
        // ── Step 3: Enrichment (Enrichr) ────────────────────────────────────
        const enrichmentRows = [];
        let enrichmentStep;
        if (genes.length >= 2) {
            try {
                const userListId = await submitGeneList(genes);
                const results = await getEnrichment(userListId, library);
                // Keep at most the first 30 terms returned.
                for (let i = 0; i < Math.min(results.length, 30); i++) {
                    const r = results[i];
                    enrichmentRows.push({
                        rank: i + 1,
                        term: String(r.term),
                        library,
                        adjustedPValue: Number(r.adjustedPValue).toExponential(2),
                        combinedScore: Number(r.combinedScore).toFixed(1),
                        genes: String(r.genes),
                    });
                }
                sources.push('Enrichr');
                enrichmentStep = { step: 'enrichment', status: 'done', detail: `${enrichmentRows.length} terms → enrichment.csv` };
            }
            catch (err) {
                warnings.push(`Enrichr: ${errText(err)}`);
                // A failed request is reported as such, not as "need ≥ 2 genes".
                enrichmentStep = { step: 'enrichment', status: 'failed', detail: 'Enrichr request failed (see warnings)' };
            }
        }
        else {
            warnings.push('Enrichment skipped: at least 2 genes required');
            enrichmentStep = { step: 'enrichment', status: 'skipped', detail: 'skipped (need ≥ 2 genes)' };
        }
        writeFileSync(join(outdir, 'enrichment.csv'), toCsv(['rank', 'term', 'library', 'adjustedPValue', 'combinedScore', 'genes'], enrichmentRows));
        steps.push(enrichmentStep);
        // ── Step 4: report.md ───────────────────────────────────────────────
        const reportLines = [
            `# Gene Annotation Report`,
            ``,
            `**Generated by biocli** v${getVersion()} on ${new Date().toISOString()}`,
            ``,
            `## Input`,
            ``,
            `- **Genes**: ${genes.join(', ')}`,
            `- **Organism**: ${org.name}`,
            `- **Sources**: ${sources.join(', ') || 'none'}`,
            // The warnings bullet is emitted only when there is something to report.
            ...(warnings.length > 0 ? [`- **Warnings**: ${warnings.length}`] : []),
            ``,
            `## Gene Summary`,
            ``,
            `| Symbol | Name | Chromosome | UniProt | Function |`,
            `|--------|------|------------|---------|----------|`,
        ];
        for (const g of geneAnnotations) {
            // Function text is cut to 80 characters to keep the table readable.
            const func = g.proteinFunction.length > 80 ? g.proteinFunction.slice(0, 80) + '...' : g.proteinFunction;
            reportLines.push(`| ${mdCell(g.symbol)} | ${mdCell(g.name)} | ${mdCell(g.chromosome)} | ${mdCell(g.uniprotAccession)} | ${mdCell(func)} |`);
        }
        if (pathwayLinks.length > 0) {
            // Group by pathway id so the heading count matches the rows and
            // distinct pathways are never merged under one display name.
            const byPathway = new Map();
            for (const link of pathwayLinks) {
                let entry = byPathway.get(link.pathwayId);
                if (!entry) {
                    entry = { name: link.pathwayName, members: new Set() };
                    byPathway.set(link.pathwayId, entry);
                }
                entry.members.add(link.gene);
            }
            reportLines.push('', `## KEGG Pathways (${byPathway.size} unique)`, '', '| Pathway | Genes |', '|---------|-------|');
            // The report lists at most 20 pathways; pathways.csv has all of them.
            for (const { name, members } of [...byPathway.values()].slice(0, 20)) {
                reportLines.push(`| ${mdCell(name)} | ${mdCell([...members].join(', '))} |`);
            }
        }
        if (enrichmentRows.length > 0) {
            reportLines.push('', `## Enrichment Analysis (${library})`, '', '| Rank | Term | Adj. P-value | Genes |', '|------|------|-------------|-------|');
            for (const r of enrichmentRows.slice(0, 15)) {
                reportLines.push(`| ${r.rank} | ${mdCell(r.term)} | ${r.adjustedPValue} | ${mdCell(r.genes)} |`);
            }
        }
        if (warnings.length > 0) {
            reportLines.push('', '## Warnings', '');
            for (const w of warnings)
                reportLines.push(`- ${w}`);
        }
        reportLines.push('', '---', `*Report generated by [biocli](https://github.com/youngfly93/biocli)*`);
        writeFileSync(join(outdir, 'report.md'), reportLines.join('\n') + '\n');
        steps.push({ step: 'report', status: 'done', detail: `report.md → ${outdir}` });
        // ── Step 5: summary.json + manifest.json ────────────────────────────
        const summary = {
            geneCount: genes.length,
            annotatedCount: geneAnnotations.filter(a => a.ncbiGeneId).length,
            pathwayCount: pathIdSet.size,
            enrichmentTerms: enrichmentRows.length,
            sources,
            warnings,
        };
        writeFileSync(join(outdir, 'summary.json'), JSON.stringify(summary, null, 2));
        const manifest = {
            biocliVersion: getVersion(),
            createdAt: new Date().toISOString(),
            command: 'workflow-annotate',
            input: { genes, organism: org.name, library },
            output: {
                'genes.csv': `${geneAnnotations.length} genes`,
                'pathways.csv': `${pathwayLinks.length} pathway links`,
                'enrichment.csv': `${enrichmentRows.length} terms`,
                'report.md': 'Markdown report',
                'summary.json': 'Overview statistics',
            },
            sources,
            warnings,
        };
        writeFileSync(join(outdir, 'manifest.json'), JSON.stringify(manifest, null, 2));
        // Logged only after both files are actually on disk.
        steps.push({ step: 'manifest', status: 'done', detail: `summary.json + manifest.json → ${outdir}` });
        return wrapResult({
            outdir,
            genes,
            steps,
            summary,
        }, {
            ids: {},
            sources,
            warnings,
            query: genes.join(','),
            organism: org.name,
        });
    },
});
@@ -0,0 +1,17 @@
1
+ /**
2
+ * aggregate/workflow-profile — Functional profile for a gene set.
3
+ *
4
+ * Unlike workflow-annotate (per-gene annotations), this command focuses on
5
+ * the SET-LEVEL view: shared pathways, interaction network, GO term
6
+ * distribution, and enrichment. Think "what does this gene set DO together?"
7
+ *
8
+ * Output directory:
9
+ * profiles.json — per-gene profile summaries (from gene-profile)
10
+ * interactions.csv — STRING protein-protein interaction network
11
+ * go_summary.csv — GO term frequency across the gene set
12
+ * shared_pathways.csv — KEGG pathways shared by 2+ input genes
13
+ * enrichment.csv — Enrichr enrichment results
14
+ * report.md — human-readable Markdown report
15
+ * manifest.json — provenance
16
+ */
17
+ export {};
@@ -0,0 +1,326 @@
1
+ /**
2
+ * aggregate/workflow-profile — Functional profile for a gene set.
3
+ *
4
+ * Unlike workflow-annotate (per-gene annotations), this command focuses on
5
+ * the SET-LEVEL view: shared pathways, interaction network, GO term
6
+ * distribution, and enrichment. Think "what does this gene set DO together?"
7
+ *
8
+ * Output directory:
9
+ * profiles.json — per-gene profile summaries (from gene-profile)
10
+ * interactions.csv — STRING protein-protein interaction network
11
+ * go_summary.csv — GO term frequency across the gene set
12
+ * shared_pathways.csv — KEGG pathways shared by 2+ input genes
13
+ * enrichment.csv — Enrichr enrichment results
14
+ * report.md — human-readable Markdown report
15
+ * manifest.json — provenance
16
+ */
17
+ import { cli, Strategy } from '../../registry.js';
18
+ import { CliError } from '../../errors.js';
19
+ import { wrapResult } from '../../types.js';
20
+ import { createHttpContextForDatabase } from '../../databases/index.js';
21
+ import { buildEutilsUrl } from '../../databases/ncbi.js';
22
+ import { buildUniprotUrl } from '../../databases/uniprot.js';
23
+ import { buildKeggUrl, parseKeggTsv } from '../../databases/kegg.js';
24
+ import { buildStringUrl } from '../../databases/string-db.js';
25
+ import { submitGeneList, getEnrichment } from '../../databases/enrichr.js';
26
+ import { parseGeneSummaries } from '../_shared/xml-helpers.js';
27
+ import { resolveOrganism } from '../_shared/organism-db.js';
28
+ import { mkdirSync, existsSync, writeFileSync } from 'node:fs';
29
+ import { join } from 'node:path';
30
+ import { getVersion } from '../../version.js';
31
+ // ── CSV helper ───────────────────────────────────────────────────────────────
32
/**
 * Render row objects as CSV text.
 *
 * @param {string[]} headers - Column names, used both for the header line and as row keys.
 * @param {Array<Record<string, unknown>>} rows - Objects to serialize, one per line.
 * @returns {string} CSV text; every line, including the last, ends with "\n".
 */
function toCsv(headers, rows) {
    // Null/undefined become empty cells. A cell is wrapped in quotes (with
    // inner quotes doubled) when it holds a comma, a quote, or a line break —
    // a lone carriage return counts, since many readers split records on it.
    const escape = (v) => {
        const s = String(v ?? '');
        return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
    };
    const headerLine = headers.map(escape).join(',');
    const dataLines = rows.map((r) => headers.map((h) => escape(r[h])).join(','));
    return [headerLine, ...dataLines].join('\n') + '\n';
}
40
+ // ── Main ─────────────────────────────────────────────────────────────────────
41
+ cli({
42
+ site: 'aggregate',
43
+ name: 'workflow-profile',
44
+ description: 'Functional profile for a gene set (interactions, GO terms, shared pathways)',
45
+ database: 'aggregate',
46
+ strategy: Strategy.PUBLIC,
47
+ defaultFormat: 'json',
48
+ timeoutSeconds: 180,
49
+ args: [
50
+ { name: 'genes', positional: true, required: true, help: 'Gene symbols: comma-separated (TP53,BRCA1,EGFR,MYC,CDK2)' },
51
+ { name: 'outdir', required: true, help: 'Output directory' },
52
+ { name: 'organism', default: 'human', help: 'Organism (human, mouse, rat, etc.)' },
53
+ { name: 'library', default: 'KEGG_2021_Human', help: 'Enrichr library' },
54
+ { name: 'plan', type: 'boolean', default: false, help: 'Preview steps without executing' },
55
+ ],
56
+ columns: ['step', 'status', 'detail'],
57
+ func: async (_ctx, args) => {
58
+ const genes = String(args.genes).split(',').map(s => s.trim()).filter(Boolean);
59
+ const outdir = String(args.outdir);
60
+ const library = String(args.library);
61
+ const planOnly = Boolean(args.plan);
62
+ if (genes.length < 2) {
63
+ throw new CliError('ARGUMENT', 'At least 2 gene symbols required for profiling', 'Example: biocli aggregate workflow-profile TP53,BRCA1,EGFR,MYC,CDK2 --outdir ./profile');
64
+ }
65
+ const org = resolveOrganism(String(args.organism));
66
+ const sources = [];
67
+ const warnings = [];
68
+ const steps = [];
69
+ if (planOnly) {
70
+ return wrapResult({
71
+ plan: [
72
+ { step: 'gene-profiles', detail: `Query NCBI Gene + UniProt for ${genes.length} gene(s)` },
73
+ { step: 'interactions', detail: `Query STRING network for all ${genes.length} genes` },
74
+ { step: 'pathways', detail: `Find KEGG pathways shared by 2+ genes` },
75
+ { step: 'go-summary', detail: `Aggregate GO terms across gene set` },
76
+ { step: 'enrichment', detail: `Run Enrichr (${library})` },
77
+ { step: 'output', detail: `Write profiles.json, interactions.csv, go_summary.csv, shared_pathways.csv, enrichment.csv, report.md → ${outdir}` },
78
+ ],
79
+ genes, organism: org.name, outdir,
80
+ }, { ids: {}, sources: [], warnings: [], query: genes.join(','), organism: org.name });
81
+ }
82
+ if (!existsSync(outdir))
83
+ mkdirSync(outdir, { recursive: true });
84
+ const ncbiCtx = createHttpContextForDatabase('ncbi');
85
+ const uniprotCtx = createHttpContextForDatabase('uniprot');
86
+ const keggCtx = createHttpContextForDatabase('kegg');
87
+ const stringCtx = createHttpContextForDatabase('string');
88
+ // ── Step 1: Per-gene profiles (NCBI + UniProt) ──────────────────────
89
+ const profiles = [];
90
+ const geneIds = {}; // symbol → ncbi gene id
91
+ const allGoTerms = [];
92
+ for (const gene of genes) {
93
+ const profile = { symbol: gene };
94
+ try {
95
+ const sr = await ncbiCtx.fetchJson(buildEutilsUrl('esearch.fcgi', {
96
+ db: 'gene', term: `${gene}[Gene Name] AND ${org.name}[Organism]`,
97
+ retmax: '5', retmode: 'json',
98
+ }));
99
+ const ids = sr?.esearchresult?.idlist ?? [];
100
+ if (ids.length > 0) {
101
+ const summ = await ncbiCtx.fetchJson(buildEutilsUrl('esummary.fcgi', { db: 'gene', id: ids.join(','), retmode: 'json' }));
102
+ const parsed = parseGeneSummaries(summ);
103
+ const best = parsed.find(g => g.symbol.toUpperCase() === gene.toUpperCase()) ?? parsed[0];
104
+ if (best) {
105
+ profile.ncbiGeneId = best.geneId;
106
+ profile.name = best.name;
107
+ profile.chromosome = best.chromosome;
108
+ geneIds[gene] = best.geneId;
109
+ }
110
+ }
111
+ }
112
+ catch (err) {
113
+ warnings.push(`NCBI ${gene}: ${err instanceof Error ? err.message : String(err)}`);
114
+ }
115
+ try {
116
+ const upResult = await uniprotCtx.fetchJson(buildUniprotUrl('/uniprotkb/search', {
117
+ query: `gene:${gene} AND organism_id:${org.taxId} AND reviewed:true`, format: 'json', size: '5',
118
+ }));
119
+ const results = (upResult?.results ?? []);
120
+ if (results.length > 0) {
121
+ const getGN = (e) => String(e.genes?.[0]?.geneName?.value ?? '');
122
+ const entry = results.find(e => getGN(e).toUpperCase() === gene.toUpperCase()) ?? results[0];
123
+ profile.uniprotAccession = entry.primaryAccession;
124
+ const comments = (entry.comments ?? []);
125
+ const funcComment = comments.find(c => c.commentType === 'FUNCTION');
126
+ const funcTexts = (funcComment?.texts ?? []);
127
+ profile.function = funcTexts.map(t => String(t.value ?? '')).join(' ');
128
+ const xrefs = (entry.uniProtKBCrossReferences ?? []);
129
+ xrefs.filter(x => x.database === 'GO').forEach(x => {
130
+ const id = String(x.id ?? '');
131
+ const props = (x.properties ?? []);
132
+ const termProp = props.find(p => p.key === 'GoTerm');
133
+ const term = String(termProp?.value ?? '');
134
+ const aspectMap = { C: 'CC', F: 'MF', P: 'BP' };
135
+ const [aspect, ...nameParts] = term.split(':');
136
+ allGoTerms.push({ gene, id, name: nameParts.join(':'), aspect: aspectMap[aspect] ?? aspect });
137
+ });
138
+ }
139
+ }
140
+ catch (err) {
141
+ warnings.push(`UniProt ${gene}: ${err instanceof Error ? err.message : String(err)}`);
142
+ }
143
+ profiles.push(profile);
144
+ }
145
+ if (profiles.some(p => p.ncbiGeneId))
146
+ sources.push('NCBI Gene');
147
+ if (profiles.some(p => p.uniprotAccession))
148
+ sources.push('UniProt');
149
+ writeFileSync(join(outdir, 'profiles.json'), JSON.stringify(profiles, null, 2));
150
+ steps.push({ step: 'gene-profiles', status: 'done', detail: `${profiles.length} gene(s) → profiles.json` });
151
+ // ── Step 2: STRING interactions ──────────────────────────────────────
152
+ const interactions = [];
153
+ try {
154
+ const data = await stringCtx.fetchJson(buildStringUrl('network', {
155
+ identifiers: genes.join('%0d'),
156
+ species: String(org.taxId),
157
+ required_score: '400',
158
+ }));
159
+ if (Array.isArray(data)) {
160
+ for (const item of data) {
161
+ interactions.push({
162
+ geneA: String(item.preferredName_A ?? ''),
163
+ geneB: String(item.preferredName_B ?? ''),
164
+ score: Number(item.score ?? 0),
165
+ experimentalScore: Number(item.escore ?? 0),
166
+ databaseScore: Number(item.dscore ?? 0),
167
+ textminingScore: Number(item.tscore ?? 0),
168
+ });
169
+ }
170
+ sources.push('STRING');
171
+ }
172
+ }
173
+ catch (err) {
174
+ warnings.push(`STRING: ${err instanceof Error ? err.message : String(err)}`);
175
+ }
176
+ writeFileSync(join(outdir, 'interactions.csv'), toCsv(['geneA', 'geneB', 'score', 'experimentalScore', 'databaseScore', 'textminingScore'], interactions));
177
+ steps.push({ step: 'interactions', status: 'done', detail: `${interactions.length} interactions → interactions.csv` });
178
+ // ── Step 3: Shared KEGG pathways ────────────────────────────────────
179
+ const genePathways = {};
180
+ const pathwayGenes = {};
181
+ for (const gene of genes) {
182
+ const gid = geneIds[gene];
183
+ if (!gid)
184
+ continue;
185
+ try {
186
+ const linkText = await keggCtx.fetchText(buildKeggUrl(`/link/pathway/${org.keggOrg}:${gid}`));
187
+ if (linkText?.trim()) {
188
+ const links = parseKeggTsv(linkText);
189
+ genePathways[gene] = new Set(links.map(l => l.value.replace(/^path:/, '')));
190
+ for (const pid of genePathways[gene]) {
191
+ if (!pathwayGenes[pid])
192
+ pathwayGenes[pid] = new Set();
193
+ pathwayGenes[pid].add(gene);
194
+ }
195
+ }
196
+ }
197
+ catch { /* non-fatal */ }
198
+ }
199
+ // Resolve pathway names
200
+ let pathNameMap = new Map();
201
+ try {
202
+ const listText = await keggCtx.fetchText(buildKeggUrl(`/list/pathway/${org.keggOrg}`));
203
+ pathNameMap = new Map(parseKeggTsv(listText).map(p => [p.key, p.value.replace(/ - .*$/, '')]));
204
+ if (Object.keys(pathwayGenes).length > 0)
205
+ sources.push('KEGG');
206
+ }
207
+ catch { /* non-fatal */ }
208
+ // Only pathways shared by 2+ genes
209
+ const sharedPathways = Object.entries(pathwayGenes)
210
+ .filter(([, gSet]) => gSet.size >= 2)
211
+ .map(([pid, gSet]) => ({
212
+ pathwayId: pid,
213
+ pathwayName: pathNameMap.get(pid) ?? pid,
214
+ geneCount: gSet.size,
215
+ genes: [...gSet].join(', '),
216
+ }))
217
+ .sort((a, b) => b.geneCount - a.geneCount);
218
+ writeFileSync(join(outdir, 'shared_pathways.csv'), toCsv(['pathwayId', 'pathwayName', 'geneCount', 'genes'], sharedPathways));
219
+ steps.push({ step: 'shared-pathways', status: 'done', detail: `${sharedPathways.length} pathways shared by 2+ genes → shared_pathways.csv` });
220
+ // ── Step 4: GO term frequency ───────────────────────────────────────
221
+ const goFreq = {};
222
+ for (const gt of allGoTerms) {
223
+ if (!goFreq[gt.id])
224
+ goFreq[gt.id] = { id: gt.id, name: gt.name, aspect: gt.aspect, genes: new Set() };
225
+ goFreq[gt.id].genes.add(gt.gene);
226
+ }
227
+ const goSummary = Object.values(goFreq)
228
+ .map(g => ({ id: g.id, name: g.name, aspect: g.aspect, geneCount: g.genes.size, genes: [...g.genes].join(', ') }))
229
+ .sort((a, b) => b.geneCount - a.geneCount);
230
+ writeFileSync(join(outdir, 'go_summary.csv'), toCsv(['id', 'name', 'aspect', 'geneCount', 'genes'], goSummary));
231
+ steps.push({ step: 'go-summary', status: 'done', detail: `${goSummary.length} GO terms → go_summary.csv` });
232
+ // ── Step 5: Enrichment ──────────────────────────────────────────────
233
+ const enrichmentRows = [];
234
+ try {
235
+ const userListId = await submitGeneList(genes);
236
+ const results = await getEnrichment(userListId, library);
237
+ for (let i = 0; i < Math.min(results.length, 30); i++) {
238
+ const r = results[i];
239
+ enrichmentRows.push({
240
+ rank: i + 1, term: r.term, library,
241
+ adjustedPValue: Number(r.adjustedPValue).toExponential(2),
242
+ combinedScore: Number(r.combinedScore).toFixed(1),
243
+ genes: r.genes,
244
+ });
245
+ }
246
+ sources.push('Enrichr');
247
+ }
248
+ catch (err) {
249
+ warnings.push(`Enrichr: ${err instanceof Error ? err.message : String(err)}`);
250
+ }
251
+ writeFileSync(join(outdir, 'enrichment.csv'), toCsv(['rank', 'term', 'library', 'adjustedPValue', 'combinedScore', 'genes'], enrichmentRows));
252
+ steps.push({ step: 'enrichment', status: enrichmentRows.length > 0 ? 'done' : 'skipped',
253
+ detail: `${enrichmentRows.length} terms → enrichment.csv` });
254
+ // ── Step 6: report.md ───────────────────────────────────────────────
255
+ const lines = [
256
+ `# Gene Set Functional Profile`, '',
257
+ `**Generated by biocli** v${getVersion()} on ${new Date().toISOString()}`, '',
258
+ `## Input`, '',
259
+ `- **Genes**: ${genes.join(', ')} (${genes.length})`,
260
+ `- **Organism**: ${org.name}`,
261
+ `- **Sources**: ${sources.join(', ')}`,
262
+ warnings.length > 0 ? `- **Warnings**: ${warnings.length}` : '', '',
263
+ ];
264
+ if (sharedPathways.length > 0) {
265
+ lines.push(`## Shared Pathways (${sharedPathways.length})`, '', '| Pathway | Genes | Count |', '|---------|-------|-------|');
266
+ for (const p of sharedPathways.slice(0, 20)) {
267
+ lines.push(`| ${p.pathwayName} | ${p.genes} | ${p.geneCount} |`);
268
+ }
269
+ lines.push('');
270
+ }
271
+ if (interactions.length > 0) {
272
+ lines.push(`## Protein Interactions (${interactions.length})`, '', '| Gene A | Gene B | Score |', '|--------|--------|-------|');
273
+ for (const i of interactions.slice(0, 20)) {
274
+ lines.push(`| ${i.geneA} | ${i.geneB} | ${i.score} |`);
275
+ }
276
+ lines.push('');
277
+ }
278
+ if (goSummary.length > 0) {
279
+ const topGo = goSummary.filter(g => g.geneCount >= 2).slice(0, 15);
280
+ if (topGo.length > 0) {
281
+ lines.push(`## GO Terms Shared by 2+ Genes (${topGo.length})`, '', '| GO Term | Aspect | Genes | Count |', '|---------|--------|-------|-------|');
282
+ for (const g of topGo) {
283
+ lines.push(`| ${g.name} | ${g.aspect} | ${g.genes} | ${g.geneCount} |`);
284
+ }
285
+ lines.push('');
286
+ }
287
+ }
288
+ if (enrichmentRows.length > 0) {
289
+ lines.push(`## Enrichment (${library})`, '', '| Rank | Term | Adj. P-value | Genes |', '|------|------|-------------|-------|');
290
+ for (const r of enrichmentRows.slice(0, 15)) {
291
+ lines.push(`| ${r.rank} | ${r.term} | ${r.adjustedPValue} | ${r.genes} |`);
292
+ }
293
+ lines.push('');
294
+ }
295
+ if (warnings.length > 0) {
296
+ lines.push('## Warnings', '');
297
+ for (const w of warnings)
298
+ lines.push(`- ${w}`);
299
+ lines.push('');
300
+ }
301
+ lines.push('---', `*Generated by [biocli](https://github.com/youngfly93/biocli)*`);
302
+ writeFileSync(join(outdir, 'report.md'), lines.filter(l => l !== undefined).join('\n') + '\n');
303
+ steps.push({ step: 'report', status: 'done', detail: `report.md → ${outdir}` });
304
+ // ── manifest.json ───────────────────────────────────────────────────
305
+ const manifest = {
306
+ biocliVersion: getVersion(), createdAt: new Date().toISOString(),
307
+ command: 'workflow-profile', input: { genes, organism: org.name, library },
308
+ output: {
309
+ 'profiles.json': `${profiles.length} gene profiles`,
310
+ 'interactions.csv': `${interactions.length} interactions`,
311
+ 'shared_pathways.csv': `${sharedPathways.length} shared pathways`,
312
+ 'go_summary.csv': `${goSummary.length} GO terms`,
313
+ 'enrichment.csv': `${enrichmentRows.length} enrichment terms`,
314
+ 'report.md': 'Markdown report',
315
+ },
316
+ sources, warnings,
317
+ };
318
+ steps.push({ step: 'manifest', status: 'done', detail: `manifest.json → ${outdir}` });
319
+ writeFileSync(join(outdir, 'manifest.json'), JSON.stringify(manifest, null, 2));
320
+ return wrapResult({ outdir, genes, steps, summary: {
321
+ geneCount: genes.length, interactionCount: interactions.length,
322
+ sharedPathwayCount: sharedPathways.length, goTermCount: goSummary.length,
323
+ enrichmentTerms: enrichmentRows.length, sources, warnings,
324
+ } }, { ids: {}, sources, warnings, query: genes.join(','), organism: org.name });
325
+ },
326
+ });
@@ -10,7 +10,7 @@
10
10
  */
11
11
  import { cli, Strategy } from '../../registry.js';
12
12
  import { CliError } from '../../errors.js';
13
- import { mkdirSync, existsSync, createWriteStream } from 'node:fs';
13
+ import { mkdirSync, existsSync, createWriteStream, statSync } from 'node:fs';
14
14
  import { join } from 'node:path';
15
15
  import { pipeline } from 'node:stream/promises';
16
16
  import { Readable } from 'node:stream';
@@ -96,6 +96,24 @@ cli({
96
96
  for (const file of files) {
97
97
  const fileUrl = `${supplUrl}${file.name}`;
98
98
  const destPath = join(outdir, file.name);
99
+ // Resume: skip only if local file matches expected remote size
100
+ if (existsSync(destPath) && statSync(destPath).size > 0) {
101
+ try {
102
+ const head = await fetch(fileUrl, { method: 'HEAD' });
103
+ if (head.ok) {
104
+ const expectedSize = Number(head.headers.get('content-length') ?? 0);
105
+ const localSize = statSync(destPath).size;
106
+ if (expectedSize > 0 && localSize === expectedSize) {
107
+ rows.push({ file: file.name, size: file.size, status: `skipped (complete)` });
108
+ continue;
109
+ }
110
+ // Incomplete or mismatched — will re-download below
111
+ }
112
+ }
113
+ catch {
114
+ // HEAD failed — proceed with download
115
+ }
116
+ }
99
117
  try {
100
118
  const response = await fetch(fileUrl);
101
119
  if (!response.ok || !response.body) {
@@ -13,7 +13,7 @@
13
13
  */
14
14
  import { cli, Strategy } from '../../registry.js';
15
15
  import { CliError } from '../../errors.js';
16
- import { mkdirSync, existsSync, createWriteStream } from 'node:fs';
16
+ import { mkdirSync, existsSync, createWriteStream, statSync } from 'node:fs';
17
17
  import { join } from 'node:path';
18
18
  import { pipeline } from 'node:stream/promises';
19
19
  import { Readable } from 'node:stream';
@@ -141,6 +141,29 @@ cli({
141
141
  catch { /* skip */ }
142
142
  continue;
143
143
  }
144
+ // Resume: skip if file already exists AND matches expected size (HEAD check)
145
+ if (existsSync(destPath) && statSync(destPath).size > 0) {
146
+ try {
147
+ const head = await fetch(url, { method: 'HEAD' });
148
+ if (head.ok) {
149
+ const expectedSize = Number(head.headers.get('content-length') ?? 0);
150
+ const localSize = statSync(destPath).size;
151
+ if (expectedSize > 0 && localSize === expectedSize) {
152
+ rows.push({ file: fileName, size: formatSize(localSize), status: `skipped (complete, ${formatSize(localSize)})` });
153
+ continue;
154
+ }
155
+ else if (expectedSize > 0 && localSize < expectedSize) {
156
+ // Incomplete file — delete and re-download
157
+ const { rmSync } = await import('node:fs');
158
+ rmSync(destPath);
159
+ }
160
+ // localSize > expectedSize or expectedSize unknown: re-download
161
+ }
162
+ }
163
+ catch {
164
+ // HEAD failed — proceed with download
165
+ }
166
+ }
144
167
  try {
145
168
  // Max-size check: HEAD request first to get size
146
169
  if (maxSizeBytes < Infinity) {
@@ -68,7 +68,8 @@ export function registerCommandToProgram(siteCmd, cmd) {
68
68
  .option('-A, --all-columns', 'Show all available columns', false)
69
69
  .option('-v, --verbose', 'Debug output', false)
70
70
  .option('--input <file>', 'Batch input: file with one ID per line, or - for stdin')
71
- .option('--no-cache', 'Skip cache and fetch fresh data');
71
+ .option('--no-cache', 'Skip cache and fetch fresh data')
72
+ .option('--retry <n>', 'Retry failed batch items N times (default: 0)', '0');
72
73
  subCmd.action(async (...actionArgs) => {
73
74
  const actionOpts = actionArgs[positionalArgs.length] ?? {};
74
75
  const optionsRecord = typeof actionOpts === 'object' && actionOpts !== null ? actionOpts : {};
@@ -92,6 +93,18 @@ export function registerCommandToProgram(siteCmd, cmd) {
92
93
  }
93
94
  const verbose = optionsRecord.verbose === true;
94
95
  const inputFile = typeof optionsRecord.input === 'string' ? optionsRecord.input : undefined;
96
+ // If --input is provided, read file and inject into positional arg.
97
+ // Only for commands whose positional arg is named "genes" (multi-entity pattern).
98
+ // Single-entity commands (gene-dossier, variant-dossier, etc.) use batch mode instead.
99
+ const primaryArgName = positionalArgs[0]?.name;
100
+ const supportsInputInject = primaryArgName === 'genes';
101
+ if (inputFile && supportsInputInject && !kwargs[primaryArgName]) {
102
+ const { parseBatchInput: parseInput } = await import('./batch.js');
103
+ const items = parseInput(undefined, inputFile);
104
+ if (items && items.length > 0) {
105
+ kwargs[primaryArgName] = items.join(',');
106
+ }
107
+ }
95
108
  // Validate required positional args (unless --input provides batch input)
96
109
  if (!inputFile) {
97
110
  for (const arg of positionalArgs) {
@@ -113,17 +126,21 @@ export function registerCommandToProgram(siteCmd, cmd) {
113
126
  // Commander's --no-cache sets optionsRecord.cache to false
114
127
  const noCache = optionsRecord.cache === false;
115
128
  // ── Batch mode: --input or comma-separated positional ────────────
129
+ // Skip batch for aggregate commands — they handle their own multi-input parsing
116
130
  const primaryArg = positionalArgs[0]; // first positional = primary ID/query
117
- const batchItems = primaryArg
131
+ const skipBatch = cmd.database === 'aggregate';
132
+ const batchItems = (primaryArg && !skipBatch)
118
133
  ? parseBatchInput(kwargs[primaryArg.name], inputFile)
119
134
  : null;
135
+ const retryCount = Math.max(0, parseInt(String(optionsRecord.retry ?? '0'), 10) || 0);
120
136
  let result;
121
137
  if (batchItems && primaryArg) {
122
138
  const spinnerLabel = `Batch ${fullName(cmd)} (${batchItems.length} items)…`;
123
139
  const spinner = startSpinner(spinnerLabel);
124
140
  const batchResults = [];
125
- const errors = [];
141
+ let failedItems = [];
126
142
  try {
143
+ // First pass
127
144
  for (const item of batchItems) {
128
145
  try {
129
146
  const batchKwargs = { ...kwargs, [primaryArg.name]: item };
@@ -132,19 +149,35 @@ export function registerCommandToProgram(siteCmd, cmd) {
132
149
  batchResults.push(r);
133
150
  }
134
151
  catch (err) {
135
- errors.push(`${item}: ${err instanceof Error ? err.message : String(err)}`);
152
+ failedItems.push(item);
136
153
  if (verbose)
137
154
  console.error(chalk.yellow(`[Batch] ${item} failed: ${err instanceof Error ? err.message : String(err)}`));
138
155
  }
139
156
  }
157
+ // Retry failed items
158
+ for (let attempt = 1; attempt <= retryCount && failedItems.length > 0; attempt++) {
159
+ if (verbose)
160
+ console.error(chalk.dim(`[Batch] Retry ${attempt}/${retryCount}: ${failedItems.length} item(s)…`));
161
+ const stillFailed = [];
162
+ for (const item of failedItems) {
163
+ try {
164
+ const batchKwargs = { ...kwargs, [primaryArg.name]: item };
165
+ const r = await executeCommand(cmd, batchKwargs, verbose, { noCache: true });
166
+ if (r !== null && r !== undefined)
167
+ batchResults.push(r);
168
+ }
169
+ catch {
170
+ stillFailed.push(item);
171
+ }
172
+ }
173
+ failedItems = stillFailed;
174
+ }
140
175
  }
141
176
  finally {
142
177
  spinner.stop();
143
178
  }
144
- if (errors.length > 0) {
145
- console.error(chalk.yellow(`[Batch] ${errors.length}/${batchItems.length} failed`));
146
- if (verbose)
147
- errors.forEach(e => console.error(chalk.dim(` ${e}`)));
179
+ if (failedItems.length > 0) {
180
+ console.error(chalk.yellow(`[Batch] ${failedItems.length}/${batchItems.length} failed${retryCount > 0 ? ` (after ${retryCount} retries)` : ''}: ${failedItems.join(', ')}`));
148
181
  }
149
182
  if (!batchResults.length) {
150
183
  console.error(chalk.red(`All ${batchItems.length} batch items failed.`));
@@ -168,7 +201,8 @@ export function registerCommandToProgram(siteCmd, cmd) {
168
201
  if (result === null || result === undefined) {
169
202
  return;
170
203
  }
171
- // Extract display metadata if the command returned ResultWithMeta
204
+ // Extract display metadata if the command returned ResultWithMeta or BiocliResult
205
+ let biocliResultColumns = false;
172
206
  let renderData = result;
173
207
  let totalCount;
174
208
  let query;
@@ -177,6 +211,22 @@ export function registerCommandToProgram(siteCmd, cmd) {
177
211
  totalCount = result.meta.totalCount;
178
212
  query = result.meta.query;
179
213
  }
214
+ else if (typeof result === 'object' && result !== null && 'data' in result && 'sources' in result) {
215
+ // BiocliResult envelope — for report/table/csv, render the data payload
216
+ const biocliResult = result;
217
+ query = String(biocliResult.query ?? '');
218
+ if (format === 'json' || format === 'yaml' || format === 'yml') {
219
+ // JSON/YAML: render the full envelope (agent-friendly)
220
+ renderData = result;
221
+ }
222
+ else {
223
+ // table/csv/report/md: render the data payload with actual keys
224
+ renderData = biocliResult.data;
225
+ // Override columns to use data's actual keys (command-declared columns
226
+ // may not match the BiocliResult data payload field names)
227
+ biocliResultColumns = true;
228
+ }
229
+ }
180
230
  const resolved = getRegistry().get(fullName(cmd)) ?? cmd;
181
231
  if (format === 'table' && resolved.defaultFormat) {
182
232
  format = resolved.defaultFormat;
@@ -192,8 +242,9 @@ export function registerCommandToProgram(siteCmd, cmd) {
192
242
  // --columns pmid,title,abstract → user-specified subset
193
243
  // --all-columns / -A → all keys from first row
194
244
  // (default) → adapter-declared columns
195
- let displayColumns = resolved.columns;
196
- const allColumns = optionsRecord.allColumns === true;
245
+ // For BiocliResult data, use actual keys from the data payload
246
+ let displayColumns = biocliResultColumns ? undefined : resolved.columns;
247
+ const allColumns = optionsRecord.allColumns === true || biocliResultColumns;
197
248
  const userColumns = typeof optionsRecord.columns === 'string' ? optionsRecord.columns : undefined;
198
249
  if (userColumns) {
199
250
  displayColumns = userColumns.split(',').map((s) => s.trim()).filter(Boolean);
package/dist/output.js CHANGED
@@ -194,6 +194,9 @@ export function render(data, opts = {}) {
194
194
  case 'markdown':
195
195
  renderMarkdown(data, opts);
196
196
  break;
197
+ case 'report':
198
+ renderReport(data, opts);
199
+ break;
197
200
  case 'csv':
198
201
  renderCsv(data, opts);
199
202
  break;
@@ -386,6 +389,55 @@ function renderCsv(data, opts) {
386
389
  }).join(','));
387
390
  }
388
391
  }
392
/**
 * Print `data` to stdout as a Markdown report: a title, a generation
 * timestamp, optional query / total-count metadata, the column list and a
 * pipe table with one row per record.
 *
 * @param {unknown} data - Payload handed to `normalizeRows` to obtain the row objects.
 * @param {object} opts - Render options. Reads `title`, `source`, `query` and
 *   `totalCount`; the whole object is also forwarded to `resolveColumns`.
 * @returns {void}
 */
function renderReport(data, opts) {
    // Longest cell text (before escaping) that is printed in full.
    const MAX_CELL_LENGTH = 80;
    const rows = normalizeRows(data);
    const columns = resolveColumns(rows, opts);
    // An unescaped "|" would be read as a column separator inside a table row.
    const escapePipes = (text) => text.replace(/\|/g, '\\|');
    const formatCell = (raw) => {
        let text;
        if (Array.isArray(raw)) {
            // Arrays are summarised rather than expanded to keep rows short.
            text = `${raw.length} items`;
        }
        else if (raw !== null && typeof raw === 'object') {
            // JSON.stringify yields undefined when toJSON() returns undefined.
            text = JSON.stringify(raw) ?? '';
        }
        else {
            text = String(raw ?? '');
        }
        // A table row must stay on one physical line, so fold line breaks.
        text = text.replace(/[\r\n]+/g, ' ');
        // Truncate BEFORE escaping so escape backslashes neither count toward
        // the limit nor get separated from the pipe they protect.
        const clipped = text.length > MAX_CELL_LENGTH
            ? `${text.slice(0, MAX_CELL_LENGTH)}...`
            : text;
        return escapePipes(clipped);
    };
    // Title
    const title = opts.title ?? opts.source ?? 'biocli Report';
    console.log(`# ${title}`);
    console.log();
    console.log(`*Generated on ${new Date().toISOString()}*`);
    console.log();
    // Metadata
    if (opts.query)
        console.log(`**Query**: ${opts.query}`);
    if (opts.totalCount !== undefined)
        console.log(`**Total results**: ${opts.totalCount} (showing ${rows.length})`);
    console.log(`**Columns**: ${columns.join(', ')}`);
    console.log();
    if (!rows.length) {
        console.log('*No results found.*');
        return;
    }
    // Data table
    console.log('## Results');
    console.log();
    console.log(`| ${columns.map((c) => escapePipes(String(c))).join(' | ')} |`);
    console.log(`| ${columns.map(() => '---').join(' | ')} |`);
    for (const row of rows) {
        const cells = columns.map((c) => formatCell(row[c]));
        console.log(`| ${cells.join(' | ')} |`);
    }
    console.log();
    console.log('---');
    console.log('*Generated by [biocli](https://github.com/youngfly93/biocli)*');
}
389
441
  function renderYaml(data) {
390
442
  console.log(yaml.dump(data, { sortKeys: false, lineWidth: 120, noRefs: true }));
391
443
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yangfei_93sky/biocli",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "Query biological databases from the terminal — agent-first bioinformatics CLI",
5
5
  "type": "module",
6
6
  "main": "dist/main.js",