@yangfei_93sky/biocli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +197 -0
  3. package/dist/batch.d.ts +20 -0
  4. package/dist/batch.js +69 -0
  5. package/dist/build-manifest.d.ts +38 -0
  6. package/dist/build-manifest.js +186 -0
  7. package/dist/cache.d.ts +28 -0
  8. package/dist/cache.js +126 -0
  9. package/dist/cli-manifest.json +1500 -0
  10. package/dist/cli.d.ts +7 -0
  11. package/dist/cli.js +336 -0
  12. package/dist/clis/_shared/common.d.ts +8 -0
  13. package/dist/clis/_shared/common.js +13 -0
  14. package/dist/clis/_shared/eutils.d.ts +9 -0
  15. package/dist/clis/_shared/eutils.js +9 -0
  16. package/dist/clis/_shared/organism-db.d.ts +23 -0
  17. package/dist/clis/_shared/organism-db.js +58 -0
  18. package/dist/clis/_shared/xml-helpers.d.ts +58 -0
  19. package/dist/clis/_shared/xml-helpers.js +266 -0
  20. package/dist/clis/aggregate/enrichment.d.ts +7 -0
  21. package/dist/clis/aggregate/enrichment.js +105 -0
  22. package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
  23. package/dist/clis/aggregate/gene-dossier.js +248 -0
  24. package/dist/clis/aggregate/gene-profile.d.ts +16 -0
  25. package/dist/clis/aggregate/gene-profile.js +305 -0
  26. package/dist/clis/aggregate/literature-brief.d.ts +7 -0
  27. package/dist/clis/aggregate/literature-brief.js +79 -0
  28. package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
  29. package/dist/clis/aggregate/variant-dossier.js +161 -0
  30. package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
  31. package/dist/clis/aggregate/variant-interpret.js +210 -0
  32. package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
  33. package/dist/clis/aggregate/workflow-prepare.js +228 -0
  34. package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
  35. package/dist/clis/aggregate/workflow-scout.js +175 -0
  36. package/dist/clis/clinvar/search.d.ts +8 -0
  37. package/dist/clis/clinvar/search.js +61 -0
  38. package/dist/clis/clinvar/variant.d.ts +7 -0
  39. package/dist/clis/clinvar/variant.js +53 -0
  40. package/dist/clis/enrichr/analyze.d.ts +7 -0
  41. package/dist/clis/enrichr/analyze.js +48 -0
  42. package/dist/clis/ensembl/lookup.d.ts +6 -0
  43. package/dist/clis/ensembl/lookup.js +38 -0
  44. package/dist/clis/ensembl/vep.d.ts +7 -0
  45. package/dist/clis/ensembl/vep.js +86 -0
  46. package/dist/clis/ensembl/xrefs.d.ts +6 -0
  47. package/dist/clis/ensembl/xrefs.js +36 -0
  48. package/dist/clis/gene/fetch.d.ts +10 -0
  49. package/dist/clis/gene/fetch.js +96 -0
  50. package/dist/clis/gene/info.d.ts +7 -0
  51. package/dist/clis/gene/info.js +37 -0
  52. package/dist/clis/gene/search.d.ts +7 -0
  53. package/dist/clis/gene/search.js +71 -0
  54. package/dist/clis/geo/dataset.d.ts +7 -0
  55. package/dist/clis/geo/dataset.js +55 -0
  56. package/dist/clis/geo/download.d.ts +17 -0
  57. package/dist/clis/geo/download.js +115 -0
  58. package/dist/clis/geo/samples.d.ts +7 -0
  59. package/dist/clis/geo/samples.js +57 -0
  60. package/dist/clis/geo/search.d.ts +8 -0
  61. package/dist/clis/geo/search.js +66 -0
  62. package/dist/clis/kegg/convert.d.ts +7 -0
  63. package/dist/clis/kegg/convert.js +37 -0
  64. package/dist/clis/kegg/disease.d.ts +6 -0
  65. package/dist/clis/kegg/disease.js +57 -0
  66. package/dist/clis/kegg/link.d.ts +7 -0
  67. package/dist/clis/kegg/link.js +36 -0
  68. package/dist/clis/kegg/pathway.d.ts +6 -0
  69. package/dist/clis/kegg/pathway.js +37 -0
  70. package/dist/clis/pubmed/abstract.d.ts +7 -0
  71. package/dist/clis/pubmed/abstract.js +42 -0
  72. package/dist/clis/pubmed/cited-by.d.ts +7 -0
  73. package/dist/clis/pubmed/cited-by.js +77 -0
  74. package/dist/clis/pubmed/fetch.d.ts +6 -0
  75. package/dist/clis/pubmed/fetch.js +36 -0
  76. package/dist/clis/pubmed/info.yaml +22 -0
  77. package/dist/clis/pubmed/related.d.ts +7 -0
  78. package/dist/clis/pubmed/related.js +81 -0
  79. package/dist/clis/pubmed/search.d.ts +8 -0
  80. package/dist/clis/pubmed/search.js +63 -0
  81. package/dist/clis/snp/lookup.d.ts +7 -0
  82. package/dist/clis/snp/lookup.js +57 -0
  83. package/dist/clis/sra/download.d.ts +18 -0
  84. package/dist/clis/sra/download.js +217 -0
  85. package/dist/clis/sra/run.d.ts +8 -0
  86. package/dist/clis/sra/run.js +77 -0
  87. package/dist/clis/sra/search.d.ts +8 -0
  88. package/dist/clis/sra/search.js +83 -0
  89. package/dist/clis/string/enrichment.d.ts +7 -0
  90. package/dist/clis/string/enrichment.js +50 -0
  91. package/dist/clis/string/network.d.ts +7 -0
  92. package/dist/clis/string/network.js +47 -0
  93. package/dist/clis/string/partners.d.ts +4 -0
  94. package/dist/clis/string/partners.js +44 -0
  95. package/dist/clis/taxonomy/lookup.d.ts +8 -0
  96. package/dist/clis/taxonomy/lookup.js +54 -0
  97. package/dist/clis/uniprot/fetch.d.ts +7 -0
  98. package/dist/clis/uniprot/fetch.js +82 -0
  99. package/dist/clis/uniprot/search.d.ts +6 -0
  100. package/dist/clis/uniprot/search.js +65 -0
  101. package/dist/clis/uniprot/sequence.d.ts +7 -0
  102. package/dist/clis/uniprot/sequence.js +51 -0
  103. package/dist/commander-adapter.d.ts +27 -0
  104. package/dist/commander-adapter.js +286 -0
  105. package/dist/completion.d.ts +19 -0
  106. package/dist/completion.js +117 -0
  107. package/dist/config.d.ts +57 -0
  108. package/dist/config.js +94 -0
  109. package/dist/databases/enrichr.d.ts +28 -0
  110. package/dist/databases/enrichr.js +131 -0
  111. package/dist/databases/ensembl.d.ts +14 -0
  112. package/dist/databases/ensembl.js +106 -0
  113. package/dist/databases/index.d.ts +45 -0
  114. package/dist/databases/index.js +49 -0
  115. package/dist/databases/kegg.d.ts +26 -0
  116. package/dist/databases/kegg.js +136 -0
  117. package/dist/databases/ncbi.d.ts +28 -0
  118. package/dist/databases/ncbi.js +144 -0
  119. package/dist/databases/string-db.d.ts +19 -0
  120. package/dist/databases/string-db.js +105 -0
  121. package/dist/databases/uniprot.d.ts +13 -0
  122. package/dist/databases/uniprot.js +110 -0
  123. package/dist/discovery.d.ts +32 -0
  124. package/dist/discovery.js +235 -0
  125. package/dist/doctor.d.ts +19 -0
  126. package/dist/doctor.js +151 -0
  127. package/dist/errors.d.ts +68 -0
  128. package/dist/errors.js +105 -0
  129. package/dist/execution.d.ts +15 -0
  130. package/dist/execution.js +178 -0
  131. package/dist/hooks.d.ts +48 -0
  132. package/dist/hooks.js +58 -0
  133. package/dist/main.d.ts +13 -0
  134. package/dist/main.js +31 -0
  135. package/dist/ncbi-fetch.d.ts +10 -0
  136. package/dist/ncbi-fetch.js +10 -0
  137. package/dist/output.d.ts +18 -0
  138. package/dist/output.js +394 -0
  139. package/dist/pipeline/executor.d.ts +22 -0
  140. package/dist/pipeline/executor.js +40 -0
  141. package/dist/pipeline/index.d.ts +6 -0
  142. package/dist/pipeline/index.js +6 -0
  143. package/dist/pipeline/registry.d.ts +16 -0
  144. package/dist/pipeline/registry.js +31 -0
  145. package/dist/pipeline/steps/fetch.d.ts +21 -0
  146. package/dist/pipeline/steps/fetch.js +160 -0
  147. package/dist/pipeline/steps/transform.d.ts +26 -0
  148. package/dist/pipeline/steps/transform.js +92 -0
  149. package/dist/pipeline/steps/xml-parse.d.ts +12 -0
  150. package/dist/pipeline/steps/xml-parse.js +27 -0
  151. package/dist/pipeline/template.d.ts +35 -0
  152. package/dist/pipeline/template.js +312 -0
  153. package/dist/rate-limiter.d.ts +56 -0
  154. package/dist/rate-limiter.js +120 -0
  155. package/dist/registry-api.d.ts +15 -0
  156. package/dist/registry-api.js +13 -0
  157. package/dist/registry.d.ts +90 -0
  158. package/dist/registry.js +100 -0
  159. package/dist/schema.d.ts +80 -0
  160. package/dist/schema.js +72 -0
  161. package/dist/spinner.d.ts +19 -0
  162. package/dist/spinner.js +37 -0
  163. package/dist/types.d.ts +101 -0
  164. package/dist/types.js +27 -0
  165. package/dist/utils.d.ts +16 -0
  166. package/dist/utils.js +40 -0
  167. package/dist/validate.d.ts +29 -0
  168. package/dist/validate.js +136 -0
  169. package/dist/verify.d.ts +20 -0
  170. package/dist/verify.js +131 -0
  171. package/dist/version.d.ts +13 -0
  172. package/dist/version.js +36 -0
  173. package/dist/xml-parser.d.ts +19 -0
  174. package/dist/xml-parser.js +119 -0
  175. package/dist/yaml-schema.d.ts +40 -0
  176. package/dist/yaml-schema.js +62 -0
  177. package/package.json +68 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 youngfly93
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,197 @@
1
+ # biocli
2
+
3
+ Query biological databases from the terminal. Agent-first design.
4
+
5
+ ```
6
+ biocli v0.2.0
7
+ NCBI · UniProt · KEGG · STRING · Ensembl · Enrichr
8
+ 42 commands · 6 database backends · 8 workflow commands · 4 download commands
9
+ ```
10
+
11
+ ## Install
12
+
13
+ ```bash
14
+ npm install -g @biocli/cli
15
+ ```
16
+
17
+ Requires Node.js >= 20. No API keys needed (optional NCBI key increases rate limit).
18
+
19
+ ## Why biocli
20
+
21
+ biocli is the only CLI that takes you from a **research question** to an **analysis-ready working directory** — scout datasets, download data, fetch annotations, all in one pipeline.
22
+
23
+ ```bash
24
+ # Scout relevant datasets for your research question
25
+ biocli aggregate workflow-scout "TP53 breast cancer RNA-seq" --gene TP53
26
+
27
+ # Prepare a working directory with data + annotations + manifest
28
+ biocli aggregate workflow-prepare GSE315149 --gene TP53 --outdir ./project
29
+ ```
30
+
31
+ Designed for **AI agents** (Claude Code, Codex CLI, etc.) — structured JSON output, per-command schema, self-describing help, batch input, local cache.
32
+
33
+ ## How biocli compares
34
+
35
+ | | biocli | gget | BioMCP | EDirect |
36
+ |--|--------|------|--------|---------|
37
+ | Query biological databases | ✅ | ✅ | ✅ | ✅ |
38
+ | Structured JSON output | ✅ | ✅ | ✅ | ❌ |
39
+ | Cross-database aggregation | ✅ | ❌ | ✅ | ❌ |
40
+ | Download GEO/SRA data files | ✅ | ❌ | ❌ | ❌ |
41
+ | Dataset discovery (scout) | ✅ | ❌ | ❌ | ❌ |
42
+ | Working directory prep (prepare) | ✅ | ❌ | ❌ | ❌ |
43
+ | Agent command self-description | ✅ | ❌ | ⚠️ | ❌ |
44
+ | Safe preview (--plan/--dry-run) | ✅ | ❌ | ❌ | ❌ |
45
+ | Per-command JSON Schema | ✅ | ❌ | ❌ | ❌ |
46
+ | Local response cache | ✅ | ❌ | ❌ | ❌ |
47
+ | Batch input (--input) | ✅ | ❌ | ✅ | ✅ |
48
+
49
+ > **gget** excels at sequence analysis (BLAST, AlphaFold, MUSCLE). **BioMCP** covers more biomedical entities (drugs, trials, diseases). **EDirect** has the deepest NCBI Entrez integration. **biocli** is the only one that combines query + download + data preparation into agent-orchestrated workflows.
50
+
51
+ ### Benchmark: Agent-First Biological Workflow Tasks (2026-04-04)
52
+
53
+ 12 tasks across gene intelligence, variant interpretation, literature search, and data preparation. Task scores are computed automatically from raw output; cross-cutting scores come from a manual audit with published justifications. [Full methodology →](benchmarks/README.md)
54
+
55
+ <p align="center">
56
+ <img src="benchmarks/results/2026-04-04/plots/total_scores.png" width="420" alt="Overall benchmark scores">
57
+ </p>
58
+
59
+ | Tool | Version | Task Success | Agent Readiness | Workflow Depth | Safety | Reproducibility | **Total** |
60
+ |------|---------|:---:|:---:|:---:|:---:|:---:|:---:|
61
+ | **biocli** | 0.2.0 | 47/49 | 10/10 | 10/10 | 9/10 | 10/10 | **96/100** |
62
+ | BioMCP | 0.8.19 | 20/49 | 6/10 | 4/10 | 3/10 | 2/10 | 44/100 |
63
+ | gget | 0.30.3 | 8/49 | 3/10 | 2/10 | 2/10 | 1/10 | 24/100 |
64
+
65
+ <details>
66
+ <summary>Detailed breakdown by dimension and category</summary>
67
+
68
+ <p align="center">
69
+ <img src="benchmarks/results/2026-04-04/plots/dimensions.png" width="560" alt="Cross-cutting quality dimensions">
70
+ </p>
71
+
72
+ <p align="center">
73
+ <img src="benchmarks/results/2026-04-04/plots/task_categories.png" width="500" alt="Task success by category">
74
+ </p>
75
+
76
+ </details>
77
+
78
+ > All three tools were installed (`npm install -g @biocli/cli`, `pip install gget==0.30.3`, `uv tool install biomcp-cli==0.8.19`) and executed on the same machine with the same inputs. Raw stdout/stderr, scoring scripts, and runner scripts are in [`benchmarks/`](benchmarks/). BioMCP excels at biomedical entity breadth (drugs, trials, diseases) not covered by this task set; gget excels at sequence analysis (BLAST, AlphaFold) not covered here.
79
+
80
+ ## Quick start
81
+
82
+ **One command replaces 4 browser tabs:**
83
+
84
+ ```bash
85
+ biocli aggregate gene-dossier TP53 -f json
86
+ ```
87
+
88
+ Returns a unified JSON with gene summary, protein function, KEGG pathways, GO terms, protein interactions, recent literature, and clinical variants — sourced from NCBI, UniProt, KEGG, STRING, PubMed, and ClinVar in parallel.
89
+
90
+ ```bash
91
+ # Gene intelligence (NCBI + UniProt + KEGG + STRING + PubMed + ClinVar)
92
+ biocli aggregate gene-dossier TP53
93
+
94
+ # Variant interpretation (dbSNP + ClinVar + Ensembl VEP)
95
+ biocli aggregate variant-dossier rs334
96
+
97
+ # Literature review with abstracts
98
+ biocli aggregate literature-brief "CRISPR cancer immunotherapy" --limit 10
99
+
100
+ # Pathway enrichment (Enrichr + STRING)
101
+ biocli aggregate enrichment TP53,BRCA1,EGFR,MYC,CDK2
102
+
103
+ # Gene profile (NCBI + UniProt + KEGG + STRING)
104
+ biocli aggregate gene-profile TP53
105
+ ```
106
+
107
+ ## All commands
108
+
109
+ ### Workflow commands (agent-optimized)
110
+
111
+ | Command | Sources | Use case |
112
+ |---------|---------|----------|
113
+ | `aggregate gene-dossier <gene>` | NCBI+UniProt+KEGG+STRING+PubMed+ClinVar | Complete gene intelligence report |
114
+ | `aggregate variant-dossier <variant>` | dbSNP+ClinVar+Ensembl VEP | Variant interpretation |
115
+ | `aggregate variant-interpret <variant>` | dbSNP+ClinVar+VEP+UniProt | Variant interpretation with clinical context |
116
+ | `aggregate literature-brief <query>` | PubMed | Literature summary with abstracts |
117
+ | `aggregate enrichment <genes>` | Enrichr+STRING | Pathway/GO enrichment analysis |
118
+ | `aggregate gene-profile <gene>` | NCBI+UniProt+KEGG+STRING | Gene profile (no literature) |
119
+ | `aggregate workflow-scout <query>` | GEO+SRA | Scout datasets for a research question |
120
+ | `aggregate workflow-prepare <dataset>` | GEO+NCBI+UniProt+KEGG | Prepare research-ready directory with data + annotations |
121
+
122
+ ### Database commands (atomic)
123
+
124
+ | Database | Commands |
125
+ |----------|----------|
126
+ | **PubMed** | `pubmed search`, `fetch`, `abstract`, `cited-by`, `related`, `info` |
127
+ | **Gene** | `gene search`, `info`, `fetch` (FASTA download) |
128
+ | **GEO** | `geo search`, `dataset`, `samples`, `download` |
129
+ | **SRA** | `sra search`, `run`, `download` (FASTQ via ENA/sra-tools) |
130
+ | **ClinVar** | `clinvar search`, `variant` |
131
+ | **SNP** | `snp lookup` |
132
+ | **Taxonomy** | `taxonomy lookup` |
133
+ | **UniProt** | `uniprot search`, `fetch`, `sequence` (FASTA download) |
134
+ | **KEGG** | `kegg pathway`, `link`, `disease`, `convert` |
135
+ | **STRING** | `string partners`, `network`, `enrichment` |
136
+ | **Ensembl** | `ensembl lookup`, `vep`, `xrefs` |
137
+ | **Enrichr** | `enrichr analyze` |
138
+
139
+ ## Output formats
140
+
141
+ ```bash
142
+ biocli gene info 7157 -f json # JSON (default for workflow commands)
143
+ biocli gene info 7157 -f table # Table (default for atomic commands)
144
+ biocli gene info 7157 -f yaml # YAML
145
+ biocli gene info 7157 -f csv # CSV
146
+ biocli gene info 7157 -f plain # Plain text
147
+ ```
148
+
149
+ ## Agent-first result schema
150
+
151
+ All workflow commands (`aggregate *`) return a standard `BiocliResult` envelope:
152
+
153
+ ```json
154
+ {
155
+ "data": { ... },
156
+ "ids": { "ncbiGeneId": "7157", "uniprotAccession": "P04637", ... },
157
+ "sources": ["NCBI Gene", "UniProt", "KEGG", "STRING"],
158
+ "warnings": [],
159
+ "queriedAt": "2026-04-03T10:00:00.000Z",
160
+ "organism": "Homo sapiens",
161
+ "query": "TP53"
162
+ }
163
+ ```
164
+
165
+ - `data` — the actual result payload
166
+ - `ids` — cross-database identifiers for the queried entity
167
+ - `sources` — which databases contributed data
168
+ - `warnings` — partial failures, ambiguous matches (never silently hidden)
169
+ - `queriedAt` — ISO timestamp for reproducibility
170
+ - `organism` — species context
+ - `query` — the original query string, echoed back
171
+
172
+ ## Configuration
173
+
174
+ ```bash
175
+ biocli config set api_key YOUR_NCBI_KEY # Optional: increases NCBI rate limit 3→10 req/s
176
+ biocli config set email you@example.com
177
+ biocli config show
178
+ ```
179
+
180
+ Config stored at `~/.biocli/config.yaml`.
181
+
182
+ ## Rate limits
183
+
184
+ | Database | Rate | Auth |
185
+ |----------|------|------|
186
+ | NCBI | 3/s (10/s with API key) | Optional API key |
187
+ | UniProt | 50/s | None |
188
+ | KEGG | 10/s | None |
189
+ | STRING | 1/s | None |
190
+ | Ensembl | 15/s | None |
191
+ | Enrichr | 5/s | None |
192
+
193
+ All rate limits are enforced automatically per-database.
194
+
195
+ ## License
196
+
197
+ MIT
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Batch input support for biocli commands.
3
+ *
4
+ * Resolves a list of IDs/queries from:
5
+ * 1. --input <file> (one ID per line)
6
+ * 2. --input - (stdin, one per line)
7
+ * 3. Comma-separated positional arg (e.g. "TP53,BRCA1,EGFR")
8
+ *
9
+ * Returns null if no batch mode is detected (single-value execution).
10
+ */
11
+ /**
12
+ * Parse a batch input source into an array of individual values.
13
+ * Returns null if the input is a single non-batch value.
+ *
+ * `inputFlag` takes priority over `positionalValue`: when it is set, the
+ * positional value is never inspected.
+ *
+ * @param positionalValue Positional argument; treated as a batch only when it
+ *   contains a comma and yields more than one non-empty, trimmed item.
+ * @param inputFlag Value of `--input`: `-` reads stdin synchronously, any other
+ *   non-empty string is read as a UTF-8 file path. Lines are trimmed; blank
+ *   lines and lines starting with `#` are dropped.
+ * @returns The list of values, or null when no batch was detected (including
+ *   an `--input` source that contains no usable lines).
+ * @throws Whatever `readFileSync` throws when the file or stdin cannot be read;
+ *   the implementation does not catch it.
14
+ */
15
+ export declare function parseBatchInput(positionalValue: string | undefined, inputFlag: string | undefined): string[] | null;
16
+ /**
17
+ * Merge batch results into a flat array.
18
+ * Handles both plain arrays and ResultWithMeta objects.
+ *
+ * Per entry: null/undefined is skipped; an array contributes its elements
+ * (one level only); an object whose `rows` property is an array contributes
+ * those rows; anything else is appended unchanged.
+ *
+ * @param results One result per batch item, in execution order.
+ * @returns A new array; the input array is not modified.
19
+ */
20
+ export declare function mergeBatchResults(results: unknown[]): unknown[];
package/dist/batch.js ADDED
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Batch input support for biocli commands.
3
+ *
4
+ * Resolves a list of IDs/queries from:
5
+ * 1. --input <file> (one ID per line)
6
+ * 2. --input - (stdin, one per line)
7
+ * 3. Comma-separated positional arg (e.g. "TP53,BRCA1,EGFR")
8
+ *
9
+ * Returns null if no batch mode is detected (single-value execution).
10
+ */
11
+ import { readFileSync } from 'node:fs';
12
/**
 * Turn a batch input source into the list of individual values it names.
 *
 * Sources, in priority order:
 *   1. `inputFlag` — `-` means stdin, anything else is a file path. One value
 *      per line; blank lines and `#` comment lines are ignored.
 *   2. `positionalValue` — only when it is a comma-separated list with more
 *      than one non-empty item.
 *
 * @param positionalValue Positional CLI argument, if any.
 * @param inputFlag Value of the `--input` flag, if any.
 * @returns The parsed values, or null when this is not a batch invocation.
 */
export function parseBatchInput(positionalValue, inputFlag) {
    if (inputFlag) {
        // File descriptor 0 is stdin; the read is synchronous, so this assumes
        // piped (non-interactive) input.
        const source = inputFlag === '-' ? 0 : inputFlag;
        const entries = [];
        for (const rawLine of readFileSync(source, 'utf-8').split(/[\n\r]+/)) {
            const entry = rawLine.trim();
            if (entry && !entry.startsWith('#')) {
                entries.push(entry);
            }
        }
        return entries.length > 0 ? entries : null;
    }
    if (!positionalValue || !positionalValue.includes(',')) {
        return null;
    }
    const parts = positionalValue
        .split(',')
        .map(part => part.trim())
        .filter(part => part.length > 0);
    // A single surviving item (e.g. "TP53,") is not a batch.
    return parts.length > 1 ? parts : null;
}
44
+ /**
45
+ * Merge batch results into a flat array.
46
+ * Handles both plain arrays and ResultWithMeta objects.
47
+ */
48
+ export function mergeBatchResults(results) {
49
+ const merged = [];
50
+ for (const result of results) {
51
+ if (result === null || result === undefined)
52
+ continue;
53
+ if (Array.isArray(result)) {
54
+ merged.push(...result);
55
+ }
56
+ else if (typeof result === 'object' && 'rows' in result) {
57
+ // ResultWithMeta
58
+ const rows = result.rows;
59
+ if (Array.isArray(rows))
60
+ merged.push(...rows);
61
+ else
62
+ merged.push(result);
63
+ }
64
+ else {
65
+ merged.push(result);
66
+ }
67
+ }
68
+ return merged;
69
+ }
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Build-time CLI manifest compiler.
4
+ *
5
+ * Scans all YAML/TS CLI definitions and pre-compiles them into a single
6
+ * manifest.json for instant cold-start registration (no runtime YAML parsing).
7
+ *
8
+ * Usage: npx tsx src/build-manifest.ts
9
+ * Output: dist/cli-manifest.json
10
+ */
11
+ export interface ManifestEntry { // one command record in the array written to cli-manifest.json
12
+ site: string; // clis/ sub-directory name; a YAML definition may override it via its own `site`
13
+ name: string; // command name; YAML definitions fall back to the file's base name
14
+ aliases?: string[];
15
+ description: string; // '' when the definition provides none
16
+ database?: string;
17
+ strategy: string; // lower-cased; 'public' when the definition omits it
18
+ args: Array<{
19
+ name: string;
20
+ type?: string; // TS-defined commands default to 'str'
21
+ default?: unknown;
22
+ required?: boolean;
23
+ positional?: boolean;
24
+ help?: string; // TS-defined commands default to ''
25
+ choices?: string[];
26
+ }>;
27
+ columns?: string[];
28
+ pipeline?: Record<string, unknown>[]; // only set by the YAML scanner; TS entries never carry it
29
+ timeout?: number; // YAML `timeout`, or `timeoutSeconds` of a TS-defined command
30
+ deprecated?: boolean | string;
31
+ replacedBy?: string;
32
+ /** 'yaml' or 'ts' — determines how executeCommand loads the handler */
33
+ type: 'yaml' | 'ts';
34
+ /**
+ * Relative path from clis/ dir, e.g. 'pubmed/search.yaml' or 'gene/info.js'.
+ * NOTE(review): build-manifest.js only sets this for 'ts' entries (as
+ * `<site>/<baseName>.js`); its YAML scanner leaves it undefined — confirm
+ * whether the '.yaml' example is still accurate.
+ */
35
+ modulePath?: string;
36
+ }
37
+ export declare function loadTsManifestEntries(filePath: string, site: string, importer?: (moduleHref: string) => Promise<unknown>): Promise<ManifestEntry[]>; // imports one adapter module and returns its commands for `site`; resolves to [] (after a stderr warning) on failure; `importer` defaults to dynamic import()
38
+ export declare function buildManifest(): Promise<void>; // scans the clis/ directory and writes cli-manifest.json
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Build-time CLI manifest compiler.
4
+ *
5
+ * Scans all YAML/TS CLI definitions and pre-compiles them into a single
6
+ * manifest.json for instant cold-start registration (no runtime YAML parsing).
7
+ *
8
+ * Usage: npx tsx src/build-manifest.ts
9
+ * Output: dist/cli-manifest.json
10
+ */
11
+ import * as fs from 'node:fs';
12
+ import * as path from 'node:path';
13
+ import { fileURLToPath, pathToFileURL } from 'node:url';
14
+ import yaml from 'js-yaml';
15
+ import { getErrorMessage } from './errors.js';
16
+ import { fullName, getRegistry } from './registry.js';
17
+ import { parseYamlArgs } from './yaml-schema.js';
18
+ import { isRecord } from './utils.js';
19
+ const __dirname = path.dirname(fileURLToPath(import.meta.url)); // ESM has no built-in __dirname; derive it from this module's URL
20
+ const CLIS_DIR = path.resolve(__dirname, 'clis'); // root that holds one sub-directory per site
21
+ const OUTPUT = path.resolve(__dirname, '..', 'dist', 'cli-manifest.json'); // always <parent of this module's dir>/dist/cli-manifest.json
22
+ // ── Helpers ─────────────────────────────────────────────────────────────────
23
+ const CLI_MODULE_PATTERN = /\bcli\s*\(/; // source-text check for a `cli(` call; files without one are not imported
24
+ function toManifestArgs(args) {
25
+ return args.map(arg => ({
26
+ name: arg.name,
27
+ type: arg.type ?? 'str',
28
+ default: arg.default,
29
+ required: !!arg.required,
30
+ positional: arg.positional || undefined,
31
+ help: arg.help ?? '',
32
+ choices: arg.choices,
33
+ }));
34
+ }
35
/**
 * Build the manifest `modulePath` for an adapter file: `<site>/<stem>.js`,
 * where the stem is the file name without its last extension. The result
 * always ends in `.js`, whatever the source extension was.
 *
 * @param filePath Path of the adapter source file.
 * @param site Site (sub-directory) the adapter belongs to.
 */
function toTsModulePath(filePath, site) {
    const extension = path.extname(filePath);
    const stem = path.basename(filePath, extension);
    return `${site}/${stem}.js`;
}
39
/**
 * Decide whether an exported value looks like a CLI command object for `site`:
 * a record whose `site` is the string `site`, whose `name` is a string and
 * whose `args` is an array.
 *
 * @param value Any value taken from a module's exports.
 * @param site Site the command must belong to.
 */
function isCliCommandValue(value, site) {
    if (!isRecord(value)) {
        return false;
    }
    if (typeof value.site !== 'string' || value.site !== site) {
        return false;
    }
    return typeof value.name === 'string' && Array.isArray(value.args);
}
46
/**
 * Convert a registered (TS/JS-defined) command into its manifest entry.
 *
 * The entry is always tagged `type: 'ts'`; `strategy` is lower-cased and
 * defaults to 'public'; `timeout` is taken from the command's `timeoutSeconds`.
 *
 * @param cmd Command object as exported or registered by an adapter module.
 * @param modulePath Manifest-relative module path for the adapter.
 */
function toManifestEntry(cmd, modulePath) {
    const strategy = (cmd.strategy ?? 'public').toString().toLowerCase();
    const { site, name, aliases, database, columns, deprecated, replacedBy } = cmd;
    return {
        site,
        name,
        aliases,
        description: cmd.description ?? '',
        database,
        strategy,
        args: toManifestArgs(cmd.args),
        columns,
        timeout: cmd.timeoutSeconds,
        deprecated,
        replacedBy,
        type: 'ts',
        modulePath,
    };
}
63
+ // ── YAML scanner ────────────────────────────────────────────────────────────
64
/**
 * Read one YAML command definition and turn it into a manifest entry.
 *
 * Any failure (unreadable file, invalid YAML, a non-string `strategy`,
 * an error from parseYamlArgs) is reported as a warning on stderr and the
 * file is skipped.
 *
 * @param filePath Path of the .yaml/.yml definition.
 * @param site Directory-derived site, used when the definition has no `site`.
 * @returns The manifest entry, or null when the file is not a YAML mapping
 *   or could not be processed.
 */
function scanYaml(filePath, site) {
    try {
        const cliDef = yaml.load(fs.readFileSync(filePath, 'utf-8'));
        if (!isRecord(cliDef)) {
            return null;
        }
        const strategy = (cliDef.strategy ?? 'public').toLowerCase();
        const args = parseYamlArgs(cliDef.args);
        // Keep only string aliases; a missing or non-array `aliases` stays undefined.
        let aliases;
        if (Array.isArray(cliDef.aliases)) {
            aliases = cliDef.aliases.filter((alias) => typeof alias === 'string');
        }
        return {
            site: cliDef.site ?? site,
            name: cliDef.name ?? path.basename(filePath, path.extname(filePath)),
            description: cliDef.description ?? '',
            database: cliDef.database,
            strategy,
            aliases,
            args,
            columns: cliDef.columns,
            pipeline: cliDef.pipeline,
            timeout: cliDef.timeout,
            deprecated: cliDef.deprecated,
            replacedBy: cliDef.replacedBy,
            type: 'yaml',
        };
    }
    catch (err) {
        process.stderr.write(`Warning: failed to parse ${filePath}: ${getErrorMessage(err)}\n`);
        return null;
    }
}
97
+ // ── TS scanner ──────────────────────────────────────────────────────────────
98
/**
 * Import one TS/JS adapter module and collect the manifest entries for the
 * commands it defines for `site`.
 *
 * Files whose source text contains no `cli(` call are treated as helpers and
 * are not imported. Commands are taken from the module's exports when any
 * qualify; otherwise from registry keys that appeared during the import.
 *
 * @param filePath Path of the adapter file.
 * @param site Site (sub-directory) the file belongs to.
 * @param importer Module loader, given the file's `file://` URL; defaults to
 *   dynamic `import()`.
 * @returns The entries found; [] for helper modules or, after a stderr
 *   warning, when reading or importing fails.
 */
export async function loadTsManifestEntries(filePath, site, importer = moduleHref => import(moduleHref)) {
    try {
        const source = fs.readFileSync(filePath, 'utf-8');
        if (!CLI_MODULE_PATTERN.test(source)) {
            // Helper/test module: keep it out of the manifest.
            return [];
        }
        // The snapshot must be taken before the import, because importing the
        // module is what triggers its top-level cli() registrations.
        const knownKeys = new Set(getRegistry().keys());
        const loaded = await importer(pathToFileURL(filePath).href);
        const modulePath = toTsModulePath(filePath, site);
        // First choice: command objects exported by the module.
        const exported = loaded && typeof loaded === 'object'
            ? Object.values(loaded)
                .filter(candidate => isCliCommandValue(candidate, site))
                .map(command => toManifestEntry(command, modulePath))
            : [];
        if (exported.length > 0) {
            return exported;
        }
        // Fallback: commands that were newly registered by the import.
        const registered = [];
        for (const [key, cmd] of getRegistry()) {
            if (knownKeys.has(key)) {
                continue;
            }
            // Only keys equal to the command's full name count.
            if (key !== fullName(cmd)) {
                continue;
            }
            if (cmd.site !== site) {
                continue;
            }
            registered.push(toManifestEntry(cmd, modulePath));
        }
        return registered;
    }
    catch (err) {
        process.stderr.write(`Warning: failed to load TS adapter ${filePath}: ${getErrorMessage(err)}\n`);
        return [];
    }
}
134
+ // ── Main build function ─────────────────────────────────────────────────────
135
/**
 * Compile every command definition under the clis/ directory into
 * cli-manifest.json.
 *
 * Each non-hidden sub-directory whose name does not start with `_` is a site.
 * Within a site, .yaml/.yml files are scanned as YAML definitions and .js/.ts
 * files (excluding .d.js, .d.ts and .test.ts) are loaded as adapters. When the
 * clis/ directory is missing, a warning is printed and an empty manifest is
 * written.
 */
export async function buildManifest() {
    // Writes `entries` as pretty-printed JSON, creating the output directory if needed.
    const writeManifest = (entries) => {
        const outputDir = path.dirname(OUTPUT);
        if (!fs.existsSync(outputDir)) {
            fs.mkdirSync(outputDir, { recursive: true });
        }
        fs.writeFileSync(OUTPUT, JSON.stringify(entries, null, 2) + '\n', 'utf-8');
    };
    const isYamlFile = (file) => file.endsWith('.yaml') || file.endsWith('.yml');
    const isAdapterFile = (file) => (file.endsWith('.js') && !file.endsWith('.d.js')) ||
        (file.endsWith('.ts') && !file.endsWith('.d.ts') && !file.endsWith('.test.ts'));
    if (!fs.existsSync(CLIS_DIR)) {
        process.stderr.write(`Warning: CLIs directory not found at ${CLIS_DIR}\n`);
        writeManifest([]);
        return;
    }
    const manifest = [];
    for (const dirent of fs.readdirSync(CLIS_DIR, { withFileTypes: true })) {
        const site = dirent.name;
        if (!dirent.isDirectory() || site.startsWith('.') || site.startsWith('_')) {
            continue;
        }
        const sitePath = path.join(CLIS_DIR, site);
        for (const file of fs.readdirSync(sitePath)) {
            if (file.startsWith('.')) {
                continue; // skip hidden/AppleDouble files
            }
            const filePath = path.join(sitePath, file);
            if (isYamlFile(file)) {
                const entry = scanYaml(filePath, site);
                if (entry) {
                    manifest.push(entry);
                }
            }
            else if (isAdapterFile(file)) {
                // Adapters are loaded one at a time: the loader identifies a
                // module's commands by diffing the registry around its import.
                const entries = await loadTsManifestEntries(filePath, site);
                manifest.push(...entries);
            }
        }
    }
    writeManifest(manifest);
    process.stdout.write(`Manifest compiled: ${manifest.length} commands → ${OUTPUT}\n`);
}
176
+ // ── Run directly ────────────────────────────────────────────────────────────
177
// ESM equivalent of `if (require.main === module)`: run the build only when
// this file is the script Node was started with.
const entryScript = process.argv[1];
const isMain = Boolean(entryScript) &&
    (entryScript === fileURLToPath(import.meta.url) ||
        // Fallback for when the two paths are spelled differently: compare the
        // file name only. path.basename is used instead of
        // endsWith('/build-manifest.js') so that Windows-style paths with
        // backslash separators are recognised as well.
        path.basename(entryScript) === 'build-manifest.js');
if (isMain) {
    buildManifest().catch((err) => {
        // Report the failure and exit non-zero so a build script stops here.
        console.error('Manifest build failed:', err);
        process.exit(1);
    });
}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Local file-based cache for biocli API responses.
3
+ *
4
+ * Cache layout: ~/.biocli/cache/{database}/{command}/{sha256}.json
5
+ * Each entry stores: { data, cachedAt, ttlMs, key }
6
+ *
7
+ * TTL default: 24 hours. Configurable via `biocli config set cache.ttl <hours>`.
8
+ * Disable per-request with --no-cache global flag.
9
+ */
10
+ export interface CacheStats { // shape returned by getStats()
11
+ totalEntries: number;
12
+ totalSizeBytes: number;
13
+ databases: Record<string, number>; // presumably entry count keyed by database name — TODO confirm against cache.js
14
+ oldestEntry: string | null; // presumably a timestamp string, null when the cache is empty — TODO confirm
15
+ newestEntry: string | null; // same representation as oldestEntry — TODO confirm
16
+ }
17
+ /** Build a stable cache key from command args. NOTE(review): presumably the `{sha256}` file name from the cache layout described in this file's header — confirm against cache.js. */
18
+ export declare function buildCacheKey(database: string, command: string, args: Record<string, unknown>): string;
19
+ /**
+ * Get a cached result if it exists and hasn't expired.
+ * The declared return type `unknown | null` is just `unknown` to the type
+ * checker, so callers must test for null and narrow the value themselves.
+ * NOTE(review): `argsKey` is presumably the string returned by buildCacheKey — confirm.
+ */
20
+ export declare function getCached(database: string, command: string, argsKey: string, ttlMs?: number): unknown | null;
21
+ /** Store a result in the cache. NOTE(review): an omitted `ttlMs` presumably falls back to getDefaultTtlMs() — confirm. */
22
+ export declare function setCached(database: string, command: string, argsKey: string, data: unknown, ttlMs?: number): void;
23
+ /** Get cache statistics. */
24
+ export declare function getStats(): CacheStats;
25
+ /** Clear all cache entries. Returns number of entries deleted. */
26
+ export declare function clearCache(): number;
27
+ /** Get the default TTL in milliseconds. The header of this file gives the default as 24 hours, configurable via `biocli config set cache.ttl <hours>`. */
28
+ export declare function getDefaultTtlMs(): number;