@yangfei_93sky/biocli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +197 -0
  3. package/dist/batch.d.ts +20 -0
  4. package/dist/batch.js +69 -0
  5. package/dist/build-manifest.d.ts +38 -0
  6. package/dist/build-manifest.js +186 -0
  7. package/dist/cache.d.ts +28 -0
  8. package/dist/cache.js +126 -0
  9. package/dist/cli-manifest.json +1500 -0
  10. package/dist/cli.d.ts +7 -0
  11. package/dist/cli.js +336 -0
  12. package/dist/clis/_shared/common.d.ts +8 -0
  13. package/dist/clis/_shared/common.js +13 -0
  14. package/dist/clis/_shared/eutils.d.ts +9 -0
  15. package/dist/clis/_shared/eutils.js +9 -0
  16. package/dist/clis/_shared/organism-db.d.ts +23 -0
  17. package/dist/clis/_shared/organism-db.js +58 -0
  18. package/dist/clis/_shared/xml-helpers.d.ts +58 -0
  19. package/dist/clis/_shared/xml-helpers.js +266 -0
  20. package/dist/clis/aggregate/enrichment.d.ts +7 -0
  21. package/dist/clis/aggregate/enrichment.js +105 -0
  22. package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
  23. package/dist/clis/aggregate/gene-dossier.js +248 -0
  24. package/dist/clis/aggregate/gene-profile.d.ts +16 -0
  25. package/dist/clis/aggregate/gene-profile.js +305 -0
  26. package/dist/clis/aggregate/literature-brief.d.ts +7 -0
  27. package/dist/clis/aggregate/literature-brief.js +79 -0
  28. package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
  29. package/dist/clis/aggregate/variant-dossier.js +161 -0
  30. package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
  31. package/dist/clis/aggregate/variant-interpret.js +210 -0
  32. package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
  33. package/dist/clis/aggregate/workflow-prepare.js +228 -0
  34. package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
  35. package/dist/clis/aggregate/workflow-scout.js +175 -0
  36. package/dist/clis/clinvar/search.d.ts +8 -0
  37. package/dist/clis/clinvar/search.js +61 -0
  38. package/dist/clis/clinvar/variant.d.ts +7 -0
  39. package/dist/clis/clinvar/variant.js +53 -0
  40. package/dist/clis/enrichr/analyze.d.ts +7 -0
  41. package/dist/clis/enrichr/analyze.js +48 -0
  42. package/dist/clis/ensembl/lookup.d.ts +6 -0
  43. package/dist/clis/ensembl/lookup.js +38 -0
  44. package/dist/clis/ensembl/vep.d.ts +7 -0
  45. package/dist/clis/ensembl/vep.js +86 -0
  46. package/dist/clis/ensembl/xrefs.d.ts +6 -0
  47. package/dist/clis/ensembl/xrefs.js +36 -0
  48. package/dist/clis/gene/fetch.d.ts +10 -0
  49. package/dist/clis/gene/fetch.js +96 -0
  50. package/dist/clis/gene/info.d.ts +7 -0
  51. package/dist/clis/gene/info.js +37 -0
  52. package/dist/clis/gene/search.d.ts +7 -0
  53. package/dist/clis/gene/search.js +71 -0
  54. package/dist/clis/geo/dataset.d.ts +7 -0
  55. package/dist/clis/geo/dataset.js +55 -0
  56. package/dist/clis/geo/download.d.ts +17 -0
  57. package/dist/clis/geo/download.js +115 -0
  58. package/dist/clis/geo/samples.d.ts +7 -0
  59. package/dist/clis/geo/samples.js +57 -0
  60. package/dist/clis/geo/search.d.ts +8 -0
  61. package/dist/clis/geo/search.js +66 -0
  62. package/dist/clis/kegg/convert.d.ts +7 -0
  63. package/dist/clis/kegg/convert.js +37 -0
  64. package/dist/clis/kegg/disease.d.ts +6 -0
  65. package/dist/clis/kegg/disease.js +57 -0
  66. package/dist/clis/kegg/link.d.ts +7 -0
  67. package/dist/clis/kegg/link.js +36 -0
  68. package/dist/clis/kegg/pathway.d.ts +6 -0
  69. package/dist/clis/kegg/pathway.js +37 -0
  70. package/dist/clis/pubmed/abstract.d.ts +7 -0
  71. package/dist/clis/pubmed/abstract.js +42 -0
  72. package/dist/clis/pubmed/cited-by.d.ts +7 -0
  73. package/dist/clis/pubmed/cited-by.js +77 -0
  74. package/dist/clis/pubmed/fetch.d.ts +6 -0
  75. package/dist/clis/pubmed/fetch.js +36 -0
  76. package/dist/clis/pubmed/info.yaml +22 -0
  77. package/dist/clis/pubmed/related.d.ts +7 -0
  78. package/dist/clis/pubmed/related.js +81 -0
  79. package/dist/clis/pubmed/search.d.ts +8 -0
  80. package/dist/clis/pubmed/search.js +63 -0
  81. package/dist/clis/snp/lookup.d.ts +7 -0
  82. package/dist/clis/snp/lookup.js +57 -0
  83. package/dist/clis/sra/download.d.ts +18 -0
  84. package/dist/clis/sra/download.js +217 -0
  85. package/dist/clis/sra/run.d.ts +8 -0
  86. package/dist/clis/sra/run.js +77 -0
  87. package/dist/clis/sra/search.d.ts +8 -0
  88. package/dist/clis/sra/search.js +83 -0
  89. package/dist/clis/string/enrichment.d.ts +7 -0
  90. package/dist/clis/string/enrichment.js +50 -0
  91. package/dist/clis/string/network.d.ts +7 -0
  92. package/dist/clis/string/network.js +47 -0
  93. package/dist/clis/string/partners.d.ts +4 -0
  94. package/dist/clis/string/partners.js +44 -0
  95. package/dist/clis/taxonomy/lookup.d.ts +8 -0
  96. package/dist/clis/taxonomy/lookup.js +54 -0
  97. package/dist/clis/uniprot/fetch.d.ts +7 -0
  98. package/dist/clis/uniprot/fetch.js +82 -0
  99. package/dist/clis/uniprot/search.d.ts +6 -0
  100. package/dist/clis/uniprot/search.js +65 -0
  101. package/dist/clis/uniprot/sequence.d.ts +7 -0
  102. package/dist/clis/uniprot/sequence.js +51 -0
  103. package/dist/commander-adapter.d.ts +27 -0
  104. package/dist/commander-adapter.js +286 -0
  105. package/dist/completion.d.ts +19 -0
  106. package/dist/completion.js +117 -0
  107. package/dist/config.d.ts +57 -0
  108. package/dist/config.js +94 -0
  109. package/dist/databases/enrichr.d.ts +28 -0
  110. package/dist/databases/enrichr.js +131 -0
  111. package/dist/databases/ensembl.d.ts +14 -0
  112. package/dist/databases/ensembl.js +106 -0
  113. package/dist/databases/index.d.ts +45 -0
  114. package/dist/databases/index.js +49 -0
  115. package/dist/databases/kegg.d.ts +26 -0
  116. package/dist/databases/kegg.js +136 -0
  117. package/dist/databases/ncbi.d.ts +28 -0
  118. package/dist/databases/ncbi.js +144 -0
  119. package/dist/databases/string-db.d.ts +19 -0
  120. package/dist/databases/string-db.js +105 -0
  121. package/dist/databases/uniprot.d.ts +13 -0
  122. package/dist/databases/uniprot.js +110 -0
  123. package/dist/discovery.d.ts +32 -0
  124. package/dist/discovery.js +235 -0
  125. package/dist/doctor.d.ts +19 -0
  126. package/dist/doctor.js +151 -0
  127. package/dist/errors.d.ts +68 -0
  128. package/dist/errors.js +105 -0
  129. package/dist/execution.d.ts +15 -0
  130. package/dist/execution.js +178 -0
  131. package/dist/hooks.d.ts +48 -0
  132. package/dist/hooks.js +58 -0
  133. package/dist/main.d.ts +13 -0
  134. package/dist/main.js +31 -0
  135. package/dist/ncbi-fetch.d.ts +10 -0
  136. package/dist/ncbi-fetch.js +10 -0
  137. package/dist/output.d.ts +18 -0
  138. package/dist/output.js +394 -0
  139. package/dist/pipeline/executor.d.ts +22 -0
  140. package/dist/pipeline/executor.js +40 -0
  141. package/dist/pipeline/index.d.ts +6 -0
  142. package/dist/pipeline/index.js +6 -0
  143. package/dist/pipeline/registry.d.ts +16 -0
  144. package/dist/pipeline/registry.js +31 -0
  145. package/dist/pipeline/steps/fetch.d.ts +21 -0
  146. package/dist/pipeline/steps/fetch.js +160 -0
  147. package/dist/pipeline/steps/transform.d.ts +26 -0
  148. package/dist/pipeline/steps/transform.js +92 -0
  149. package/dist/pipeline/steps/xml-parse.d.ts +12 -0
  150. package/dist/pipeline/steps/xml-parse.js +27 -0
  151. package/dist/pipeline/template.d.ts +35 -0
  152. package/dist/pipeline/template.js +312 -0
  153. package/dist/rate-limiter.d.ts +56 -0
  154. package/dist/rate-limiter.js +120 -0
  155. package/dist/registry-api.d.ts +15 -0
  156. package/dist/registry-api.js +13 -0
  157. package/dist/registry.d.ts +90 -0
  158. package/dist/registry.js +100 -0
  159. package/dist/schema.d.ts +80 -0
  160. package/dist/schema.js +72 -0
  161. package/dist/spinner.d.ts +19 -0
  162. package/dist/spinner.js +37 -0
  163. package/dist/types.d.ts +101 -0
  164. package/dist/types.js +27 -0
  165. package/dist/utils.d.ts +16 -0
  166. package/dist/utils.js +40 -0
  167. package/dist/validate.d.ts +29 -0
  168. package/dist/validate.js +136 -0
  169. package/dist/verify.d.ts +20 -0
  170. package/dist/verify.js +131 -0
  171. package/dist/version.d.ts +13 -0
  172. package/dist/version.js +36 -0
  173. package/dist/xml-parser.d.ts +19 -0
  174. package/dist/xml-parser.js +119 -0
  175. package/dist/yaml-schema.d.ts +40 -0
  176. package/dist/yaml-schema.js +62 -0
  177. package/package.json +68 -0
@@ -0,0 +1,305 @@
1
+ /**
2
+ * aggregate/gene-profile — Complete gene profile from multiple databases.
3
+ *
4
+ * THE KILLER FEATURE: one command queries NCBI Gene, UniProt, KEGG, and
5
+ * STRING in parallel and returns a unified, agent-friendly JSON object.
6
+ *
7
+ * Supports:
8
+ * - Single gene: biocli aggregate gene-profile TP53
9
+ * - Batch: biocli aggregate gene-profile TP53,BRCA1,EGFR
10
+ *
11
+ * Design:
12
+ * - Promise.allSettled for partial failure tolerance
13
+ * - _meta.sources tracks which databases contributed
14
+ * - _meta.errors reports partial failures without crashing
15
+ */
16
+ import { cli, Strategy } from '../../registry.js';
17
+ import { CliError } from '../../errors.js';
18
+ import { wrapResult } from '../../types.js';
19
+ import { createHttpContextForDatabase } from '../../databases/index.js';
20
+ import { buildEutilsUrl } from '../../databases/ncbi.js';
21
+ import { buildUniprotUrl } from '../../databases/uniprot.js';
22
+ import { buildKeggUrl, parseKeggTsv, parseKeggEntry } from '../../databases/kegg.js';
23
+ import { buildStringUrl } from '../../databases/string-db.js';
24
+ import { parseGeneSummaries } from '../_shared/xml-helpers.js';
25
+ import { resolveOrganism } from '../_shared/organism-db.js';
26
+ // ── NCBI Gene fetch ───────────────────────────────────────────────────────────
27
/**
 * Look up a gene in NCBI Gene by symbol and organism.
 *
 * Runs an esearch limited to five candidate IDs, resolves those IDs through
 * esummary, and picks the candidate whose symbol equals `symbol`
 * case-insensitively. When no candidate matches exactly, the first parsed
 * summary is used instead.
 *
 * @param ctx - HTTP context exposing `fetchJson(url)`.
 * @param symbol - Gene symbol to search for.
 * @param organism - Organism name placed in the `[Organism]` search filter.
 * @returns `{ geneId, name, summary, chromosome, location }`, or `null` when
 *   the search yields no IDs or no summaries are parsed.
 */
async function fetchNcbiGene(ctx, symbol, organism) {
    // Ask for several candidates so an exact symbol match can be preferred below.
    const searchUrl = buildEutilsUrl('esearch.fcgi', {
        db: 'gene',
        term: `${symbol}[Gene Name] AND ${organism}[Organism]`,
        retmax: '5',
        retmode: 'json',
    });
    const searchPayload = await ctx.fetchJson(searchUrl);
    const candidateIds = searchPayload?.esearchresult?.idlist ?? [];
    if (!candidateIds.length) {
        return null;
    }

    const summaryUrl = buildEutilsUrl('esummary.fcgi', {
        db: 'gene',
        id: candidateIds.join(','),
        retmode: 'json',
    });
    const candidates = parseGeneSummaries(await ctx.fetchJson(summaryUrl));
    if (!candidates.length) {
        return null;
    }

    // An exact (case-insensitive) symbol match wins over search ranking.
    const wanted = symbol.toUpperCase();
    const chosen = candidates.find(candidate => candidate.symbol.toUpperCase() === wanted) ?? candidates[0];
    const { geneId, name, summary, chromosome, location } = chosen;
    return { geneId, name, summary, chromosome, location };
}
53
+ // ── UniProt fetch ─────────────────────────────────────────────────────────────
54
/**
 * Fetch function, localisation, GO terms and the Ensembl gene ID for a gene
 * from reviewed UniProtKB entries.
 *
 * Up to five reviewed entries are requested. The entry whose primary gene
 * name equals `symbol` case-insensitively is used, otherwise the first result.
 *
 * @param ctx - HTTP context exposing `fetchJson(url)`.
 * @param symbol - Gene symbol used in the `gene:` query clause.
 * @param taxId - Taxonomy ID used in the `organism_id:` query clause.
 * @returns `{ accession, function, subcellularLocation, goTerms, ensemblGeneId }`,
 *   or `null` when the search returns no results. `ensemblGeneId` is
 *   `undefined` when the entry has no Ensembl cross-reference with a `GeneId`
 *   property.
 */
async function fetchUniprotData(ctx, symbol, taxId) {
    const searchUrl = buildUniprotUrl('/uniprotkb/search', {
        query: `gene:${symbol} AND organism_id:${taxId} AND reviewed:true`,
        format: 'json',
        size: '5',
    });
    const payload = await ctx.fetchJson(searchUrl);
    const candidates = payload?.results ?? [];
    if (!candidates.length) {
        return null;
    }

    // Prefer the candidate whose primary gene name is exactly the requested symbol.
    const primaryGeneName = candidate => String(candidate.genes?.[0]?.geneName?.value ?? '');
    const wanted = symbol.toUpperCase();
    const entry = candidates.find(candidate => primaryGeneName(candidate).toUpperCase() === wanted) ?? candidates[0];

    const annotations = entry.comments ?? [];
    const annotationOfType = type => annotations.find(annotation => annotation.commentType === type);

    // FUNCTION comment: all text fragments joined with single spaces.
    const functionText = (annotationOfType('FUNCTION')?.texts ?? [])
        .map(fragment => String(fragment.value ?? ''))
        .join(' ');

    // SUBCELLULAR LOCATION comment: non-empty location labels only.
    const locationLabels = (annotationOfType('SUBCELLULAR LOCATION')?.subcellularLocations ?? [])
        .map(item => String(item.location?.value ?? ''))
        .filter(Boolean);

    const crossRefs = entry.uniProtKBCrossReferences ?? [];

    // GoTerm values are split on ':'; the leading piece is mapped through
    // this table and passed through unchanged when it has no mapping.
    const aspectCodes = { C: 'CC', F: 'MF', P: 'BP' };
    const toGoTerm = xref => {
        const goTermProp = (xref.properties ?? []).find(prop => prop.key === 'GoTerm');
        const [aspectLetter, ...labelParts] = String(goTermProp?.value ?? '').split(':');
        return {
            id: String(xref.id ?? ''),
            name: labelParts.join(':'),
            aspect: aspectCodes[aspectLetter] ?? aspectLetter,
        };
    };
    const goTerms = crossRefs.filter(xref => xref.database === 'GO').map(toGoTerm);

    // First Ensembl cross-reference supplies the gene ID, when it has one.
    const ensemblRef = crossRefs.find(xref => xref.database === 'Ensembl');
    const geneIdProp = (ensemblRef?.properties ?? []).find(prop => prop.key === 'GeneId');

    return {
        accession: String(entry.primaryAccession ?? ''),
        function: functionText,
        subcellularLocation: locationLabels.join(', '),
        goTerms,
        ensemblGeneId: geneIdProp ? String(geneIdProp.value) : undefined,
    };
}
108
+ // ── KEGG fetch ────────────────────────────────────────────────────────────────
109
/**
 * Remove a leading "path:" prefix from a KEGG pathway ID.
 *
 * /link/pathway yields IDs such as "path:hsa04115" while /list/pathway
 * yields "hsa04115"; stripping the prefix lets the two be compared.
 * Only a prefix at the very start of the string is removed, and only once.
 *
 * @param id - KEGG pathway ID string, with or without the prefix.
 * @returns The ID without the leading "path:".
 */
function normalizeKeggId(id) {
    const prefix = 'path:';
    return id.startsWith(prefix) ? id.slice(prefix.length) : id;
}
116
/**
 * Collect the KEGG pathways and diseases linked to a gene.
 *
 * Pathways and diseases are fetched one after the other. A failure in either
 * part is appended to `errors` and leaves that part as an empty list; it
 * never rejects the returned promise. Disease names are resolved in batches
 * of ten IDs, and a failed batch is also reported through `errors` while the
 * affected diseases keep an empty name.
 *
 * @param ctx - HTTP context exposing `fetchText(url)`.
 * @param keggOrg - KEGG organism code (the original comments use "hsa").
 * @param geneId - Gene ID appended to the organism code to form the KEGG ID.
 * @param errors - Caller-owned array; error messages are pushed onto it.
 * @returns `{ keggId, pathways: [{ id, name }], diseases: [{ id, name }] }`.
 */
async function fetchKeggData(ctx, keggOrg, geneId, errors) {
    const keggId = `${keggOrg}:${geneId}`;
    const messageOf = err => (err instanceof Error ? err.message : String(err));

    // Linked pathway IDs, each paired with a name from the organism's pathway list.
    const loadPathways = async () => {
        const linkText = await ctx.fetchText(buildKeggUrl(`/link/pathway/${keggId}`));
        if (!linkText.trim()) {
            return [];
        }
        const linkedIds = parseKeggTsv(linkText).map(link => link.value).filter(Boolean);
        if (!linkedIds.length) {
            return [];
        }
        // /list/pathway/hsa returns "hsa04115\tPathway name - Homo sapiens (human)"
        const listText = await ctx.fetchText(buildKeggUrl(`/list/pathway/${keggOrg}`));
        const nameById = new Map(parseKeggTsv(listText).map(row => [row.key, row.value.replace(/ - .*$/, '')]));
        return linkedIds.map(rawId => {
            const id = normalizeKeggId(rawId);
            // Fall back to the ID itself when the list has no name for it.
            return { id, name: nameById.get(id) ?? id };
        });
    };

    // Linked disease IDs, each paired with a name looked up via /get.
    const loadDiseases = async () => {
        const linkText = await ctx.fetchText(buildKeggUrl(`/link/disease/${keggId}`));
        if (!linkText.trim()) {
            return [];
        }
        const diseaseIds = parseKeggTsv(linkText).map(link => link.value).filter(Boolean);
        const nameById = {};
        // Batch name resolution: /get accepts up to 10 IDs joined with '+'
        for (let start = 0; start < diseaseIds.length; start += 10) {
            const chunk = diseaseIds.slice(start, start + 10);
            try {
                const body = await ctx.fetchText(buildKeggUrl(`/get/${chunk.join('+')}`));
                const records = body.split('///').filter(record => record.trim());
                for (const record of records) {
                    const parsed = parseKeggEntry(record);
                    if (!parsed.ENTRY || !parsed.NAME) {
                        continue;
                    }
                    // Key by "ds:" + first token of ENTRY so it matches the linked IDs.
                    nameById['ds:' + parsed.ENTRY.split(/\s+/)[0]] = parsed.NAME;
                }
            }
            catch (err) {
                errors.push(`KEGG disease names (batch ${start / 10 + 1}): ${messageOf(err)}`);
            }
        }
        return diseaseIds.map(id => ({ id, name: nameById[id] ?? '' }));
    };

    let pathways = [];
    try {
        pathways = await loadPathways();
    }
    catch (err) {
        errors.push(`KEGG pathways: ${messageOf(err)}`);
    }

    let diseases = [];
    try {
        diseases = await loadDiseases();
    }
    catch (err) {
        errors.push(`KEGG diseases: ${messageOf(err)}`);
    }

    return { keggId, pathways, diseases };
}
173
+ // ── STRING fetch ──────────────────────────────────────────────────────────────
174
/**
 * Fetch interaction partners for a gene from STRING.
 *
 * Requests at most ten partners with a required score of 400. Request
 * failures are not caught here; they propagate to the caller.
 *
 * @param ctx - HTTP context exposing `fetchJson(url)`.
 * @param symbol - Gene symbol sent as the `identifiers` parameter.
 * @param taxId - Taxonomy ID, stringified into the `species` parameter.
 * @returns Array of `{ partner, score }`; empty when the response is not an array.
 */
async function fetchStringPartners(ctx, symbol, taxId) {
    const url = buildStringUrl('interaction_partners', {
        identifiers: symbol,
        species: String(taxId),
        limit: '10',
        required_score: '400',
    });
    const rows = await ctx.fetchJson(url);
    if (!Array.isArray(rows)) {
        return [];
    }
    const partners = [];
    for (const row of rows) {
        partners.push({
            partner: String(row.preferredName_B ?? ''),
            score: Number(row.score ?? 0),
        });
    }
    return partners;
}
189
+ // ── Main command ──────────────────────────────────────────────────────────────
190
/**
 * Build the unified profile for a single gene.
 *
 * NCBI Gene, UniProt and STRING are queried concurrently through
 * `Promise.allSettled`, so one failing source does not abort the others.
 * KEGG is queried afterwards because it needs the NCBI Gene ID; it is
 * skipped (with a warning) when NCBI produced no gene ID. Every source that
 * contributed data is listed in `sources`; every failure or empty result is
 * reported through `warnings`.
 *
 * @param symbol - Gene symbol to profile.
 * @param organismName - Organism name used for the NCBI query and echoed in the result.
 * @param taxId - Taxonomy ID used for the UniProt and STRING queries.
 * @param keggOrg - KEGG organism code used for the KEGG queries.
 * @returns The value of `wrapResult(profile, { ids, sources, warnings, organism, query })`.
 */
async function buildGeneProfile(symbol, organismName, taxId, keggOrg) {
    const meta = { sources: [], queriedAt: new Date().toISOString(), errors: [] };
    const ncbiCtx = createHttpContextForDatabase('ncbi');
    const uniprotCtx = createHttpContextForDatabase('uniprot');
    const keggCtx = createHttpContextForDatabase('kegg');
    const stringCtx = createHttpContextForDatabase('string');

    // Text used in a warning: the rejection reason, or 'no data' for an empty success.
    const failureDetail = outcome => (outcome.status === 'rejected' ? outcome.reason : 'no data');

    // Independent sources run in parallel; each outcome is inspected separately.
    const [ncbiOutcome, uniprotOutcome, stringOutcome] = await Promise.allSettled([
        fetchNcbiGene(ncbiCtx, symbol, organismName),
        fetchUniprotData(uniprotCtx, symbol, taxId),
        fetchStringPartners(stringCtx, symbol, taxId),
    ]);

    // NCBI Gene
    const ncbiData = ncbiOutcome.status === 'fulfilled' && ncbiOutcome.value ? ncbiOutcome.value : null;
    if (ncbiData) {
        meta.sources.push('NCBI Gene');
    }
    else {
        meta.errors.push(`NCBI: ${failureDetail(ncbiOutcome)}`);
    }

    // UniProt
    const uniprotData = uniprotOutcome.status === 'fulfilled' && uniprotOutcome.value ? uniprotOutcome.value : null;
    if (uniprotData) {
        meta.sources.push('UniProt');
    }
    else {
        meta.errors.push(`UniProt: ${failureDetail(uniprotOutcome)}`);
    }

    // STRING: an empty partner list counts as "no data".
    const hasPartners = stringOutcome.status === 'fulfilled' && stringOutcome.value.length;
    const interactions = hasPartners ? stringOutcome.value : [];
    if (hasPartners) {
        meta.sources.push('STRING');
    }
    else {
        meta.errors.push(`STRING: ${failureDetail(stringOutcome)}`);
    }

    // KEGG runs after NCBI because it is keyed by the NCBI Gene ID.
    // fetchKeggData appends its own partial failures to meta.errors.
    let keggData = null;
    if (!ncbiData?.geneId) {
        meta.errors.push('KEGG: skipped (no NCBI Gene ID to map from)');
    }
    else {
        try {
            keggData = await fetchKeggData(keggCtx, keggOrg, ncbiData.geneId, meta.errors);
            if (keggData.pathways.length || keggData.diseases.length) {
                meta.sources.push('KEGG');
            }
        }
        catch (err) {
            meta.errors.push(`KEGG: ${err instanceof Error ? err.message : String(err)}`);
        }
    }

    const tagKegg = item => ({ ...item, source: 'KEGG' });
    const profileData = {
        symbol,
        name: ncbiData?.name ?? '',
        summary: ncbiData?.summary ?? '',
        chromosome: ncbiData?.chromosome ?? '',
        location: ncbiData?.location ?? '',
        function: uniprotData?.function ?? '',
        subcellularLocation: uniprotData?.subcellularLocation ?? '',
        pathways: (keggData?.pathways ?? []).map(tagKegg),
        goTerms: uniprotData?.goTerms ?? [],
        interactions,
        diseases: (keggData?.diseases ?? []).map(tagKegg),
    };

    // Only identifiers that were actually resolved are included.
    const ids = Object.fromEntries([
        ['ncbiGeneId', ncbiData?.geneId],
        ['uniprotAccession', uniprotData?.accession],
        ['keggId', keggData?.keggId],
        ['ensemblGeneId', uniprotData?.ensemblGeneId],
    ].filter(([, value]) => value));

    return wrapResult(profileData, {
        ids,
        sources: meta.sources,
        warnings: meta.errors,
        organism: organismName,
        query: symbol,
    });
}
276
// Registers the `aggregate gene-profile` command.
cli({
    site: 'aggregate',
    name: 'gene-profile',
    description: 'Complete gene profile from NCBI + UniProt + KEGG + STRING',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 60,
    args: [
        { name: 'genes', positional: true, required: true, help: 'Gene symbol(s), comma-separated (e.g. TP53 or TP53,BRCA1,EGFR)' },
        { name: 'organism', default: 'human', help: 'Organism (e.g. human, mouse, 9606)' },
    ],
    columns: ['symbol', 'name', 'organism', 'pathways', 'goTerms', 'interactions'],
    /**
     * Build one profile per requested gene symbol.
     *
     * Returns a single profile when exactly one symbol is given, otherwise
     * an array of profiles in input order. Throws an ARGUMENT CliError when
     * no symbol remains after trimming and dropping empty pieces.
     */
    func: async (_ctx, args) => {
        const symbols = String(args.genes)
            .split(',')
            .map(piece => piece.trim())
            .filter(Boolean);
        if (!symbols.length) {
            throw new CliError('ARGUMENT', 'At least one gene symbol is required');
        }
        const { name, taxId, keggOrg } = resolveOrganism(String(args.organism));
        // Genes are processed one at a time to respect rate limits.
        const profiles = [];
        for (const symbol of symbols) {
            profiles.push(await buildGeneProfile(symbol, name, taxId, keggOrg));
        }
        return symbols.length === 1 ? profiles[0] : profiles;
    },
});
@@ -0,0 +1,7 @@
1
+ /**
2
+ * aggregate/literature-brief — PubMed literature summary for a topic.
3
+ *
4
+ * Fetches recent papers and returns structured data with abstracts,
5
+ * optimized for AI agent consumption and literature review tasks.
6
+ */
7
+ export {};
@@ -0,0 +1,79 @@
1
+ /**
2
+ * aggregate/literature-brief — PubMed literature summary for a topic.
3
+ *
4
+ * Fetches recent papers and returns structured data with abstracts,
5
+ * optimized for AI agent consumption and literature review tasks.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { wrapResult } from '../../types.js';
10
+ import { createHttpContextForDatabase } from '../../databases/index.js';
11
+ import { buildEutilsUrl } from '../../databases/ncbi.js';
12
+ import { parsePubmedArticles } from '../_shared/xml-helpers.js';
13
// Registers the `aggregate literature-brief` command.
cli({
    site: 'aggregate',
    name: 'literature-brief',
    description: 'Literature summary with abstracts for a research topic',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 60,
    args: [
        { name: 'query', positional: true, required: true, help: 'Search query (e.g. "TP53 immunotherapy", "CRISPR cancer")' },
        { name: 'limit', type: 'int', default: 10, help: 'Number of papers (1-50)' },
        { name: 'sort', default: 'relevance', choices: ['relevance', 'date'], help: 'Sort order' },
        { name: 'years', type: 'int', default: 5, help: 'Limit to last N years' },
    ],
    columns: ['pmid', 'title', 'journal', 'year', 'abstract'],
    /**
     * Search PubMed for `args.query` restricted to the last N years, then
     * fetch and parse the matching articles.
     *
     * `limit` is clamped to 1-50 and `years` to 1-20. A missing or
     * non-numeric value falls back to the documented default (10 and 5) and
     * is reported in the result warnings, instead of sending "NaN" to the
     * E-utilities API.
     *
     * Throws an ARGUMENT CliError for an empty query, NOT_FOUND when the
     * search returns no PMIDs, and PARSE_ERROR when no article can be parsed.
     *
     * Returns `wrapResult({ papers, totalAvailable }, { sources, warnings, query, ids })`.
     */
    func: async (_ctx, args) => {
        const query = String(args.query).trim();
        if (!query) {
            throw new CliError('ARGUMENT', 'Search query is required');
        }
        const warnings = [];

        // Number() yields NaN for undefined or non-numeric input, and NaN passes
        // straight through Math.min/Math.max, so it must be handled before clamping.
        const boundedInt = (raw, label, fallback, min, max) => {
            const parsed = Math.trunc(Number(raw));
            if (Number.isNaN(parsed)) {
                warnings.push(`Invalid --${label} value "${raw}"; using default ${fallback}`);
                return fallback;
            }
            return Math.max(min, Math.min(parsed, max));
        };
        const limit = boundedInt(args.limit, 'limit', 10, 1, 50);
        const years = boundedInt(args.years, 'years', 5, 1, 20);
        const sort = String(args.sort) === 'date' ? 'pub_date' : 'relevance';

        const ncbiCtx = createHttpContextForDatabase('ncbi');

        // Build date-restricted query
        const dateQuery = `${query} AND "last ${years} years"[PDat]`;

        // Step 1: esearch
        const searchResult = await ncbiCtx.fetchJson(buildEutilsUrl('esearch.fcgi', {
            db: 'pubmed',
            term: dateQuery,
            retmax: String(limit),
            sort,
            retmode: 'json',
        }));
        const esearch = searchResult?.esearchresult;
        const pmids = esearch?.idlist ?? [];
        const totalCount = Number(esearch?.count ?? 0);
        if (!pmids.length) {
            throw new CliError('NOT_FOUND', `No papers found for "${query}"`, 'Try broader terms or increase --years');
        }

        // Step 2: efetch with full abstracts
        const xmlData = await ncbiCtx.fetchXml(buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: pmids.join(','),
            rettype: 'xml',
        }));
        const articles = parsePubmedArticles(xmlData);
        if (!articles.length) {
            throw new CliError('PARSE_ERROR', 'Failed to parse PubMed response');
        }
        // Surface partial parses instead of silently returning fewer papers.
        if (articles.length < pmids.length) {
            warnings.push(`PubMed returned ${pmids.length} IDs but only ${articles.length} articles could be parsed`);
        }

        const papers = articles.map(({ pmid, title, authors, journal, year, doi, abstract }) => ({
            pmid,
            title,
            authors,
            journal,
            year,
            doi,
            abstract,
        }));
        return wrapResult({ papers, totalAvailable: totalCount }, {
            sources: ['PubMed'],
            warnings,
            query,
            ids: { totalPmids: String(totalCount) },
        });
    },
});
@@ -0,0 +1,11 @@
1
+ /**
2
+ * aggregate/variant-dossier — Comprehensive variant interpretation report.
3
+ *
4
+ * Cross-queries:
5
+ * - NCBI dbSNP (basic variant info)
6
+ * - ClinVar (clinical significance)
7
+ * - Ensembl VEP (functional consequence prediction)
8
+ *
9
+ * Accepts rsID (rs334), HGVS notation, or gene:variant format.
10
+ */
11
+ export {};
@@ -0,0 +1,161 @@
1
+ /**
2
+ * aggregate/variant-dossier — Comprehensive variant interpretation report.
3
+ *
4
+ * Cross-queries:
5
+ * - NCBI dbSNP (basic variant info)
6
+ * - ClinVar (clinical significance)
7
+ * - Ensembl VEP (functional consequence prediction)
8
+ *
9
+ * Accepts rsID (rs334), HGVS notation, or gene:variant format.
10
+ */
11
+ import { cli, Strategy } from '../../registry.js';
12
+ import { CliError } from '../../errors.js';
13
+ import { wrapResult } from '../../types.js';
14
+ import { createHttpContextForDatabase } from '../../databases/index.js';
15
+ import { buildEutilsUrl } from '../../databases/ncbi.js';
16
+ import { buildEnsemblUrl } from '../../databases/ensembl.js';
17
// Registers the `aggregate variant-dossier` command.
cli({
    site: 'aggregate',
    name: 'variant-dossier',
    description: 'Comprehensive variant interpretation (dbSNP + ClinVar + VEP)',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 60,
    args: [
        { name: 'variant', positional: true, required: true, help: 'Variant ID: rsID (rs334), HGVS, or genomic coordinate' },
    ],
    columns: ['variant', 'gene', 'consequence', 'clinicalSignificance', 'condition'],
    /**
     * Query dbSNP, ClinVar and Ensembl VEP for one variant and merge the results.
     *
     * dbSNP and ClinVar are only queried when the input is an rsID (`rs` plus
     * digits, any case). VEP is always queried: by ID for rsIDs, otherwise
     * through the HGVS endpoint. The three lookups run concurrently via
     * `Promise.allSettled`; a rejected lookup becomes a warning.
     *
     * The top-level `gene` is the dbSNP gene when it is non-empty, otherwise
     * the first non-empty gene symbol among the VEP consequences.
     *
     * Throws an ARGUMENT CliError for an empty variant and NOT_FOUND when no
     * source returned any data.
     */
    func: async (_ctx, args) => {
        const variant = String(args.variant).trim();
        if (!variant) {
            throw new CliError('ARGUMENT', 'Variant ID is required');
        }
        const sources = [];
        const warnings = [];
        const ids = {};
        const ncbiCtx = createHttpContextForDatabase('ncbi');
        const ensemblCtx = createHttpContextForDatabase('ensembl');

        // The rsID test is case-insensitive, so build a lower-case "rs<digits>"
        // form for outgoing requests while echoing the caller's spelling in the output.
        const rsMatch = /^rs(\d+)$/i.exec(variant);
        const isRsId = rsMatch !== null;
        const snpId = isRsId ? rsMatch[1] : '';
        const rsId = `rs${snpId}`;
        if (isRsId) {
            ids.rsId = variant;
        }

        // dbSNP summary for the numeric SNP ID; null when the entry is absent.
        const lookupDbSnp = async () => {
            const data = await ncbiCtx.fetchJson(buildEutilsUrl('esummary.fcgi', {
                db: 'snp', id: snpId, retmode: 'json',
            }));
            const entry = data?.result?.[snpId];
            if (!entry) {
                return null;
            }
            const firstGene = Array.isArray(entry.genes) && entry.genes.length > 0 ? entry.genes[0] : undefined;
            return {
                rsid: rsId,
                gene: String(firstGene?.name ?? ''),
                chromosome: String(entry.chr ?? ''),
                position: String(entry.chrpos ?? ''),
                alleles: String(entry.docsum ?? ''),
                maf: String(entry.global_maf ?? ''),
            };
        };

        // ClinVar records (up to five) found for the rsID.
        const lookupClinvar = async () => {
            const search = await ncbiCtx.fetchJson(buildEutilsUrl('esearch.fcgi', {
                db: 'clinvar', term: `${rsId}[Variant ID]`, retmax: '5', retmode: 'json',
            }));
            const clinvarIds = search?.esearchresult?.idlist ?? [];
            if (!clinvarIds.length) {
                return [];
            }
            const summary = await ncbiCtx.fetchJson(buildEutilsUrl('esummary.fcgi', {
                db: 'clinvar', id: clinvarIds.join(','), retmode: 'json',
            }));
            const resultObj = summary?.result;
            const uids = resultObj?.uids ?? [];
            return uids.map(uid => {
                const item = resultObj?.[uid] ?? {};
                // clinical_significance is read either as an object with a
                // description or as a plain value.
                const significance = typeof item.clinical_significance === 'object'
                    ? String(item.clinical_significance?.description ?? '')
                    : String(item.clinical_significance ?? '');
                const condition = Array.isArray(item.trait_set)
                    ? item.trait_set.map(trait => String(trait.trait_name ?? '')).join('; ')
                    : '';
                return {
                    title: String(item.title ?? ''),
                    significance,
                    condition,
                    accession: String(item.accession ?? ''),
                };
            });
        };

        // Up to five VEP transcript consequences, canonical transcripts first.
        const lookupVep = async () => {
            const vepPath = isRsId
                ? `/vep/human/id/${rsId}`
                : `/vep/human/hgvs/${encodeURIComponent(variant)}`;
            const data = await ensemblCtx.fetchJson(buildEnsemblUrl(vepPath, { canonical: '1', hgvs: '1', protein: '1' }));
            if (!Array.isArray(data) || !data.length) {
                return [];
            }
            const consequences = data[0].transcript_consequences ?? [];
            const canonicalFirst = [...consequences].sort((a, b) => Number(Boolean(b.canonical)) - Number(Boolean(a.canonical)));
            return canonicalFirst.slice(0, 5).map(t => ({
                gene: String(t.gene_symbol ?? ''),
                transcript: String(t.transcript_id ?? ''),
                consequence: (t.consequence_terms ?? []).join(', '),
                impact: String(t.impact ?? ''),
                aminoAcids: String(t.amino_acids ?? ''),
                codons: String(t.codons ?? ''),
                biotype: String(t.biotype ?? ''),
                canonical: Boolean(t.canonical),
            }));
        };

        // Parallel queries
        const [snpResult, clinvarResult, vepResult] = await Promise.allSettled([
            isRsId ? lookupDbSnp() : Promise.resolve(null),
            isRsId ? lookupClinvar() : Promise.resolve([]),
            lookupVep(),
        ]);

        // Assemble
        let snpData = null;
        if (snpResult.status === 'fulfilled' && snpResult.value) {
            snpData = snpResult.value;
            sources.push('dbSNP');
            if (snpData.gene) {
                ids.gene = snpData.gene;
            }
        }
        else if (snpResult.status === 'rejected') {
            warnings.push(`dbSNP: ${snpResult.reason}`);
        }

        const clinvar = clinvarResult.status === 'fulfilled' ? clinvarResult.value : [];
        if (clinvar.length) {
            sources.push('ClinVar');
        }
        else if (clinvarResult.status === 'rejected') {
            warnings.push(`ClinVar: ${clinvarResult.reason}`);
        }

        const vep = vepResult.status === 'fulfilled' ? vepResult.value : [];
        if (vep.length) {
            sources.push('Ensembl VEP');
        }
        else if (vepResult.status === 'rejected') {
            warnings.push(`Ensembl VEP: ${vepResult.reason}`);
        }

        if (!snpData && !clinvar.length && !vep.length) {
            throw new CliError('NOT_FOUND', `No data found for variant "${variant}"`, 'Check the variant ID format (e.g. rs334, NM_000518.5:c.20A>T)');
        }

        // The dbSNP gene is always a string and may be empty, so `||` (not `??`)
        // is needed for the VEP gene to act as a fallback.
        const gene = snpData?.gene || vep.find(item => item.gene)?.gene || '';

        const dossier = {
            variant,
            gene,
            chromosome: snpData?.chromosome ?? '',
            position: snpData?.position ?? '',
            vepConsequences: vep,
            clinicalVariants: clinvar,
            dbsnp: snpData ? {
                alleles: snpData.alleles,
                maf: snpData.maf,
            } : null,
        };
        return wrapResult(dossier, {
            ids,
            sources,
            warnings,
            query: variant,
        });
    },
});
@@ -0,0 +1,10 @@
1
+ /**
2
+ * aggregate/variant-interpret — Variant interpretation with clinical context.
3
+ *
4
+ * Builds on variant-dossier by adding:
5
+ * - UniProt protein function context for the affected gene
6
+ * - Structured interpretation summary (pathogenicity, impact, recommendation)
7
+ *
8
+ * Cross-queries: dbSNP + ClinVar + Ensembl VEP + UniProt
9
+ */
10
+ export {};