@yangfei_93sky/biocli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +197 -0
  3. package/dist/batch.d.ts +20 -0
  4. package/dist/batch.js +69 -0
  5. package/dist/build-manifest.d.ts +38 -0
  6. package/dist/build-manifest.js +186 -0
  7. package/dist/cache.d.ts +28 -0
  8. package/dist/cache.js +126 -0
  9. package/dist/cli-manifest.json +1500 -0
  10. package/dist/cli.d.ts +7 -0
  11. package/dist/cli.js +336 -0
  12. package/dist/clis/_shared/common.d.ts +8 -0
  13. package/dist/clis/_shared/common.js +13 -0
  14. package/dist/clis/_shared/eutils.d.ts +9 -0
  15. package/dist/clis/_shared/eutils.js +9 -0
  16. package/dist/clis/_shared/organism-db.d.ts +23 -0
  17. package/dist/clis/_shared/organism-db.js +58 -0
  18. package/dist/clis/_shared/xml-helpers.d.ts +58 -0
  19. package/dist/clis/_shared/xml-helpers.js +266 -0
  20. package/dist/clis/aggregate/enrichment.d.ts +7 -0
  21. package/dist/clis/aggregate/enrichment.js +105 -0
  22. package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
  23. package/dist/clis/aggregate/gene-dossier.js +248 -0
  24. package/dist/clis/aggregate/gene-profile.d.ts +16 -0
  25. package/dist/clis/aggregate/gene-profile.js +305 -0
  26. package/dist/clis/aggregate/literature-brief.d.ts +7 -0
  27. package/dist/clis/aggregate/literature-brief.js +79 -0
  28. package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
  29. package/dist/clis/aggregate/variant-dossier.js +161 -0
  30. package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
  31. package/dist/clis/aggregate/variant-interpret.js +210 -0
  32. package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
  33. package/dist/clis/aggregate/workflow-prepare.js +228 -0
  34. package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
  35. package/dist/clis/aggregate/workflow-scout.js +175 -0
  36. package/dist/clis/clinvar/search.d.ts +8 -0
  37. package/dist/clis/clinvar/search.js +61 -0
  38. package/dist/clis/clinvar/variant.d.ts +7 -0
  39. package/dist/clis/clinvar/variant.js +53 -0
  40. package/dist/clis/enrichr/analyze.d.ts +7 -0
  41. package/dist/clis/enrichr/analyze.js +48 -0
  42. package/dist/clis/ensembl/lookup.d.ts +6 -0
  43. package/dist/clis/ensembl/lookup.js +38 -0
  44. package/dist/clis/ensembl/vep.d.ts +7 -0
  45. package/dist/clis/ensembl/vep.js +86 -0
  46. package/dist/clis/ensembl/xrefs.d.ts +6 -0
  47. package/dist/clis/ensembl/xrefs.js +36 -0
  48. package/dist/clis/gene/fetch.d.ts +10 -0
  49. package/dist/clis/gene/fetch.js +96 -0
  50. package/dist/clis/gene/info.d.ts +7 -0
  51. package/dist/clis/gene/info.js +37 -0
  52. package/dist/clis/gene/search.d.ts +7 -0
  53. package/dist/clis/gene/search.js +71 -0
  54. package/dist/clis/geo/dataset.d.ts +7 -0
  55. package/dist/clis/geo/dataset.js +55 -0
  56. package/dist/clis/geo/download.d.ts +17 -0
  57. package/dist/clis/geo/download.js +115 -0
  58. package/dist/clis/geo/samples.d.ts +7 -0
  59. package/dist/clis/geo/samples.js +57 -0
  60. package/dist/clis/geo/search.d.ts +8 -0
  61. package/dist/clis/geo/search.js +66 -0
  62. package/dist/clis/kegg/convert.d.ts +7 -0
  63. package/dist/clis/kegg/convert.js +37 -0
  64. package/dist/clis/kegg/disease.d.ts +6 -0
  65. package/dist/clis/kegg/disease.js +57 -0
  66. package/dist/clis/kegg/link.d.ts +7 -0
  67. package/dist/clis/kegg/link.js +36 -0
  68. package/dist/clis/kegg/pathway.d.ts +6 -0
  69. package/dist/clis/kegg/pathway.js +37 -0
  70. package/dist/clis/pubmed/abstract.d.ts +7 -0
  71. package/dist/clis/pubmed/abstract.js +42 -0
  72. package/dist/clis/pubmed/cited-by.d.ts +7 -0
  73. package/dist/clis/pubmed/cited-by.js +77 -0
  74. package/dist/clis/pubmed/fetch.d.ts +6 -0
  75. package/dist/clis/pubmed/fetch.js +36 -0
  76. package/dist/clis/pubmed/info.yaml +22 -0
  77. package/dist/clis/pubmed/related.d.ts +7 -0
  78. package/dist/clis/pubmed/related.js +81 -0
  79. package/dist/clis/pubmed/search.d.ts +8 -0
  80. package/dist/clis/pubmed/search.js +63 -0
  81. package/dist/clis/snp/lookup.d.ts +7 -0
  82. package/dist/clis/snp/lookup.js +57 -0
  83. package/dist/clis/sra/download.d.ts +18 -0
  84. package/dist/clis/sra/download.js +217 -0
  85. package/dist/clis/sra/run.d.ts +8 -0
  86. package/dist/clis/sra/run.js +77 -0
  87. package/dist/clis/sra/search.d.ts +8 -0
  88. package/dist/clis/sra/search.js +83 -0
  89. package/dist/clis/string/enrichment.d.ts +7 -0
  90. package/dist/clis/string/enrichment.js +50 -0
  91. package/dist/clis/string/network.d.ts +7 -0
  92. package/dist/clis/string/network.js +47 -0
  93. package/dist/clis/string/partners.d.ts +4 -0
  94. package/dist/clis/string/partners.js +44 -0
  95. package/dist/clis/taxonomy/lookup.d.ts +8 -0
  96. package/dist/clis/taxonomy/lookup.js +54 -0
  97. package/dist/clis/uniprot/fetch.d.ts +7 -0
  98. package/dist/clis/uniprot/fetch.js +82 -0
  99. package/dist/clis/uniprot/search.d.ts +6 -0
  100. package/dist/clis/uniprot/search.js +65 -0
  101. package/dist/clis/uniprot/sequence.d.ts +7 -0
  102. package/dist/clis/uniprot/sequence.js +51 -0
  103. package/dist/commander-adapter.d.ts +27 -0
  104. package/dist/commander-adapter.js +286 -0
  105. package/dist/completion.d.ts +19 -0
  106. package/dist/completion.js +117 -0
  107. package/dist/config.d.ts +57 -0
  108. package/dist/config.js +94 -0
  109. package/dist/databases/enrichr.d.ts +28 -0
  110. package/dist/databases/enrichr.js +131 -0
  111. package/dist/databases/ensembl.d.ts +14 -0
  112. package/dist/databases/ensembl.js +106 -0
  113. package/dist/databases/index.d.ts +45 -0
  114. package/dist/databases/index.js +49 -0
  115. package/dist/databases/kegg.d.ts +26 -0
  116. package/dist/databases/kegg.js +136 -0
  117. package/dist/databases/ncbi.d.ts +28 -0
  118. package/dist/databases/ncbi.js +144 -0
  119. package/dist/databases/string-db.d.ts +19 -0
  120. package/dist/databases/string-db.js +105 -0
  121. package/dist/databases/uniprot.d.ts +13 -0
  122. package/dist/databases/uniprot.js +110 -0
  123. package/dist/discovery.d.ts +32 -0
  124. package/dist/discovery.js +235 -0
  125. package/dist/doctor.d.ts +19 -0
  126. package/dist/doctor.js +151 -0
  127. package/dist/errors.d.ts +68 -0
  128. package/dist/errors.js +105 -0
  129. package/dist/execution.d.ts +15 -0
  130. package/dist/execution.js +178 -0
  131. package/dist/hooks.d.ts +48 -0
  132. package/dist/hooks.js +58 -0
  133. package/dist/main.d.ts +13 -0
  134. package/dist/main.js +31 -0
  135. package/dist/ncbi-fetch.d.ts +10 -0
  136. package/dist/ncbi-fetch.js +10 -0
  137. package/dist/output.d.ts +18 -0
  138. package/dist/output.js +394 -0
  139. package/dist/pipeline/executor.d.ts +22 -0
  140. package/dist/pipeline/executor.js +40 -0
  141. package/dist/pipeline/index.d.ts +6 -0
  142. package/dist/pipeline/index.js +6 -0
  143. package/dist/pipeline/registry.d.ts +16 -0
  144. package/dist/pipeline/registry.js +31 -0
  145. package/dist/pipeline/steps/fetch.d.ts +21 -0
  146. package/dist/pipeline/steps/fetch.js +160 -0
  147. package/dist/pipeline/steps/transform.d.ts +26 -0
  148. package/dist/pipeline/steps/transform.js +92 -0
  149. package/dist/pipeline/steps/xml-parse.d.ts +12 -0
  150. package/dist/pipeline/steps/xml-parse.js +27 -0
  151. package/dist/pipeline/template.d.ts +35 -0
  152. package/dist/pipeline/template.js +312 -0
  153. package/dist/rate-limiter.d.ts +56 -0
  154. package/dist/rate-limiter.js +120 -0
  155. package/dist/registry-api.d.ts +15 -0
  156. package/dist/registry-api.js +13 -0
  157. package/dist/registry.d.ts +90 -0
  158. package/dist/registry.js +100 -0
  159. package/dist/schema.d.ts +80 -0
  160. package/dist/schema.js +72 -0
  161. package/dist/spinner.d.ts +19 -0
  162. package/dist/spinner.js +37 -0
  163. package/dist/types.d.ts +101 -0
  164. package/dist/types.js +27 -0
  165. package/dist/utils.d.ts +16 -0
  166. package/dist/utils.js +40 -0
  167. package/dist/validate.d.ts +29 -0
  168. package/dist/validate.js +136 -0
  169. package/dist/verify.d.ts +20 -0
  170. package/dist/verify.js +131 -0
  171. package/dist/version.d.ts +13 -0
  172. package/dist/version.js +36 -0
  173. package/dist/xml-parser.d.ts +19 -0
  174. package/dist/xml-parser.js +119 -0
  175. package/dist/yaml-schema.d.ts +40 -0
  176. package/dist/yaml-schema.js +62 -0
  177. package/package.json +68 -0
@@ -0,0 +1,266 @@
1
+ /**
2
+ * PubMed & Gene XML parsing helpers.
3
+ *
4
+ * After fast-xml-parser processes NCBI XML (see xml-parser.ts), the
5
+ * result is a deeply nested JS object. These helpers navigate that
6
+ * structure and return flat, typed records suitable for CLI table output.
7
+ *
8
+ * NOTE: The xml-parser config uses:
9
+ * - '@_' prefix for attributes (e.g. @_EIdType)
10
+ * - '#text' for text nodes
11
+ * - Tags listed in ARRAY_TAGS are always arrays (Author, PubmedArticle, etc.)
12
+ */
13
+ import { isRecord } from '../../utils.js';
14
+ import { truncate } from './common.js';
/**
 * Walk a nested structure one key at a time.
 *
 * Keys are passed as separate arguments (not as one dotted string), e.g.
 * `dig(article, 'Journal', 'Title')`. Whenever the value reached so far
 * fails the `isRecord` check the result is `undefined`. With no keys the
 * input itself is returned.
 */
function dig(obj, ...keys) {
    return keys.reduce((node, key) => (isRecord(node) ? node[key] : undefined), obj);
}
/**
 * Coerce a parsed XML/JSON value to a display string.
 *
 * - `null` / `undefined` become ''.
 * - Strings are returned unchanged; numbers go through `String()`.
 * - Records are read through their '#text' key (fast-xml-parser produces
 *   `{ '#text': 'value' }` for text nodes that also carry attributes).
 *   A record with no '#text', or whose '#text' is nullish, yields '' so
 *   that "[object Object]", "undefined" or "null" never reach the output.
 * - Anything else (booleans, arrays, ...) falls back to `String()`.
 */
function str(v) {
    if (v === undefined || v === null)
        return '';
    if (typeof v === 'string')
        return v;
    if (typeof v === 'number')
        return String(v);
    if (isRecord(v) && !Array.isArray(v)) {
        // Recurse so a nullish or nested '#text' is normalised the same way.
        return '#text' in v ? str(v['#text']) : '';
    }
    return String(v);
}
/**
 * Build a short author string from a parsed Author array.
 *
 * Entries are expected to look like
 *   { LastName: 'Smith', ForeName: 'John', Initials: 'J' }
 * or, for group authors,
 *   { CollectiveName: 'COVID-19 Genomics UK Consortium' }
 *
 * Personal names are rendered as "LastName F" (first character of the
 * fore name; entries without a last name are dropped). Collective names
 * are used verbatim. At most three names are shown, followed by
 * " et al." when more exist. Non-array input yields ''.
 */
function formatAuthors(authorList) {
    if (!Array.isArray(authorList))
        return '';
    const labels = authorList.flatMap((entry) => {
        if (!isRecord(entry))
            return [];
        if (entry.CollectiveName)
            return [str(entry.CollectiveName)];
        const surname = str(entry.LastName);
        if (!surname)
            return [];
        const given = str(entry.ForeName);
        return [given ? `${surname} ${given.charAt(0)}` : surname];
    });
    const shown = labels.slice(0, 3).join(', ');
    return labels.length > 3 ? shown + ' et al.' : shown;
}
/**
 * Extract the DOI from an Article's ELocationID node(s).
 *
 * ELocationID may be a single object or an array (it is not forced into an
 * array by the parser). The first entry whose `@_EIdType` is 'doi' AND that
 * actually carries text wins; an attribute-only entry is skipped instead of
 * being stringified as "[object Object]".
 *
 * @param article Parsed Article record.
 * @returns The DOI with surrounding whitespace removed, or '' if none.
 */
function extractDoi(article) {
    const eloc = article.ELocationID;
    if (!eloc)
        return '';
    const candidates = Array.isArray(eloc) ? eloc : [eloc];
    for (const entry of candidates) {
        if (!isRecord(entry) || entry['@_EIdType'] !== 'doi')
            continue;
        const doi = str(entry['#text']).trim();
        if (doi)
            return doi;
    }
    return '';
}
/**
 * Publication year of an Article, read from Journal > JournalIssue > PubDate.
 *
 * Uses PubDate > Year when present; otherwise takes the first four-digit
 * run found in PubDate > MedlineDate (e.g. "2024 Jan-Feb" -> "2024").
 * Returns '' when neither yields a year.
 */
function extractYear(article) {
    const pubDatePath = ['Journal', 'JournalIssue', 'PubDate'];
    const explicitYear = str(dig(article, ...pubDatePath, 'Year'));
    if (explicitYear)
        return explicitYear;
    const fourDigits = /\d{4}/.exec(str(dig(article, ...pubDatePath, 'MedlineDate')));
    return fourDigits ? fourDigits[0] : '';
}
/**
 * Extract the abstract text of an Article.
 *
 * `Abstract.AbstractText` is normally an array whose elements are either
 * plain strings or objects with `@_Label` and `#text` (structured
 * abstracts). Labelled sections are rendered as "LABEL: text" and all
 * sections are joined with a single space.
 *
 * A section object with no text is rendered as just "LABEL:" (or skipped
 * when it has no label either) rather than being stringified as
 * "[object Object]".
 *
 * @param article Parsed Article record.
 * @returns The flattened abstract, or '' when there is no Abstract node.
 */
function extractAbstract(article) {
    const abstractNode = article.Abstract;
    if (!isRecord(abstractNode))
        return '';
    const textList = abstractNode.AbstractText;
    if (!Array.isArray(textList))
        return str(textList);
    const parts = [];
    for (const part of textList) {
        if (!isRecord(part)) {
            parts.push(str(part));
            continue;
        }
        const label = str(part['@_Label']);
        const text = str(part['#text']);
        if (!text) {
            // Attribute-only section: keep the heading, never the object itself.
            if (label)
                parts.push(`${label}:`);
            continue;
        }
        parts.push(label ? `${label}: ${text}` : text);
    }
    return parts.join(' ');
}
/**
 * Convert a parsed PubMed efetch response (the object produced by the XML
 * parser) into flat article records.
 *
 * Expects `PubmedArticleSet.PubmedArticle` to be an array; anything else
 * yields []. Entries lacking a record-shaped `MedlineCitation` or
 * `MedlineCitation.Article` are skipped.
 *
 * @returns Array of `{ pmid, title, authors, journal, year, doi, abstract }`.
 */
export function parsePubmedArticles(parsed) {
    const entries = dig(parsed, 'PubmedArticleSet', 'PubmedArticle');
    if (!Array.isArray(entries))
        return [];
    return entries.flatMap((entry) => {
        const citation = dig(entry, 'MedlineCitation');
        const article = dig(citation, 'Article');
        if (!isRecord(article))
            return [];
        // PMID may be a bare value or a node carrying attributes plus '#text'.
        const rawPmid = citation.PMID;
        const pmid = str(isRecord(rawPmid) ? rawPmid['#text'] : rawPmid);
        // Drop any inline markup tags that survived in the title text.
        const title = str(article.ArticleTitle).replace(/<[^>]+>/g, '');
        return [{
                pmid,
                title,
                authors: formatAuthors(dig(article, 'AuthorList', 'Author')),
                journal: str(dig(article, 'Journal', 'Title')),
                year: extractYear(article),
                doi: extractDoi(article),
                abstract: extractAbstract(article),
            }];
    });
}
/**
 * Convert a Gene esummary JSON response into GeneInfo records.
 *
 * Expected input shape:
 *   {
 *     result: {
 *       uids: ["7157", ...],
 *       "7157": { uid: "7157", name: "TP53", description: "...", ... }
 *     }
 *   }
 *
 * Records are emitted in `uids` order; uids without a record-shaped entry
 * are skipped. The summary is passed through `truncate(..., 300)`.
 */
export function parseGeneSummaries(parsed) {
    const table = dig(parsed, 'result');
    const uidList = dig(table, 'uids');
    if (!Array.isArray(uidList))
        return [];
    return uidList
        .map((uid) => table[String(uid)])
        .filter((entry) => isRecord(entry))
        .map((entry) => ({
        geneId: str(entry.uid),
        symbol: str(entry.name),
        name: str(entry.description),
        // Prefer the nested organism node, fall back to the flat orgname field.
        organism: str(dig(entry, 'organism', 'scientificname') ?? entry.orgname),
        summary: truncate(str(entry.summary), 300),
        chromosome: str(entry.chromosome),
        location: str(entry.maplocation),
    }));
}
/**
 * Read a display location out of an Entrezgene record.
 *
 * If `Entrezgene_location` is a primitive it is stringified as before. If
 * it is a nested node, the text of `Maps > Maps_display-str` is used
 * (first element when the parser produced arrays); '' when absent.
 * NOTE(review): the nested shape is taken from NCBI's Entrezgene XML
 * layout — confirm against the parser's ARRAY_TAGS configuration.
 */
function entrezDisplayLocation(gene) {
    const raw = gene['Entrezgene_location'];
    if (!isRecord(raw) && !Array.isArray(raw))
        return str(raw);
    const container = Array.isArray(raw) ? raw[0] : raw;
    const maps = dig(container, 'Maps');
    const firstMap = Array.isArray(maps) ? maps[0] : maps;
    const display = dig(firstMap, 'Maps_display-str');
    // Only accept scalar text; never stringify a nested node.
    return typeof display === 'string' || typeof display === 'number' ? String(display) : '';
}
/**
 * Parse a Gene efetch XML response (Entrezgene-Set) into GeneInfo records.
 *
 * Navigated structure:
 *   Entrezgene-Set > Entrezgene[] > Entrezgene_track-info > Gene-track > Gene-track_geneid
 *   Entrezgene_gene > Gene-ref > Gene-ref_locus / Gene-ref_desc / Gene-ref_maploc
 *   Entrezgene_source > BioSource > BioSource_org > Org-ref > Org-ref_taxname
 *
 * This is considerably more complex than esummary, so esummary is preferred
 * for most gene commands; this parser is provided for completeness.
 *
 * `location` is taken from the location node's display string instead of
 * stringifying the whole node (which produced "[object Object]").
 *
 * @returns Array of `{ geneId, symbol, name, organism, summary, chromosome, location }`;
 *          [] when the expected set/array is missing.
 */
export function parseGeneEntries(parsed) {
    if (!isRecord(parsed))
        return [];
    const entrezSet = parsed['Entrezgene-Set'];
    if (!isRecord(entrezSet))
        return [];
    const genes = entrezSet.Entrezgene;
    if (!Array.isArray(genes))
        return [];
    const results = [];
    for (const gene of genes) {
        if (!isRecord(gene))
            continue;
        const geneId = str(dig(gene, 'Entrezgene_track-info', 'Gene-track', 'Gene-track_geneid'));
        // Symbol, name and map location all live on Entrezgene_gene > Gene-ref.
        const geneRef = dig(gene, 'Entrezgene_gene', 'Gene-ref');
        const hasRef = isRecord(geneRef);
        const symbol = hasRef ? str(geneRef['Gene-ref_locus']) : '';
        const name = hasRef ? str(geneRef['Gene-ref_desc']) : '';
        const organism = str(dig(gene, 'Entrezgene_source', 'BioSource', 'BioSource_org', 'Org-ref', 'Org-ref_taxname'));
        const summary = truncate(str(gene['Entrezgene_summary']), 300);
        // The `chromosome` field is populated from Gene-ref_maploc, as before.
        const chromosome = hasRef ? str(geneRef['Gene-ref_maploc']) : '';
        const location = entrezDisplayLocation(gene);
        results.push({ geneId, symbol, name, organism, summary, chromosome, location });
    }
    return results;
}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * aggregate/enrichment — Combined enrichment analysis from Enrichr + STRING.
3
+ *
4
+ * Queries both Enrichr and STRING functional enrichment in parallel,
5
+ * merges the results and sorts them by p-value into a unified enrichment report.
6
+ */
7
+ export {};
@@ -0,0 +1,105 @@
1
+ /**
2
+ * aggregate/enrichment — Combined enrichment analysis from Enrichr + STRING.
3
+ *
4
+ * Queries both Enrichr and STRING functional enrichment in parallel,
5
+ * merges the results and sorts them by p-value into a unified enrichment report.
6
+ */
7
+ import { cli, Strategy } from '../../registry.js';
8
+ import { CliError } from '../../errors.js';
9
+ import { submitGeneList, getEnrichment } from '../../databases/enrichr.js';
10
+ import { buildStringUrl, encodeStringIds } from '../../databases/string-db.js';
11
+ import { createHttpContextForDatabase } from '../../databases/index.js';
12
+ import { wrapResult } from '../../types.js';
/**
 * Registers the `aggregate enrichment` command.
 *
 * The handler splits the comma-separated gene list (at least 2 genes are
 * required), then queries Enrichr (submit list -> fetch enrichment for the
 * chosen library) and STRING functional enrichment in parallel via
 * Promise.allSettled, so one source failing does not abort the other.
 * Up to `limit` rows are kept per source, the rows are merged and sorted by
 * ascending p-value, and per-source failures are returned as warnings.
 *
 * Throws CliError('ARGUMENT') for fewer than 2 genes and
 * CliError('NOT_FOUND') when neither source produced any row.
 */
cli({
    site: 'aggregate',
    name: 'enrichment',
    description: 'Combined pathway enrichment from Enrichr + STRING',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 60,
    args: [
        { name: 'genes', positional: true, required: true, help: 'Comma-separated gene symbols (e.g. TP53,BRCA1,EGFR,MYC,CDK2)' },
        { name: 'library', default: 'KEGG_2021_Human', help: 'Enrichr library (e.g. GO_Biological_Process_2023, Reactome_2022)' },
        { name: 'limit', type: 'int', default: 20, help: 'Max results per source (1-50)' },
        { name: 'species', type: 'int', default: 9606, help: 'NCBI taxonomy ID for STRING (default: 9606)' },
    ],
    columns: ['term', 'category', 'source', 'pValue', 'genes'],
    func: async (_ctx, args) => {
        const geneList = String(args.genes).split(',').map(s => s.trim()).filter(Boolean);
        if (geneList.length < 2) {
            throw new CliError('ARGUMENT', 'At least 2 genes required', 'Example: biocli aggregate enrichment TP53,BRCA1,EGFR,MYC,CDK2');
        }
        const library = String(args.library);
        // A non-numeric limit would become NaN and make slice(0, NaN) return
        // nothing; fall back to the documented default instead.
        const requestedLimit = Number(args.limit);
        const limit = Number.isFinite(requestedLimit)
            ? Math.max(1, Math.min(requestedLimit, 50))
            : 20;
        const species = String(args.species);
        const errors = [];
        // Run both sources in parallel; each settles independently.
        const [enrichrResult, stringResult] = await Promise.allSettled([
            // Enrichr: 2-step workflow (submit list, then fetch enrichment)
            (async () => {
                const userListId = await submitGeneList(geneList);
                const results = await getEnrichment(userListId, library);
                return results.slice(0, limit).map(r => ({
                    term: String(r.term),
                    category: library,
                    source: 'Enrichr',
                    pValue: Number(r.adjustedPValue).toExponential(2),
                    genes: String(r.genes),
                }));
            })(),
            // STRING functional enrichment
            (async () => {
                const stringCtx = createHttpContextForDatabase('string');
                const data = await stringCtx.fetchJson(buildStringUrl('enrichment', {
                    identifiers: encodeStringIds(geneList),
                    species,
                }));
                if (!Array.isArray(data))
                    return [];
                return data
                    .filter(item => {
                    // Only keep KEGG/GO/Reactome categories
                    const cat = String(item.category ?? '');
                    return ['Process', 'Function', 'Component', 'KEGG', 'Reactome'].some(c => cat.includes(c));
                })
                    .slice(0, limit)
                    .map(item => {
                    const inputGenes = item.inputGenes;
                    const geneStr = Array.isArray(inputGenes) ? inputGenes.join(',') : String(inputGenes ?? '');
                    return {
                        term: String(item.description ?? item.term ?? ''),
                        category: String(item.category ?? ''),
                        source: 'STRING',
                        pValue: Number(item.fdr ?? 1).toExponential(2),
                        genes: geneStr,
                    };
                });
            })(),
        ]);
        const rows = [];
        if (enrichrResult.status === 'fulfilled') {
            rows.push(...enrichrResult.value);
        }
        else {
            errors.push(`Enrichr: ${enrichrResult.reason}`);
        }
        if (stringResult.status === 'fulfilled') {
            rows.push(...stringResult.value);
        }
        else {
            errors.push(`STRING: ${stringResult.reason}`);
        }
        if (!rows.length) {
            throw new CliError('NOT_FOUND', 'No enrichment results from any source', errors.length ? `Errors: ${errors.join('; ')}` : 'Try adding more genes');
        }
        // Sort by p-value. Rows whose p-value did not parse ("NaN") sort last;
        // the comparator never returns NaN, so the ordering stays consistent.
        const sortKey = (row) => {
            const p = parseFloat(row.pValue);
            return Number.isNaN(p) ? Infinity : p;
        };
        rows.sort((a, b) => {
            const pa = sortKey(a);
            const pb = sortKey(b);
            if (pa < pb)
                return -1;
            if (pa > pb)
                return 1;
            return 0;
        });
        const activeSources = [...new Set(rows.map(r => r.source))];
        return wrapResult(rows, {
            sources: activeSources,
            warnings: errors,
            query: geneList.join(','),
        });
    },
});
@@ -0,0 +1,13 @@
1
+ /**
2
+ * aggregate/gene-dossier — Comprehensive gene intelligence report.
3
+ *
4
+ * Builds on gene-profile and adds:
5
+ * - Recent PubMed literature (top papers)
6
+ * - ClinVar clinical significance
7
+ * - Summary assessment for agent consumption
8
+ *
9
+ * This is the highest-level gene command — a complete "dossier" that
10
+ * an AI agent can use to understand a gene's biological role, clinical
11
+ * relevance, and research landscape in one call.
12
+ */
13
+ export {};
@@ -0,0 +1,248 @@
1
+ /**
2
+ * aggregate/gene-dossier — Comprehensive gene intelligence report.
3
+ *
4
+ * Builds on gene-profile and adds:
5
+ * - Recent PubMed literature (top papers)
6
+ * - ClinVar clinical significance
7
+ * - Summary assessment for agent consumption
8
+ *
9
+ * This is the highest-level gene command — a complete "dossier" that
10
+ * an AI agent can use to understand a gene's biological role, clinical
11
+ * relevance, and research landscape in one call.
12
+ */
13
+ import { cli, Strategy } from '../../registry.js';
14
+ import { CliError } from '../../errors.js';
15
+ import { wrapResult } from '../../types.js';
16
+ import { createHttpContextForDatabase } from '../../databases/index.js';
17
+ import { buildEutilsUrl } from '../../databases/ncbi.js';
18
+ import { parsePubmedArticles } from '../_shared/xml-helpers.js';
19
+ import { buildUniprotUrl } from '../../databases/uniprot.js';
20
+ import { buildKeggUrl, parseKeggTsv } from '../../databases/kegg.js';
21
+ import { buildStringUrl } from '../../databases/string-db.js';
22
+ import { parseGeneSummaries } from '../_shared/xml-helpers.js';
23
+ import { resolveOrganism } from '../_shared/organism-db.js';
// Literature layer added on top of the gene-profile building blocks.
/**
 * Look up PubMed papers mentioning a gene symbol within the last 5 years.
 *
 * Runs an esearch (relevance-sorted, at most `limit` ids) followed by an
 * efetch for those ids, and parses the XML with `parsePubmedArticles`.
 *
 * @param ctx    HTTP context providing `fetchJson` and `fetchXml`.
 * @param symbol Gene symbol used as the search term.
 * @param limit  Maximum number of PMIDs to request.
 * @returns Array of `{ pmid, title, authors, journal, year, doi }`
 *          (the abstract is intentionally left out); [] when nothing matched.
 */
async function fetchRecentLiterature(ctx, symbol, limit) {
    const searchUrl = buildEutilsUrl('esearch.fcgi', {
        db: 'pubmed',
        term: `${symbol} AND "last 5 years"[PDat]`,
        retmax: String(limit),
        sort: 'relevance',
        retmode: 'json',
    });
    const searchResponse = await ctx.fetchJson(searchUrl);
    const hits = searchResponse?.esearchresult?.idlist ?? [];
    if (!hits.length)
        return [];
    const fetchUrl = buildEutilsUrl('efetch.fcgi', {
        db: 'pubmed',
        id: hits.join(','),
        rettype: 'xml',
    });
    const parsedArticles = parsePubmedArticles(await ctx.fetchXml(fetchUrl));
    return parsedArticles.map(({ pmid, title, authors, journal, year, doi }) => ({
        pmid,
        title,
        authors,
        journal,
        year,
        doi,
    }));
}
/**
 * Fetch up to 10 ClinVar records for a gene symbol and summarise their
 * clinical classification.
 *
 * Runs an esearch on `${symbol}[Gene Name]` followed by an esummary for the
 * returned ids.
 *
 * The classification is read from the legacy `clinical_significance` field
 * and, when that is missing or empty, from `germline_classification`; the
 * same fallback applies to `trait_set`.
 * NOTE(review): `germline_classification` is the field name used by newer
 * ClinVar esummary payloads as far as known — confirm against a live response.
 *
 * @param ctx    HTTP context providing `fetchJson`.
 * @param symbol Gene symbol to search for.
 * @returns Array of `{ title, significance, condition, accession }`;
 *          [] when the search has no hits or the summary has no uid list.
 */
async function fetchClinvarSignificance(ctx, symbol) {
    const searchResult = await ctx.fetchJson(buildEutilsUrl('esearch.fcgi', {
        db: 'clinvar',
        term: `${symbol}[Gene Name]`,
        retmax: '10',
        retmode: 'json',
    }));
    const ids = searchResult?.esearchresult?.idlist ?? [];
    if (!ids.length)
        return [];
    const summary = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
        db: 'clinvar',
        id: ids.join(','),
        retmode: 'json',
    }));
    const resultObj = summary?.result;
    // Guard against an error payload where `uids` is missing or not a list.
    const uids = Array.isArray(resultObj?.uids) ? resultObj.uids : [];
    // A classification is either an object with `description` or a bare value.
    const describe = (c) => (typeof c === 'object' && c !== null
        ? String(c.description ?? '')
        : String(c ?? ''));
    return uids.map(uid => {
        const item = (resultObj?.[uid] ?? {});
        const germline = item.germline_classification;
        const sig = describe(item.clinical_significance) || describe(germline);
        let traitSet = [];
        if (Array.isArray(item.trait_set)) {
            traitSet = item.trait_set;
        }
        else if (typeof germline === 'object' && germline !== null && Array.isArray(germline.trait_set)) {
            traitSet = germline.trait_set;
        }
        const traits = traitSet.map(t => String(t?.trait_name ?? '')).join('; ');
        return {
            title: String(item.title ?? ''),
            significance: sig,
            condition: traits,
            accession: String(item.accession ?? ''),
        };
    });
}
/**
 * Registers the `aggregate gene-dossier` command.
 *
 * The handler resolves the organism, then queries in parallel (via
 * Promise.allSettled): NCBI Gene (esearch + esummary), UniProt (reviewed
 * entry: function text and GO cross-references), STRING interaction
 * partners, recent PubMed literature and ClinVar records. KEGG pathways are
 * fetched afterwards because they need the NCBI gene id.
 *
 * Every source that contributed data is listed in `sources`; every source
 * that failed or returned nothing relevant is reported in `warnings`,
 * including STRING and ClinVar request failures.
 *
 * Throws CliError('ARGUMENT') when the gene symbol is empty.
 */
cli({
    site: 'aggregate',
    name: 'gene-dossier',
    description: 'Complete gene intelligence report (profile + literature + clinical)',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 90,
    args: [
        { name: 'gene', positional: true, required: true, help: 'Gene symbol (e.g. TP53)' },
        { name: 'organism', default: 'human', help: 'Organism (e.g. human, mouse)' },
        { name: 'papers', type: 'int', default: 5, help: 'Number of recent papers to include' },
    ],
    columns: ['symbol', 'name', 'pathways', 'interactions', 'literature', 'clinvar'],
    func: async (_ctx, args) => {
        const symbol = String(args.gene).trim();
        if (!symbol)
            throw new CliError('ARGUMENT', 'Gene symbol is required');
        const org = resolveOrganism(String(args.organism));
        // Clamp to 1..20; a non-numeric value falls back to the default of 5
        // instead of sending retmax=NaN to PubMed.
        const requestedPapers = Number(args.papers);
        const paperCount = Number.isFinite(requestedPapers)
            ? Math.max(1, Math.min(requestedPapers, 20))
            : 5;
        const sources = [];
        const warnings = [];
        const ids = {};
        const ncbiCtx = createHttpContextForDatabase('ncbi');
        const uniprotCtx = createHttpContextForDatabase('uniprot');
        const keggCtx = createHttpContextForDatabase('kegg');
        const stringCtx = createHttpContextForDatabase('string');
        // Phase 1: Core profile (parallel)
        const [ncbiResult, uniprotResult, stringResult, litResult, clinvarResult] = await Promise.allSettled([
            // NCBI Gene
            (async () => {
                const sr = await ncbiCtx.fetchJson(buildEutilsUrl('esearch.fcgi', {
                    db: 'gene', term: `${symbol}[Gene Name] AND ${org.name}[Organism]`,
                    retmax: '5', retmode: 'json',
                }));
                const gids = sr?.esearchresult?.idlist ?? [];
                if (!gids.length)
                    return null;
                const summ = await ncbiCtx.fetchJson(buildEutilsUrl('esummary.fcgi', {
                    db: 'gene', id: gids.join(','), retmode: 'json',
                }));
                const genes = parseGeneSummaries(summ);
                // Prefer an exact (case-insensitive) symbol match over the first hit.
                const best = genes.find(g => g.symbol.toUpperCase() === symbol.toUpperCase()) ?? genes[0];
                return best ?? null;
            })(),
            // UniProt
            (async () => {
                const data = await uniprotCtx.fetchJson(buildUniprotUrl('/uniprotkb/search', {
                    query: `gene:${symbol} AND organism_id:${org.taxId} AND reviewed:true`,
                    format: 'json', size: '1',
                }));
                const results = (data?.results ?? []);
                return results[0] ?? null;
            })(),
            // STRING partners
            (async () => {
                const data = await stringCtx.fetchJson(buildStringUrl('interaction_partners', {
                    identifiers: symbol, species: String(org.taxId), limit: '10', required_score: '400',
                }));
                return Array.isArray(data) ? data.map(i => ({
                    partner: String(i.preferredName_B ?? ''),
                    score: Number(i.score ?? 0),
                })) : [];
            })(),
            // Literature
            fetchRecentLiterature(ncbiCtx, symbol, paperCount),
            // ClinVar
            fetchClinvarSignificance(ncbiCtx, symbol),
        ]);
        // Extract NCBI
        let ncbiGene = null;
        if (ncbiResult.status === 'fulfilled' && ncbiResult.value) {
            ncbiGene = ncbiResult.value;
            sources.push('NCBI Gene');
            ids.ncbiGeneId = String(ncbiGene.geneId);
        }
        else {
            warnings.push(`NCBI Gene: ${ncbiResult.status === 'rejected' ? ncbiResult.reason : 'not found'}`);
        }
        // Extract UniProt (function + GO terms)
        let uniprotFunc = '';
        let goTerms = [];
        if (uniprotResult.status === 'fulfilled' && uniprotResult.value) {
            const entry = uniprotResult.value;
            ids.uniprotAccession = String(entry.primaryAccession ?? '');
            const comments = (entry.comments ?? []);
            const fc = comments.find(c => c.commentType === 'FUNCTION');
            const texts = (fc?.texts ?? []);
            uniprotFunc = texts.map(t => String(t.value ?? '')).join(' ');
            // Extract GO terms from cross-references
            const aspectMap = { C: 'CC', F: 'MF', P: 'BP' };
            const xrefs = (entry.uniProtKBCrossReferences ?? []);
            goTerms = xrefs
                .filter(x => x.database === 'GO')
                .map(x => {
                const id = String(x.id ?? '');
                const props = (x.properties ?? []);
                const termProp = props.find(p => p.key === 'GoTerm');
                const term = String(termProp?.value ?? '');
                // The GoTerm value is split at ':' into an aspect letter and a name.
                const [aspect, ...nameParts] = term.split(':');
                return { id, name: nameParts.join(':'), aspect: aspectMap[aspect] ?? aspect };
            });
            sources.push('UniProt');
        }
        else {
            warnings.push(`UniProt: ${uniprotResult.status === 'rejected' ? uniprotResult.reason : 'not found'}`);
        }
        // Extract STRING (a failed request is reported, not silently dropped)
        const interactions = stringResult.status === 'fulfilled' ? stringResult.value : [];
        if (interactions.length)
            sources.push('STRING');
        if (stringResult.status === 'rejected')
            warnings.push(`STRING: ${stringResult.reason}`);
        // Extract literature
        const literature = litResult.status === 'fulfilled' ? litResult.value : [];
        if (literature.length)
            sources.push('PubMed');
        else
            warnings.push(`PubMed: ${litResult.status === 'rejected' ? litResult.reason : 'no recent papers'}`);
        // Extract ClinVar (a failed request is reported, not silently dropped)
        const clinvar = clinvarResult.status === 'fulfilled' ? clinvarResult.value : [];
        if (clinvar.length)
            sources.push('ClinVar');
        if (clinvarResult.status === 'rejected')
            warnings.push(`ClinVar: ${clinvarResult.reason}`);
        // KEGG pathways (sequential, needs gene ID)
        let pathways = [];
        if (ncbiGene?.geneId) {
            try {
                const keggId = `${org.keggOrg}:${ncbiGene.geneId}`;
                const pathText = await keggCtx.fetchText(buildKeggUrl(`/link/pathway/${keggId}`));
                if (pathText.trim()) {
                    const stripPrefix = (s) => s.replace(/^path:/, '');
                    const links = parseKeggTsv(pathText);
                    const pathIds = links.map(l => stripPrefix(l.value)).filter(Boolean);
                    const listText = await keggCtx.fetchText(buildKeggUrl(`/list/pathway/${org.keggOrg}`));
                    // Normalise list keys the same way as link values so the
                    // lookup works whether or not KEGG prefixes them with "path:".
                    const pathMap = new Map(parseKeggTsv(listText).map(p => [stripPrefix(p.key), p.value.replace(/ - .*$/, '')]));
                    pathways = pathIds.map(id => ({ id, name: pathMap.get(id) ?? id }));
                    ids.keggId = keggId;
                    sources.push('KEGG');
                }
            }
            catch (err) {
                warnings.push(`KEGG: ${err instanceof Error ? err.message : String(err)}`);
            }
        }
        const dossier = {
            symbol,
            name: String(ncbiGene?.name ?? ''),
            summary: String(ncbiGene?.summary ?? ''),
            function: uniprotFunc,
            chromosome: String(ncbiGene?.chromosome ?? ''),
            location: String(ncbiGene?.location ?? ''),
            pathways,
            goTerms,
            interactions,
            recentLiterature: literature,
            clinicalVariants: clinvar,
        };
        return wrapResult(dossier, {
            ids,
            sources,
            warnings,
            organism: org.name,
            query: symbol,
        });
    },
});
@@ -0,0 +1,16 @@
1
+ /**
2
+ * aggregate/gene-profile — Complete gene profile from multiple databases.
3
+ *
4
+ * THE KILLER FEATURE: one command queries NCBI Gene, UniProt, KEGG, and
5
+ * STRING in parallel and returns a unified, agent-friendly JSON object.
6
+ *
7
+ * Supports:
8
+ * - Single gene: biocli aggregate gene-profile TP53
9
+ * - Batch: biocli aggregate gene-profile TP53,BRCA1,EGFR
10
+ *
11
+ * Design:
12
+ * - Promise.allSettled for partial failure tolerance
13
+ * - _meta.sources tracks which databases contributed
14
+ * - _meta.errors reports partial failures without crashing
15
+ */
16
+ export {};